1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * created on: 2014sep07
11 * created by: Markus W. Scherer
14 #ifndef __NORM2ALLMODES_H__
15 #define __NORM2ALLMODES_H__
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_NORMALIZATION
21 #include "unicode/edits.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/stringoptions.h"
24 #include "unicode/unistr.h"
26 #include "normalizer2impl.h"
30 // Intermediate class:
31 // Has Normalizer2Impl and does boilerplate argument checking and setup.
// Normalizer2 subclass that stores a reference to a Normalizer2Impl (member
// `impl`) and implements the UnicodeString-based entry points on top of
// pure-virtual UChar-pointer hooks (normalize, normalizeAndAppend,
// spanQuickCheckYes) declared below with `= 0`.
// NOTE(review): the embedded line numbers in this listing skip, so some original
// lines (access specifiers, return statements, a few parameters, closing braces)
// are not visible here. The comments describe only the statements that are shown.
32 class Normalizer2WithImpl
: public Normalizer2
{
// Constructor: binds the `impl` reference to the supplied Normalizer2Impl.
34 Normalizer2WithImpl(const Normalizer2Impl
&ni
) : impl(ni
) {}
35 virtual ~Normalizer2WithImpl();
// normalize(src, ..., errorCode): string-level normalization entry point.
// The body refers to `dest`; its parameter declaration is not visible in this
// listing.
38 virtual UnicodeString
&
39 normalize(const UnicodeString
&src
,
41 UErrorCode
&errorCode
) const {
// Branch taken when errorCode already holds a failure (branch body not visible).
42 if(U_FAILURE(errorCode
)) {
// Fetch the source string's UChar buffer.
46 const UChar
*sArray
=src
.getBuffer();
// dest and src being the same object, or a NULL source buffer, is reported
// as U_ILLEGAL_ARGUMENT_ERROR.
47 if(&dest
==&src
|| sArray
==NULL
) {
48 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// Build a ReorderingBuffer from impl and dest; only if its init() call with
// src.length() returns true is the work handed to the pointer-based
// normalize() overload.
53 ReorderingBuffer
buffer(impl
, dest
);
54 if(buffer
.init(src
.length(), errorCode
)) {
55 normalize(sArray
, sArray
+src
.length(), buffer
, errorCode
);
// Pure-virtual hook: normalize the range [src, limit) into `buffer`.
60 normalize(const UChar
*src
, const UChar
*limit
,
61 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
63 // normalize and append
// Three-argument form: forwards to the overload below, passing TRUE as the
// third argument.
64 virtual UnicodeString
&
65 normalizeSecondAndAppend(UnicodeString
&first
,
66 const UnicodeString
&second
,
67 UErrorCode
&errorCode
) const {
68 return normalizeSecondAndAppend(first
, second
, TRUE
, errorCode
);
// append(): same forwarding, but passes FALSE as the third argument.
70 virtual UnicodeString
&
71 append(UnicodeString
&first
,
72 const UnicodeString
&second
,
73 UErrorCode
&errorCode
) const {
74 return normalizeSecondAndAppend(first
, second
, FALSE
, errorCode
);
// Shared implementation behind the two wrappers above. The body uses
// `doNormalize`; its parameter declaration is not visible in this listing.
77 normalizeSecondAndAppend(UnicodeString
&first
,
78 const UnicodeString
&second
,
80 UErrorCode
&errorCode
) const {
// uprv_checkCanGetBuffer() is called on `first` before the failure test.
81 uprv_checkCanGetBuffer(first
, errorCode
);
82 if(U_FAILURE(errorCode
)) {
// first and second being the same object, or a NULL buffer for second, is
// reported as U_ILLEGAL_ARGUMENT_ERROR.
85 const UChar
*secondArray
=second
.getBuffer();
86 if(&first
==&second
|| secondArray
==NULL
) {
87 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// Record the length of `first` before it is modified; safeMiddle is passed to
// normalizeAndAppend() and used below to restore `first` on failure.
90 int32_t firstLength
=first
.length();
91 UnicodeString safeMiddle
;
// ReorderingBuffer built from impl and first; init() is called with the
// combined length of both strings.
93 ReorderingBuffer
buffer(impl
, first
);
94 if(buffer
.init(firstLength
+second
.length(), errorCode
)) {
95 normalizeAndAppend(secondArray
, secondArray
+second
.length(), doNormalize
,
96 safeMiddle
, buffer
, errorCode
);
98 } // The ReorderingBuffer destructor finalizes the first string.
// On failure, everything in `first` from index
// firstLength-safeMiddle.length() onward is replaced with safeMiddle.
99 if(U_FAILURE(errorCode
)) {
100 // Restore the modified suffix of the first string.
101 first
.replace(firstLength
-safeMiddle
.length(), 0x7fffffff, safeMiddle
);
// Pure-virtual hook called by normalizeSecondAndAppend() above with the
// [src, limit) range of `second`, doNormalize, safeMiddle and the buffer.
106 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
107 UnicodeString
&safeMiddle
,
108 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
// getDecomposition(): asks impl for the decomposition of c and stores it in
// `decomposition`, either as a copy of `buffer` or as a read-only alias of d.
// The declarations of `buffer`/`length` and the conditions choosing between
// the two setTo() calls are not visible in this listing.
110 getDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
113 const UChar
*d
=impl
.getDecomposition(c
, buffer
, length
);
118 decomposition
.setTo(buffer
, length
); // copy the string (Jamos from Hangul syllable c)
120 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
// getRawDecomposition(): same shape as getDecomposition(), but calls
// impl.getRawDecomposition(). Branch conditions are likewise not visible.
125 getRawDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
128 const UChar
*d
=impl
.getRawDecomposition(c
, buffer
, length
);
133 decomposition
.setTo(buffer
, length
); // copy the string (algorithmic decomposition)
135 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
// composePair(): direct delegation to impl.composePair(a, b).
140 composePair(UChar32 a
, UChar32 b
) const {
141 return impl
.composePair(a
, b
);
// getCombiningClass(): returns impl.getCC() applied to impl.getNorm16(c).
145 getCombiningClass(UChar32 c
) const {
146 return impl
.getCC(impl
.getNorm16(c
));
// isNormalized(): true when the pointer-based spanQuickCheckYes() returns the
// end of the string, i.e. the quick-check span covers all of s.
151 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
152 if(U_FAILURE(errorCode
)) {
155 const UChar
*sArray
=s
.getBuffer();
// The condition guarding this assignment is not visible in this listing.
157 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
160 const UChar
*sLimit
=sArray
+s
.length();
161 return sLimit
==spanQuickCheckYes(sArray
, sLimit
, errorCode
);
// quickCheck(): maps the result of isNormalized() to UNORM_YES / UNORM_NO.
// The call is class-qualified, so it always runs this class's isNormalized()
// rather than a subclass override.
163 virtual UNormalizationCheckResult
164 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
165 return Normalizer2WithImpl::isNormalized(s
, errorCode
) ? UNORM_YES
: UNORM_NO
;
// spanQuickCheckYes(string form): runs the pointer-based overload over the
// whole buffer and converts the returned pointer into an int32_t offset from
// the start of the buffer.
168 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
169 if(U_FAILURE(errorCode
)) {
172 const UChar
*sArray
=s
.getBuffer();
// The condition guarding this assignment is not visible in this listing.
174 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
177 return (int32_t)(spanQuickCheckYes(sArray
, sArray
+s
.length(), errorCode
)-sArray
);
// Pure-virtual hook: returns a pointer into [src, limit]; the callers above
// compare it with the limit or subtract the start pointer from it.
179 virtual const UChar
*
180 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const = 0;
// Per-code-point quick check; the parameter is unnamed and the body is not
// visible in this listing.
182 virtual UNormalizationCheckResult
getQuickCheck(UChar32
) const {
// Reference to the Normalizer2Impl supplied at construction; used by every
// delegating method above.
186 const Normalizer2Impl
&impl
;
// Normalizer2WithImpl subclass whose hooks delegate to the decompose family
// of Normalizer2Impl functions (decompose, decomposeAndAppend, isDecompYes,
// hasDecompBoundaryBefore/After, isDecompInert).
// NOTE(review): some original lines (access specifiers, return types, closing
// braces) are not visible in this listing.
189 class DecomposeNormalizer2
: public Normalizer2WithImpl
{
// Constructor: passes the Normalizer2Impl through to the base class.
191 DecomposeNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
192 virtual ~DecomposeNormalizer2();
// normalize hook: calls impl.decompose() on [src, limit), passing the address
// of `buffer`.
196 normalize(const UChar
*src
, const UChar
*limit
,
197 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
198 impl
.decompose(src
, limit
, &buffer
, errorCode
);
200 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// normalizeAndAppend hook: forwards all arguments to impl.decomposeAndAppend().
202 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
203 UnicodeString
&safeMiddle
,
204 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
205 impl
.decomposeAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
// spanQuickCheckYes hook: calls the same impl.decompose() as normalize(), but
// with NULL in place of the buffer, and returns the pointer it yields.
207 virtual const UChar
*
208 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
209 return impl
.decompose(src
, limit
, NULL
, errorCode
);
211 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// getQuickCheck(): UNORM_YES when impl.isDecompYes() is true for the norm16
// value of c, otherwise UNORM_NO.
212 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const {
213 return impl
.isDecompYes(impl
.getNorm16(c
)) ? UNORM_YES
: UNORM_NO
;
// Boundary / inertness queries: one-line delegations to the matching
// impl.hasDecompBoundaryBefore / hasDecompBoundaryAfter / isDecompInert.
215 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasDecompBoundaryBefore(c
); }
216 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasDecompBoundaryAfter(c
); }
217 virtual UBool
isInert(UChar32 c
) const { return impl
.isDecompInert(c
); }
// Normalizer2WithImpl subclass whose hooks delegate to the compose family of
// Normalizer2Impl functions. The `onlyContiguous` flag, set from the `fcc`
// constructor argument, is passed along on the compose-related calls.
// NOTE(review): some original lines (access specifiers, return types, a local
// declaration, return statements, closing braces) are not visible in this
// listing.
220 class ComposeNormalizer2
: public Normalizer2WithImpl
{
// Constructor: forwards `ni` to the base class and stores `fcc` in
// onlyContiguous.
222 ComposeNormalizer2(const Normalizer2Impl
&ni
, UBool fcc
) :
223 Normalizer2WithImpl(ni
), onlyContiguous(fcc
) {}
224 virtual ~ComposeNormalizer2();
// normalize hook: impl.compose() over [src, limit) with TRUE as the fourth
// argument, writing through `buffer`.
228 normalize(const UChar
*src
, const UChar
*limit
,
229 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const U_OVERRIDE
{
230 impl
.compose(src
, limit
, onlyContiguous
, TRUE
, buffer
, errorCode
);
232 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// normalizeUTF8(): normalizes the bytes of `src` into `sink` via
// impl.composeUTF8(), forwarding `options` and `edits`.
235 normalizeUTF8(uint32_t options
, StringPiece src
, ByteSink
&sink
,
236 Edits
*edits
, UErrorCode
&errorCode
) const U_OVERRIDE
{
237 if (U_FAILURE(errorCode
)) {
// Branch entered only when an Edits object was supplied and the
// U_EDITS_NO_RESET option bit is clear (branch body not visible;
// presumably it resets `edits` - TODO confirm).
240 if (edits
!= nullptr && (options
& U_EDITS_NO_RESET
) == 0) {
// View the StringPiece's bytes as uint8_t and pass the [s, s+length) range.
243 const uint8_t *s
= reinterpret_cast<const uint8_t *>(src
.data());
244 impl
.composeUTF8(options
, onlyContiguous
, s
, s
+ src
.length(),
245 &sink
, edits
, errorCode
);
// normalizeAndAppend hook: forwards to impl.composeAndAppend(), inserting
// onlyContiguous after doNormalize.
250 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
251 UnicodeString
&safeMiddle
,
252 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const U_OVERRIDE
{
253 impl
.composeAndAppend(src
, limit
, doNormalize
, onlyContiguous
, safeMiddle
, buffer
, errorCode
);
// isNormalized(): returns the result of impl.compose() called with FALSE as
// its fourth argument (normalize() above passes TRUE there), using a
// ReorderingBuffer built over `temp`.
// The declaration of `temp` is not visible in this listing.
257 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const U_OVERRIDE
{
258 if(U_FAILURE(errorCode
)) {
261 const UChar
*sArray
=s
.getBuffer();
// The condition guarding this assignment is not visible in this listing.
263 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
267 ReorderingBuffer
buffer(impl
, temp
);
268 if(!buffer
.init(5, errorCode
)) { // small destCapacity for substring normalization
271 return impl
.compose(sArray
, sArray
+s
.length(), onlyContiguous
, FALSE
, buffer
, errorCode
);
// isNormalizedUTF8(): returns the result of impl.composeUTF8() called with
// options 0 and nullptr for both the sink and the edits arguments.
274 isNormalizedUTF8(StringPiece sp
, UErrorCode
&errorCode
) const U_OVERRIDE
{
275 if(U_FAILURE(errorCode
)) {
278 const uint8_t *s
= reinterpret_cast<const uint8_t *>(sp
.data());
279 return impl
.composeUTF8(0, onlyContiguous
, s
, s
+ sp
.length(), nullptr, nullptr, errorCode
);
// quickCheck(): initializes qcResult to UNORM_YES and passes its address to
// impl.composeQuickCheck(). The statement returning the result is not
// visible in this listing.
281 virtual UNormalizationCheckResult
282 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const U_OVERRIDE
{
283 if(U_FAILURE(errorCode
)) {
286 const UChar
*sArray
=s
.getBuffer();
// The condition guarding this assignment is not visible in this listing.
288 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
291 UNormalizationCheckResult qcResult
=UNORM_YES
;
292 impl
.composeQuickCheck(sArray
, sArray
+s
.length(), onlyContiguous
, &qcResult
);
// spanQuickCheckYes hook: returns the pointer from impl.composeQuickCheck()
// called with NULL as the result pointer. The UErrorCode parameter is
// unnamed and unused here.
295 virtual const UChar
*
296 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&) const U_OVERRIDE
{
297 return impl
.composeQuickCheck(src
, limit
, onlyContiguous
, NULL
);
299 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// getQuickCheck(): returns impl.getCompQuickCheck() for the norm16 value of c.
300 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const U_OVERRIDE
{
301 return impl
.getCompQuickCheck(impl
.getNorm16(c
));
// Boundary / inertness queries: delegations to impl; hasBoundaryAfter() and
// isInert() additionally pass onlyContiguous.
303 virtual UBool
hasBoundaryBefore(UChar32 c
) const U_OVERRIDE
{
304 return impl
.hasCompBoundaryBefore(c
);
306 virtual UBool
hasBoundaryAfter(UChar32 c
) const U_OVERRIDE
{
307 return impl
.hasCompBoundaryAfter(c
, onlyContiguous
);
309 virtual UBool
isInert(UChar32 c
) const U_OVERRIDE
{
310 return impl
.isCompInert(c
, onlyContiguous
);
// Flag fixed at construction from the `fcc` argument.
313 const UBool onlyContiguous
;
// Normalizer2WithImpl subclass whose hooks delegate to the FCD family of
// Normalizer2Impl functions (makeFCD, makeFCDAndAppend, hasFCDBoundaryBefore/
// After, isFCDInert).
// NOTE(review): some original lines (access specifiers, return types, closing
// braces) are not visible in this listing.
316 class FCDNormalizer2
: public Normalizer2WithImpl
{
// Constructor: passes the Normalizer2Impl through to the base class.
318 FCDNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
319 virtual ~FCDNormalizer2();
// normalize hook: calls impl.makeFCD() on [src, limit), passing the address
// of `buffer`.
323 normalize(const UChar
*src
, const UChar
*limit
,
324 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
325 impl
.makeFCD(src
, limit
, &buffer
, errorCode
);
327 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// normalizeAndAppend hook: forwards all arguments to impl.makeFCDAndAppend().
329 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
330 UnicodeString
&safeMiddle
,
331 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
332 impl
.makeFCDAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
// spanQuickCheckYes hook: calls the same impl.makeFCD() as normalize(), but
// with NULL in place of the buffer, and returns the pointer it yields.
334 virtual const UChar
*
335 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
336 return impl
.makeFCD(src
, limit
, NULL
, errorCode
);
338 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// Boundary / inertness queries: one-line delegations to the matching
// impl.hasFCDBoundaryBefore / hasFCDBoundaryAfter / isFCDInert.
339 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasFCDBoundaryBefore(c
); }
340 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasFCDBoundaryAfter(c
); }
341 virtual UBool
isInert(UChar32 c
) const { return impl
.isFCDInert(c
); }
// Aggregate that bundles one Normalizer2Impl pointer with normalizer objects
// built over it: `comp`, `decomp`, `fcd` and `fcc`.
// NOTE(review): some original lines are not visible in this listing, including
// the declaration of the `fcd` member that the constructor initializes and one
// parameter of the package-name createInstance() overload.
344 struct Norm2AllModes
: public UMemory
{
// Constructor: stores `i` and constructs every member normalizer from *i.
// `comp` is built with FALSE and `fcc` with TRUE as the second
// ComposeNormalizer2 argument. `i` is dereferenced here, so it must not be
// null.
345 Norm2AllModes(Normalizer2Impl
*i
)
346 : impl(i
), comp(*i
, FALSE
), decomp(*i
), fcd(*i
), fcc(*i
, TRUE
) {}
// Static factory declarations returning non-const Norm2AllModes pointers;
// their definitions are not part of this listing.
349 static Norm2AllModes
*createInstance(Normalizer2Impl
*impl
, UErrorCode
&errorCode
);
350 static Norm2AllModes
*createNFCInstance(UErrorCode
&errorCode
);
351 static Norm2AllModes
*createInstance(const char *packageName
,
353 UErrorCode
&errorCode
);
// Static accessor declarations returning const Norm2AllModes pointers for
// the NFC, NFKC and NFKC_CF instances; definitions are not part of this
// listing.
355 static const Norm2AllModes
*getNFCInstance(UErrorCode
&errorCode
);
356 static const Norm2AllModes
*getNFKCInstance(UErrorCode
&errorCode
);
357 static const Norm2AllModes
*getNFKC_CFInstance(UErrorCode
&errorCode
);
// Data members, each initialized by the constructor above.
359 Normalizer2Impl
*impl
;
360 ComposeNormalizer2 comp
;
361 DecomposeNormalizer2 decomp
;
363 ComposeNormalizer2 fcc
;
368 #endif // !UCONFIG_NO_NORMALIZATION
369 #endif // __NORM2ALLMODES_H__