1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2009-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: normalizer2.cpp
12 * tab size: 8 (not used)
15 * created on: 2009nov22
16 * created by: Markus W. Scherer
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_NORMALIZATION
23 #include "unicode/normalizer2.h"
24 #include "unicode/unistr.h"
25 #include "unicode/unorm.h"
28 #include "norm2allmodes.h"
29 #include "normalizer2impl.h"
33 using icu::Normalizer2Impl
;
35 // NFC/NFD data machine-generated by gennorm2 --csource
36 #define INCLUDED_FROM_NORMALIZER2_CPP
37 #include "norm2_nfc_data.h"
41 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
43 Normalizer2::~Normalizer2() {}
// Base-class implementation of getRawDecomposition(). Both parameters are
// unnamed, so neither the code point nor the output string is used here.
46 Normalizer2::getRawDecomposition(UChar32
, UnicodeString
&) const {
// Base-class implementation of composePair(). Both code point parameters are
// unnamed, so the result does not depend on them.
51 Normalizer2::composePair(UChar32
, UChar32
) const {
// Base-class implementation of getCombiningClass(). The code point parameter
// is intentionally unnamed (commented out as c), so it is not used here.
56 Normalizer2::getCombiningClass(UChar32
/*c*/) const {
60 // Normalizer2 implementation for the old UNORM_NONE.
// NoopNormalizer2: a Normalizer2 subclass whose boundary and inertness queries
// (hasBoundaryBefore, hasBoundaryAfter, isInert) answer TRUE for every code point.
61 class NoopNormalizer2
: public Normalizer2
{
62 virtual ~NoopNormalizer2();
// normalize(): acts only while errorCode indicates success on entry;
// U_ILLEGAL_ARGUMENT_ERROR is the error it can report.
64 virtual UnicodeString
&
65 normalize(const UnicodeString
&src
,
67 UErrorCode
&errorCode
) const {
68 if(U_SUCCESS(errorCode
)) {
72 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// normalizeSecondAndAppend(): same error-code pattern as normalize() above,
// operating on the strings first and second.
77 virtual UnicodeString
&
78 normalizeSecondAndAppend(UnicodeString
&first
,
79 const UnicodeString
&second
,
80 UErrorCode
&errorCode
) const {
81 if(U_SUCCESS(errorCode
)) {
85 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// append(): same error-code pattern again, operating on first and second.
90 virtual UnicodeString
&
91 append(UnicodeString
&first
,
92 const UnicodeString
&second
,
93 UErrorCode
&errorCode
) const {
94 if(U_SUCCESS(errorCode
)) {
98 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// getDecomposition(): both parameters are unnamed and therefore unused.
104 getDecomposition(UChar32
, UnicodeString
&) const {
107 // No need to override the default getRawDecomposition().
// isNormalized() and quickCheck(): the string and error-code parameters are
// unnamed and therefore unused.
109 isNormalized(const UnicodeString
&, UErrorCode
&) const {
112 virtual UNormalizationCheckResult
113 quickCheck(const UnicodeString
&, UErrorCode
&) const {
// spanQuickCheckYes(): only the string s is named; the error code is unused.
117 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&) const {
// The three queries below ignore their code point and always return TRUE.
120 virtual UBool
hasBoundaryBefore(UChar32
) const { return TRUE
; }
121 virtual UBool
hasBoundaryAfter(UChar32
) const { return TRUE
; }
122 virtual UBool
isInert(UChar32
) const { return TRUE
; }
125 NoopNormalizer2::~NoopNormalizer2() {}
127 Normalizer2WithImpl::~Normalizer2WithImpl() {}
129 DecomposeNormalizer2::~DecomposeNormalizer2() {}
131 ComposeNormalizer2::~ComposeNormalizer2() {}
133 FCDNormalizer2::~FCDNormalizer2() {}
135 // instance cache ---------------------------------------------------------- ***
// Destructor of Norm2AllModes.
// NOTE(review): presumably releases the impl passed to the constructor —
// confirm against the destructor body.
137 Norm2AllModes::~Norm2AllModes() {
// Creates a Norm2AllModes object for the given impl via new Norm2AllModes(impl).
// errorCode is tested with U_FAILURE() on entry, and U_MEMORY_ALLOCATION_ERROR
// is the error this function reports.
// NOTE(review): presumably the memory error is set when the allocation yields
// NULL — confirm against the full function body.
142 Norm2AllModes::createInstance(Normalizer2Impl
*impl
, UErrorCode
&errorCode
) {
143 if(U_FAILURE(errorCode
)) {
147 Norm2AllModes
*allModes
=new Norm2AllModes(impl
);
149 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
// Builds the NFC Norm2AllModes: allocates a Normalizer2Impl, initializes it
// from the norm2_nfc_data_* tables, and hands it to createInstance().
// errorCode is tested with U_FAILURE() on entry; U_MEMORY_ALLOCATION_ERROR is
// the error set directly by this function.
157 Norm2AllModes::createNFCInstance(UErrorCode
&errorCode
) {
158 if(U_FAILURE(errorCode
)) {
161 Normalizer2Impl
*impl
=new Normalizer2Impl
;
163 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
// Point the new impl at the compiled-in NFC data tables.
166 impl
->init(norm2_nfc_data_indexes
, &norm2_nfc_data_trie
,
167 norm2_nfc_data_extraData
, norm2_nfc_data_smallFCD
);
168 return createInstance(impl
, errorCode
);
172 static UBool U_CALLCONV
uprv_normalizer2_cleanup();
175 static Norm2AllModes
*nfcSingleton
;
176 static Normalizer2
*noopSingleton
;
178 static icu::UInitOnce nfcInitOnce
= U_INITONCE_INITIALIZER
;
179 static icu::UInitOnce noopInitOnce
= U_INITONCE_INITIALIZER
;
181 // UInitOnce singleton initialization functions
182 static void U_CALLCONV
initNFCSingleton(UErrorCode
&errorCode
) {
183 nfcSingleton
=Norm2AllModes::createNFCInstance(errorCode
);
184 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
// umtx_initOnce() callback for the no-op singleton: allocates a
// NoopNormalizer2 into noopSingleton, reports U_MEMORY_ALLOCATION_ERROR when
// the allocation yields NULL, and registers uprv_normalizer2_cleanup under
// UCLN_COMMON_NORMALIZER2.
187 static void U_CALLCONV
initNoopSingleton(UErrorCode
&errorCode
) {
188 if(U_FAILURE(errorCode
)) {
191 noopSingleton
=new NoopNormalizer2
;
192 if(noopSingleton
==NULL
) {
193 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
196 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
// Cleanup callback registered by the singleton initialization functions.
// Deletes the no-op singleton, clears its pointer, and resets noopInitOnce so
// that the singleton can be created again later.
201 static UBool U_CALLCONV
uprv_normalizer2_cleanup() {
204 delete noopSingleton
;
205 noopSingleton
= NULL
;
207 noopInitOnce
.reset();
// Accessor for the NFC Norm2AllModes. Returns NULL immediately when errorCode
// already indicates failure; otherwise runs initNFCSingleton through
// umtx_initOnce() guarded by nfcInitOnce.
213 const Norm2AllModes
*
214 Norm2AllModes::getNFCInstance(UErrorCode
&errorCode
) {
215 if(U_FAILURE(errorCode
)) { return NULL
; }
216 umtx_initOnce(nfcInitOnce
, &initNFCSingleton
, errorCode
);
// Returns the address of the comp member of the NFC Norm2AllModes, or NULL
// when Norm2AllModes::getNFCInstance() returned NULL.
221 Normalizer2::getNFCInstance(UErrorCode
&errorCode
) {
222 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
223 return allModes
!=NULL
? &allModes
->comp
: NULL
;
// Returns the address of the decomp member of the NFC Norm2AllModes, or NULL
// when Norm2AllModes::getNFCInstance() returned NULL.
227 Normalizer2::getNFDInstance(UErrorCode
&errorCode
) {
228 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
229 return allModes
!=NULL
? &allModes
->decomp
: NULL
;
232 const Normalizer2
*Normalizer2Factory::getFCDInstance(UErrorCode
&errorCode
) {
233 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
234 return allModes
!=NULL
? &allModes
->fcd
: NULL
;
237 const Normalizer2
*Normalizer2Factory::getFCCInstance(UErrorCode
&errorCode
) {
238 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
239 return allModes
!=NULL
? &allModes
->fcc
: NULL
;
242 const Normalizer2
*Normalizer2Factory::getNoopInstance(UErrorCode
&errorCode
) {
243 if(U_FAILURE(errorCode
)) { return NULL
; }
244 umtx_initOnce(noopInitOnce
, &initNoopSingleton
, errorCode
);
245 return noopSingleton
;
248 const Normalizer2Impl
*
249 Normalizer2Factory::getNFCImpl(UErrorCode
&errorCode
) {
250 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
251 return allModes
!=NULL
? allModes
->impl
: NULL
;
254 const Normalizer2Impl
*
255 Normalizer2Factory::getImpl(const Normalizer2
*norm2
) {
256 return &((Normalizer2WithImpl
*)norm2
)->impl
;
261 // C API ------------------------------------------------------------------- ***
265 U_CAPI
const UNormalizer2
* U_EXPORT2
266 unorm2_getNFCInstance(UErrorCode
*pErrorCode
) {
267 return (const UNormalizer2
*)Normalizer2::getNFCInstance(*pErrorCode
);
270 U_CAPI
const UNormalizer2
* U_EXPORT2
271 unorm2_getNFDInstance(UErrorCode
*pErrorCode
) {
272 return (const UNormalizer2
*)Normalizer2::getNFDInstance(*pErrorCode
);
275 U_CAPI
void U_EXPORT2
276 unorm2_close(UNormalizer2
*norm2
) {
277 delete (Normalizer2
*)norm2
;
// C API: normalizes src[0..length) into dest[0..capacity) using norm2 and
// returns the value of destString.extract(dest, capacity, *pErrorCode).
// A length below 0 is passed on as "NUL-terminated" to the UnicodeString and
// ReorderingBuffer paths below.
280 U_CAPI
int32_t U_EXPORT2
281 unorm2_normalize(const UNormalizer2
*norm2
,
282 const UChar
*src
, int32_t length
,
283 UChar
*dest
, int32_t capacity
,
284 UErrorCode
*pErrorCode
) {
285 if(U_FAILURE(*pErrorCode
)) {
// Argument validation: a NULL src requires length==0, otherwise length>=-1;
// a NULL dest requires capacity==0, otherwise capacity>=0; src and dest must
// not be the same non-NULL pointer.
288 if( (src
==NULL
? length
!=0 : length
<-1) ||
289 (dest
==NULL
? capacity
!=0 : capacity
<0) ||
290 (src
==dest
&& src
!=NULL
)
292 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// UnicodeString constructed over the caller's dest buffer with length 0.
295 UnicodeString
destString(dest
, 0, capacity
);
296 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
298 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
// Path for Normalizer2WithImpl instances: normalize straight from the UChar
// pointers through a ReorderingBuffer on destString.
299 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
301 // Avoid duplicate argument checking and support NUL-terminated src.
302 ReorderingBuffer
buffer(n2wi
->impl
, destString
);
303 if(buffer
.init(length
, *pErrorCode
)) {
304 n2wi
->normalize(src
, length
>=0 ? src
+length
: NULL
, buffer
, *pErrorCode
);
// Generic path: go through the virtual UnicodeString-based normalize().
307 UnicodeString
srcString(length
<0, src
, length
);
308 n2
->normalize(srcString
, destString
, *pErrorCode
);
311 return destString
.extract(dest
, capacity
, *pErrorCode
);
// Helper called by unorm2_normalizeSecondAndAppend() and unorm2_append().
// Appends second[0..secondLength) to the first buffer using norm2 and returns
// the value of firstString.extract(first, firstCapacity, *pErrorCode).
// doNormalize is forwarded to Normalizer2WithImpl::normalizeAndAppend().
315 normalizeSecondAndAppend(const UNormalizer2
*norm2
,
316 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
317 const UChar
*second
, int32_t secondLength
,
319 UErrorCode
*pErrorCode
) {
320 if(U_FAILURE(*pErrorCode
)) {
// Argument validation: a NULL second requires secondLength==0, otherwise
// secondLength>=-1; a NULL first requires firstCapacity==0 and firstLength==0,
// otherwise firstCapacity>=0 and firstLength>=-1; first and second must not be
// the same non-NULL pointer.
323 if( (second
==NULL
? secondLength
!=0 : secondLength
<-1) ||
324 (first
==NULL
? (firstCapacity
!=0 || firstLength
!=0) :
325 (firstCapacity
<0 || firstLength
<-1)) ||
326 (first
==second
&& first
!=NULL
)
328 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// UnicodeString constructed over the caller's first buffer.
331 UnicodeString
firstString(first
, firstLength
, firstCapacity
);
332 firstLength
=firstString
.length(); // In case it was -1.
333 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
334 if(secondLength
!=0) {
335 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
// Path for Normalizer2WithImpl instances: append straight from the UChar
// pointers through a ReorderingBuffer on firstString.
336 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
338 // Avoid duplicate argument checking and support NUL-terminated src.
// safeMiddle is filled by normalizeAndAppend() and used below to restore the
// end of first[] on failure or overflow.
339 UnicodeString safeMiddle
;
341 ReorderingBuffer
buffer(n2wi
->impl
, firstString
);
342 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) { // destCapacity>=-1
343 n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength
: NULL
,
344 doNormalize
, safeMiddle
, buffer
, *pErrorCode
);
346 } // The ReorderingBuffer destructor finalizes firstString.
347 if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) {
348 // Restore the modified suffix of the first string.
349 // This does not restore first[] array contents between firstLength and firstCapacity.
350 // (That might be uninitialized memory, as far as we know.)
351 if(first
!=NULL
) { /* don't dereference NULL */
352 safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length());
353 if(firstLength
<firstCapacity
) {
354 first
[firstLength
]=0; // NUL-terminate in case it was originally.
// Generic path: go through the virtual UnicodeString-based
// normalizeSecondAndAppend() or append().
359 UnicodeString
secondString(secondLength
<0, second
, secondLength
);
361 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
);
363 n2
->append(firstString
, secondString
, *pErrorCode
);
367 return firstString
.extract(first
, firstCapacity
, *pErrorCode
);
// C API entry point: forwards norm2, the first buffer (pointer, length,
// capacity) and the second string (pointer, length) to the
// normalizeSecondAndAppend() helper and returns its result.
370 U_CAPI
int32_t U_EXPORT2
371 unorm2_normalizeSecondAndAppend(const UNormalizer2
*norm2
,
372 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
373 const UChar
*second
, int32_t secondLength
,
374 UErrorCode
*pErrorCode
) {
375 return normalizeSecondAndAppend(norm2
,
376 first
, firstLength
, firstCapacity
,
377 second
, secondLength
,
// C API entry point: forwards norm2, the first buffer (pointer, length,
// capacity) and the second string (pointer, length) to the
// normalizeSecondAndAppend() helper and returns its result.
381 U_CAPI
int32_t U_EXPORT2
382 unorm2_append(const UNormalizer2
*norm2
,
383 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
384 const UChar
*second
, int32_t secondLength
,
385 UErrorCode
*pErrorCode
) {
386 return normalizeSecondAndAppend(norm2
,
387 first
, firstLength
, firstCapacity
,
388 second
, secondLength
,
// C API: asks norm2 for the decomposition of c. When getDecomposition()
// reports a result, it is extracted into decomposition[0..capacity) and the
// value of extract() is returned.
392 U_CAPI
int32_t U_EXPORT2
393 unorm2_getDecomposition(const UNormalizer2
*norm2
,
394 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
395 UErrorCode
*pErrorCode
) {
396 if(U_FAILURE(*pErrorCode
)) {
// A NULL output buffer requires capacity==0; otherwise capacity must be >=0.
399 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
400 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// UnicodeString constructed over the caller's output buffer with length 0.
403 UnicodeString
destString(decomposition
, 0, capacity
);
404 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getDecomposition(c
, destString
)) {
405 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
// C API: asks norm2 for the raw decomposition of c. When
// getRawDecomposition() reports a result, it is extracted into
// decomposition[0..capacity) and the value of extract() is returned.
411 U_CAPI
int32_t U_EXPORT2
412 unorm2_getRawDecomposition(const UNormalizer2
*norm2
,
413 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
414 UErrorCode
*pErrorCode
) {
415 if(U_FAILURE(*pErrorCode
)) {
// A NULL output buffer requires capacity==0; otherwise capacity must be >=0.
418 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
419 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// UnicodeString constructed over the caller's output buffer with length 0.
422 UnicodeString
destString(decomposition
, 0, capacity
);
423 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getRawDecomposition(c
, destString
)) {
424 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
430 U_CAPI UChar32 U_EXPORT2
431 unorm2_composePair(const UNormalizer2
*norm2
, UChar32 a
, UChar32 b
) {
432 return reinterpret_cast<const Normalizer2
*>(norm2
)->composePair(a
, b
);
435 U_CAPI
uint8_t U_EXPORT2
436 unorm2_getCombiningClass(const UNormalizer2
*norm2
, UChar32 c
) {
437 return reinterpret_cast<const Normalizer2
*>(norm2
)->getCombiningClass(c
);
// C API: returns Normalizer2::isNormalized() for the string s[0..length).
// A length below 0 is passed to the UnicodeString constructor as the
// "NUL-terminated" flag.
440 U_CAPI UBool U_EXPORT2
441 unorm2_isNormalized(const UNormalizer2
*norm2
,
442 const UChar
*s
, int32_t length
,
443 UErrorCode
*pErrorCode
) {
444 if(U_FAILURE(*pErrorCode
)) {
// Reject a NULL string with non-zero length, and any length below -1.
447 if((s
==NULL
&& length
!=0) || length
<-1) {
448 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
451 UnicodeString
sString(length
<0, s
, length
);
452 return ((const Normalizer2
*)norm2
)->isNormalized(sString
, *pErrorCode
);
// C API: returns Normalizer2::quickCheck() for the string s[0..length).
// A length below 0 is passed to the UnicodeString constructor as the
// "NUL-terminated" flag.
455 U_CAPI UNormalizationCheckResult U_EXPORT2
456 unorm2_quickCheck(const UNormalizer2
*norm2
,
457 const UChar
*s
, int32_t length
,
458 UErrorCode
*pErrorCode
) {
459 if(U_FAILURE(*pErrorCode
)) {
// Reject a NULL string with non-zero length, and any length below -1.
462 if((s
==NULL
&& length
!=0) || length
<-1) {
463 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
466 UnicodeString
sString(length
<0, s
, length
);
467 return ((const Normalizer2
*)norm2
)->quickCheck(sString
, *pErrorCode
);
// C API: returns Normalizer2::spanQuickCheckYes() for the string s[0..length).
// A length below 0 is passed to the UnicodeString constructor as the
// "NUL-terminated" flag.
470 U_CAPI
int32_t U_EXPORT2
471 unorm2_spanQuickCheckYes(const UNormalizer2
*norm2
,
472 const UChar
*s
, int32_t length
,
473 UErrorCode
*pErrorCode
) {
474 if(U_FAILURE(*pErrorCode
)) {
// Reject a NULL string with non-zero length, and any length below -1.
477 if((s
==NULL
&& length
!=0) || length
<-1) {
478 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
481 UnicodeString
sString(length
<0, s
, length
);
482 return ((const Normalizer2
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
);
485 U_CAPI UBool U_EXPORT2
486 unorm2_hasBoundaryBefore(const UNormalizer2
*norm2
, UChar32 c
) {
487 return ((const Normalizer2
*)norm2
)->hasBoundaryBefore(c
);
490 U_CAPI UBool U_EXPORT2
491 unorm2_hasBoundaryAfter(const UNormalizer2
*norm2
, UChar32 c
) {
492 return ((const Normalizer2
*)norm2
)->hasBoundaryAfter(c
);
495 U_CAPI UBool U_EXPORT2
496 unorm2_isInert(const UNormalizer2
*norm2
, UChar32 c
) {
497 return ((const Normalizer2
*)norm2
)->isInert(c
);
500 // Some properties APIs ---------------------------------------------------- ***
// Property API: looks up the combining class of c through the NFD normalizer.
// Uses a local error code, so failures to obtain the NFD instance are not
// reported to the caller as an error code.
502 U_CAPI
uint8_t U_EXPORT2
503 u_getCombiningClass(UChar32 c
) {
504 UErrorCode errorCode
=U_ZERO_ERROR
;
505 const Normalizer2
*nfd
=Normalizer2::getNFDInstance(errorCode
);
// Only query the normalizer when the instance was obtained successfully.
506 if(U_SUCCESS(errorCode
)) {
507 return nfd
->getCombiningClass(c
);
// Looks up the FCD16 value of c through the NFC Normalizer2Impl.
// Uses a local error code, so failures to obtain the impl are not reported to
// the caller as an error code.
514 unorm_getFCD16(UChar32 c
) {
515 UErrorCode errorCode
=U_ZERO_ERROR
;
516 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
// Only query the impl when it was obtained successfully.
517 if(U_SUCCESS(errorCode
)) {
518 return impl
->getFCD16(c
);
524 #endif // !UCONFIG_NO_NORMALIZATION