2 *******************************************************************************
4 * Copyright (C) 2009-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: normalizer2.cpp
10 * tab size: 8 (not used)
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_NORMALIZATION
21 #include "unicode/normalizer2.h"
22 #include "unicode/unistr.h"
23 #include "unicode/unorm.h"
26 #include "norm2allmodes.h"
27 #include "normalizer2impl.h"
31 using icu::Normalizer2Impl
;
33 // NFC/NFD data machine-generated by gennorm2 --csource
34 #define INCLUDED_FROM_NORMALIZER2_CPP
35 #include "norm2_nfc_data.h"
39 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
// Out-of-line definition of the Normalizer2 destructor; the body is empty.
41 Normalizer2::~Normalizer2() {}
// Normalizer2::getRawDecomposition (const member function).
// Both parameters (a UChar32 and a UnicodeString reference) are unnamed here.
// NOTE(review): the return type and the function body are not visible in this listing.
44 Normalizer2::getRawDecomposition(UChar32
, UnicodeString
&) const {
// Normalizer2::composePair (const member function).
// Both UChar32 parameters are unnamed here.
// NOTE(review): the return type and the function body are not visible in this listing.
49 Normalizer2::composePair(UChar32
, UChar32
) const {
// Normalizer2::getCombiningClass (const member function).
// The code point parameter name is commented out (/*c*/), so it is unnamed in this definition.
// NOTE(review): the return type and the function body are not visible in this listing.
54 Normalizer2::getCombiningClass(UChar32
/*c*/) const {
58 // Normalizer2 implementation for the old UNORM_NONE.
// Derives publicly from Normalizer2 and overrides its virtual methods inline.
// NOTE(review): this listing omits several lines of each method body (and some
// declaration lines), so the comments below describe only what is visible.
59 class NoopNormalizer2
: public Normalizer2
{
// Virtual destructor; defined out of line after the class.
60 virtual ~NoopNormalizer2();
// normalize(): the visible parameters are src and errorCode. The body tests
// U_SUCCESS(errorCode) and contains an assignment of U_ILLEGAL_ARGUMENT_ERROR;
// the condition guarding that assignment is not visible here.
62 virtual UnicodeString
&
63 normalize(const UnicodeString
&src
,
65 UErrorCode
&errorCode
) const {
66 if(U_SUCCESS(errorCode
)) {
70 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// normalizeSecondAndAppend(first, second, errorCode): same visible shape as
// normalize() above -- a U_SUCCESS(errorCode) test and an assignment of
// U_ILLEGAL_ARGUMENT_ERROR whose guarding condition is not visible here.
75 virtual UnicodeString
&
76 normalizeSecondAndAppend(UnicodeString
&first
,
77 const UnicodeString
&second
,
78 UErrorCode
&errorCode
) const {
79 if(U_SUCCESS(errorCode
)) {
83 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// append(first, second, errorCode): same visible shape as
// normalizeSecondAndAppend() above.
88 virtual UnicodeString
&
89 append(UnicodeString
&first
,
90 const UnicodeString
&second
,
91 UErrorCode
&errorCode
) const {
92 if(U_SUCCESS(errorCode
)) {
96 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// getDecomposition(): both parameters are unnamed; body not visible here.
102 getDecomposition(UChar32
, UnicodeString
&) const {
105 // No need to override the default getRawDecomposition().
// isNormalized(): both parameters are unnamed; body not visible here.
107 isNormalized(const UnicodeString
&, UErrorCode
&) const {
// quickCheck(): returns a UNormalizationCheckResult; both parameters are
// unnamed; body not visible here.
110 virtual UNormalizationCheckResult
111 quickCheck(const UnicodeString
&, UErrorCode
&) const {
// spanQuickCheckYes(): only the string parameter s is named; body not visible here.
115 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&) const {
// The three predicates below ignore their code point argument and return TRUE.
118 virtual UBool
hasBoundaryBefore(UChar32
) const { return TRUE
; }
119 virtual UBool
hasBoundaryAfter(UChar32
) const { return TRUE
; }
120 virtual UBool
isInert(UChar32
) const { return TRUE
; }
// Out-of-line definition of the NoopNormalizer2 destructor; the body is empty.
123 NoopNormalizer2::~NoopNormalizer2() {}
// Out-of-line definition of the Normalizer2WithImpl destructor; the body is empty.
125 Normalizer2WithImpl::~Normalizer2WithImpl() {}
// Out-of-line definition of the DecomposeNormalizer2 destructor; the body is empty.
127 DecomposeNormalizer2::~DecomposeNormalizer2() {}
// Out-of-line definition of the ComposeNormalizer2 destructor; the body is empty.
129 ComposeNormalizer2::~ComposeNormalizer2() {}
// Out-of-line definition of the FCDNormalizer2 destructor; the body is empty.
131 FCDNormalizer2::~FCDNormalizer2() {}
133 // instance cache ---------------------------------------------------------- ***
// Norm2AllModes destructor.
// NOTE(review): the body is not visible in this listing; confirm what it releases.
135 Norm2AllModes::~Norm2AllModes() {
// Norm2AllModes::createInstance(impl, errorCode): constructs a Norm2AllModes
// from the given Normalizer2Impl pointer via new Norm2AllModes(impl).
// Visible steps: a U_FAILURE(errorCode) check, the allocation, and an assignment
// of U_MEMORY_ALLOCATION_ERROR to errorCode.
// NOTE(review): the return type, the condition guarding the error assignment
// (presumably allocation failure -- confirm), and the return statements are not
// visible in this listing.
140 Norm2AllModes::createInstance(Normalizer2Impl
*impl
, UErrorCode
&errorCode
) {
141 if(U_FAILURE(errorCode
)) {
145 Norm2AllModes
*allModes
=new Norm2AllModes(impl
);
147 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
// Norm2AllModes::createNFCInstance(errorCode): allocates a Normalizer2Impl,
// initializes it from the norm2_nfc_data_* tables (indexes, trie, extraData,
// smallFCD), and returns the result of createInstance(impl, errorCode).
// NOTE(review): the return type, the early-return for U_FAILURE, and the condition
// guarding the U_MEMORY_ALLOCATION_ERROR assignment are not visible in this listing.
155 Norm2AllModes::createNFCInstance(UErrorCode
&errorCode
) {
156 if(U_FAILURE(errorCode
)) {
159 Normalizer2Impl
*impl
=new Normalizer2Impl
;
161 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
164 impl
->init(norm2_nfc_data_indexes
, &norm2_nfc_data_trie
,
165 norm2_nfc_data_extraData
, norm2_nfc_data_smallFCD
);
166 return createInstance(impl
, errorCode
);
// Forward declaration of the file-local cleanup callback, so the singleton
// initialization functions can pass it to ucln_common_registerCleanup()
// before its definition appears.
170 static UBool U_CALLCONV
uprv_normalizer2_cleanup();
// File-local singleton pointers: the NFC Norm2AllModes instance and the
// no-op Normalizer2 instance.
173 static Norm2AllModes
*nfcSingleton
;
174 static Normalizer2
*noopSingleton
;
// UInitOnce guards passed to umtx_initOnce() for the corresponding singleton.
176 static icu::UInitOnce nfcInitOnce
= U_INITONCE_INITIALIZER
;
177 static icu::UInitOnce noopInitOnce
= U_INITONCE_INITIALIZER
;
179 // UInitOnce singleton initialization functions
// Initialization function for the NFC singleton: stores the result of
// Norm2AllModes::createNFCInstance(errorCode) in nfcSingleton, then registers
// uprv_normalizer2_cleanup under UCLN_COMMON_NORMALIZER2.
// The cleanup registration is not conditioned on errorCode in the visible code.
180 static void U_CALLCONV
initNFCSingleton(UErrorCode
&errorCode
) {
181 nfcSingleton
=Norm2AllModes::createNFCInstance(errorCode
);
182 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
// Initialization function for the no-op singleton: checks U_FAILURE(errorCode),
// allocates a NoopNormalizer2 into noopSingleton, sets U_MEMORY_ALLOCATION_ERROR
// when the allocation yields NULL, and registers uprv_normalizer2_cleanup under
// UCLN_COMMON_NORMALIZER2.
// NOTE(review): the statements inside the failure branches (e.g. early returns)
// are not visible in this listing.
185 static void U_CALLCONV
initNoopSingleton(UErrorCode
&errorCode
) {
186 if(U_FAILURE(errorCode
)) {
189 noopSingleton
=new NoopNormalizer2
;
190 if(noopSingleton
==NULL
) {
191 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
194 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
// Cleanup callback registered via ucln_common_registerCleanup().
// Visible steps: delete noopSingleton, set it to NULL, and reset noopInitOnce so
// the singleton can be created again.
// NOTE(review): no handling of nfcSingleton / nfcInitOnce and no return statement
// are visible in this listing; lines appear to be missing -- confirm against the
// full file that the NFC singleton is also released and its guard reset.
199 static UBool U_CALLCONV
uprv_normalizer2_cleanup() {
202 delete noopSingleton
;
203 noopSingleton
= NULL
;
205 noopInitOnce
.reset();
// Norm2AllModes::getNFCInstance(errorCode): returns NULL immediately when
// errorCode already indicates failure; otherwise runs initNFCSingleton through
// umtx_initOnce() guarded by nfcInitOnce.
// NOTE(review): the final return statement is not visible in this listing.
211 const Norm2AllModes
*
212 Norm2AllModes::getNFCInstance(UErrorCode
&errorCode
) {
213 if(U_FAILURE(errorCode
)) { return NULL
; }
214 umtx_initOnce(nfcInitOnce
, &initNFCSingleton
, errorCode
);
// Normalizer2::getNFCInstance(errorCode): obtains the shared Norm2AllModes NFC
// data and returns the address of its comp member, or NULL when no Norm2AllModes
// instance is available.
219 Normalizer2::getNFCInstance(UErrorCode
&errorCode
) {
220 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
221 return allModes
!=NULL
? &allModes
->comp
: NULL
;
// Normalizer2::getNFDInstance(errorCode): obtains the shared Norm2AllModes NFC
// data and returns the address of its decomp member, or NULL when no
// Norm2AllModes instance is available.
225 Normalizer2::getNFDInstance(UErrorCode
&errorCode
) {
226 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
227 return allModes
!=NULL
? &allModes
->decomp
: NULL
;
// Normalizer2Factory::getFCDInstance(errorCode): obtains the shared Norm2AllModes
// NFC data and returns the address of its fcd member, or NULL when no
// Norm2AllModes instance is available.
230 const Normalizer2
*Normalizer2Factory::getFCDInstance(UErrorCode
&errorCode
) {
231 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
232 return allModes
!=NULL
? &allModes
->fcd
: NULL
;
// Normalizer2Factory::getFCCInstance(errorCode): obtains the shared Norm2AllModes
// NFC data and returns the address of its fcc member, or NULL when no
// Norm2AllModes instance is available.
235 const Normalizer2
*Normalizer2Factory::getFCCInstance(UErrorCode
&errorCode
) {
236 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
237 return allModes
!=NULL
? &allModes
->fcc
: NULL
;
// Normalizer2Factory::getNoopInstance(errorCode): returns NULL immediately when
// errorCode already indicates failure; otherwise runs initNoopSingleton through
// umtx_initOnce() guarded by noopInitOnce and returns noopSingleton.
240 const Normalizer2
*Normalizer2Factory::getNoopInstance(UErrorCode
&errorCode
) {
241 if(U_FAILURE(errorCode
)) { return NULL
; }
242 umtx_initOnce(noopInitOnce
, &initNoopSingleton
, errorCode
);
243 return noopSingleton
;
// Normalizer2Factory::getNFCImpl(errorCode): obtains the shared Norm2AllModes NFC
// data and returns its impl pointer, or NULL when no Norm2AllModes instance is
// available.
246 const Normalizer2Impl
*
247 Normalizer2Factory::getNFCImpl(UErrorCode
&errorCode
) {
248 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
249 return allModes
!=NULL
? allModes
->impl
: NULL
;
// Normalizer2Factory::getImpl(norm2): C-style casts norm2 to Normalizer2WithImpl *
// and returns the address of that object's impl member.
// NOTE(review): the cast is unchecked in the visible code -- presumably callers
// must only pass instances that really are Normalizer2WithImpl; confirm.
252 const Normalizer2Impl
*
253 Normalizer2Factory::getImpl(const Normalizer2
*norm2
) {
254 return &((Normalizer2WithImpl
*)norm2
)->impl
;
259 // C API ------------------------------------------------------------------- ***
// C API: returns the result of Normalizer2::getNFCInstance() cast to
// const UNormalizer2 *. pErrorCode is dereferenced without a visible NULL check.
263 U_CAPI
const UNormalizer2
* U_EXPORT2
264 unorm2_getNFCInstance(UErrorCode
*pErrorCode
) {
265 return (const UNormalizer2
*)Normalizer2::getNFCInstance(*pErrorCode
);
// C API: returns the result of Normalizer2::getNFDInstance() cast to
// const UNormalizer2 *. pErrorCode is dereferenced without a visible NULL check.
268 U_CAPI
const UNormalizer2
* U_EXPORT2
269 unorm2_getNFDInstance(UErrorCode
*pErrorCode
) {
270 return (const UNormalizer2
*)Normalizer2::getNFDInstance(*pErrorCode
);
// C API: deletes the object behind norm2 after casting it to Normalizer2 *.
// NOTE(review): presumably intended only for instances the caller owns, not for
// the shared singletons returned by the unorm2_get*Instance() functions -- confirm
// against the public API documentation.
273 U_CAPI
void U_EXPORT2
274 unorm2_close(UNormalizer2
*norm2
) {
275 delete (Normalizer2
*)norm2
;
// C API: normalizes src[0..length) into dest (capacity UChars) using norm2 and
// returns the value of destString.extract(dest, capacity, *pErrorCode).
//
// Argument validation (sets U_ILLEGAL_ARGUMENT_ERROR when violated):
//   - src==NULL requires length==0; otherwise length must be >= -1
//   - dest==NULL requires capacity==0; otherwise capacity must be >= 0
//   - src and dest must not be the same non-NULL pointer
//
// Two normalization paths are visible: one that uses a ReorderingBuffer over
// destString together with the Normalizer2WithImpl obtained by dynamic_cast, and
// one that wraps src in a UnicodeString and calls the virtual n2->normalize().
// NOTE(review): the conditions selecting between these paths (and the early
// returns) are on lines missing from this listing; presumably the first path is
// taken only when the dynamic_cast succeeds -- confirm.
278 U_CAPI
int32_t U_EXPORT2
279 unorm2_normalize(const UNormalizer2
*norm2
,
280 const UChar
*src
, int32_t length
,
281 UChar
*dest
, int32_t capacity
,
282 UErrorCode
*pErrorCode
) {
283 if(U_FAILURE(*pErrorCode
)) {
286 if( (src
==NULL
? length
!=0 : length
<-1) ||
287 (dest
==NULL
? capacity
!=0 : capacity
<0) ||
288 (src
==dest
&& src
!=NULL
)
290 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// destString is built over the caller's dest buffer with initial length 0.
293 UnicodeString
destString(dest
, 0, capacity
);
294 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
296 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
297 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
299 // Avoid duplicate argument checking and support NUL-terminated src.
300 ReorderingBuffer
buffer(n2wi
->impl
, destString
);
301 if(buffer
.init(length
, *pErrorCode
)) {
// A negative length passes NULL as the limit pointer.
302 n2wi
->normalize(src
, length
>=0 ? src
+length
: NULL
, buffer
, *pErrorCode
);
// Generic path: srcString is constructed with (length<0, src, length).
305 UnicodeString
srcString(length
<0, src
, length
);
306 n2
->normalize(srcString
, destString
, *pErrorCode
);
309 return destString
.extract(dest
, capacity
, *pErrorCode
);
// Shared helper called by unorm2_normalizeSecondAndAppend() and unorm2_append().
// Appends second[0..secondLength) to the first buffer using norm2 and returns the
// value of firstString.extract(first, firstCapacity, *pErrorCode).
//
// Argument validation (sets U_ILLEGAL_ARGUMENT_ERROR when violated):
//   - second==NULL requires secondLength==0; otherwise secondLength must be >= -1
//   - first==NULL requires firstCapacity==0 and firstLength==0; otherwise
//     firstCapacity must be >= 0 and firstLength >= -1
//   - first and second must not be the same non-NULL pointer
//
// NOTE(review): this listing omits lines of this function, including the return
// type, the declaration of doNormalize (used below), and the conditions that
// select between the ReorderingBuffer path and the generic
// n2->normalizeSecondAndAppend() / n2->append() calls. Confirm against the full file.
313 normalizeSecondAndAppend(const UNormalizer2
*norm2
,
314 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
315 const UChar
*second
, int32_t secondLength
,
317 UErrorCode
*pErrorCode
) {
318 if(U_FAILURE(*pErrorCode
)) {
321 if( (second
==NULL
? secondLength
!=0 : secondLength
<-1) ||
322 (first
==NULL
? (firstCapacity
!=0 || firstLength
!=0) :
323 (firstCapacity
<0 || firstLength
<-1)) ||
324 (first
==second
&& first
!=NULL
)
326 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// firstString is built over the caller's first buffer.
329 UnicodeString
firstString(first
, firstLength
, firstCapacity
);
330 firstLength
=firstString
.length(); // In case it was -1.
331 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
332 if(secondLength
!=0) {
333 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
334 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
336 // Avoid duplicate argument checking and support NUL-terminated src.
// safeMiddle is passed to normalizeAndAppend() and later copied back into first[]
// when the operation fails or the result exceeds firstCapacity.
337 UnicodeString safeMiddle
;
339 ReorderingBuffer
buffer(n2wi
->impl
, firstString
);
340 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) { // destCapacity>=-1
341 n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength
: NULL
,
342 doNormalize
, safeMiddle
, buffer
, *pErrorCode
);
344 } // The ReorderingBuffer destructor finalizes firstString.
345 if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) {
346 // Restore the modified suffix of the first string.
347 // This does not restore first[] array contents between firstLength and firstCapacity.
348 // (That might be uninitialized memory, as far as we know.)
349 if(first
!=NULL
) { /* don't dereference NULL */
350 safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length());
351 if(firstLength
<firstCapacity
) {
352 first
[firstLength
]=0; // NUL-terminate in case it was originally.
// Generic path: secondString is constructed with (secondLength<0, second,
// secondLength), then one of the two virtual methods below is called.
357 UnicodeString
secondString(secondLength
<0, second
, secondLength
);
359 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
);
361 n2
->append(firstString
, secondString
, *pErrorCode
);
365 return firstString
.extract(first
, firstCapacity
, *pErrorCode
);
// C API: forwards all arguments to the file-local normalizeSecondAndAppend()
// helper and returns its result.
// NOTE(review): the trailing arguments of the call are not visible in this listing.
368 U_CAPI
int32_t U_EXPORT2
369 unorm2_normalizeSecondAndAppend(const UNormalizer2
*norm2
,
370 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
371 const UChar
*second
, int32_t secondLength
,
372 UErrorCode
*pErrorCode
) {
373 return normalizeSecondAndAppend(norm2
,
374 first
, firstLength
, firstCapacity
,
375 second
, secondLength
,
// C API: forwards all arguments to the file-local normalizeSecondAndAppend()
// helper and returns its result.
// NOTE(review): the trailing arguments of the call are not visible in this
// listing, so what distinguishes this from unorm2_normalizeSecondAndAppend()
// cannot be confirmed here.
379 U_CAPI
int32_t U_EXPORT2
380 unorm2_append(const UNormalizer2
*norm2
,
381 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
382 const UChar
*second
, int32_t secondLength
,
383 UErrorCode
*pErrorCode
) {
384 return normalizeSecondAndAppend(norm2
,
385 first
, firstLength
, firstCapacity
,
386 second
, secondLength
,
// C API: writes the decomposition of code point c into the caller's buffer.
// Validation: decomposition==NULL requires capacity==0; otherwise capacity must
// be >= 0, else U_ILLEGAL_ARGUMENT_ERROR is set.
// When Normalizer2::getDecomposition() returns true, the function returns the
// value of destString.extract(decomposition, capacity, *pErrorCode).
// NOTE(review): the early returns and the value returned when getDecomposition()
// returns false are not visible in this listing.
390 U_CAPI
int32_t U_EXPORT2
391 unorm2_getDecomposition(const UNormalizer2
*norm2
,
392 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
393 UErrorCode
*pErrorCode
) {
394 if(U_FAILURE(*pErrorCode
)) {
397 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
398 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// destString is built over the caller's buffer with initial length 0.
401 UnicodeString
destString(decomposition
, 0, capacity
);
402 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getDecomposition(c
, destString
)) {
403 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
// C API: writes the raw decomposition of code point c into the caller's buffer.
// Validation: decomposition==NULL requires capacity==0; otherwise capacity must
// be >= 0, else U_ILLEGAL_ARGUMENT_ERROR is set.
// When Normalizer2::getRawDecomposition() returns true, the function returns the
// value of destString.extract(decomposition, capacity, *pErrorCode).
// NOTE(review): the early returns and the value returned when
// getRawDecomposition() returns false are not visible in this listing.
409 U_CAPI
int32_t U_EXPORT2
410 unorm2_getRawDecomposition(const UNormalizer2
*norm2
,
411 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
412 UErrorCode
*pErrorCode
) {
413 if(U_FAILURE(*pErrorCode
)) {
416 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
417 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// destString is built over the caller's buffer with initial length 0.
420 UnicodeString
destString(decomposition
, 0, capacity
);
421 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getRawDecomposition(c
, destString
)) {
422 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
// C API: casts norm2 to const Normalizer2 * and returns composePair(a, b).
// norm2 is used without a visible NULL check.
428 U_CAPI UChar32 U_EXPORT2
429 unorm2_composePair(const UNormalizer2
*norm2
, UChar32 a
, UChar32 b
) {
430 return reinterpret_cast<const Normalizer2
*>(norm2
)->composePair(a
, b
);
// C API: casts norm2 to const Normalizer2 * and returns getCombiningClass(c).
// norm2 is used without a visible NULL check.
433 U_CAPI
uint8_t U_EXPORT2
434 unorm2_getCombiningClass(const UNormalizer2
*norm2
, UChar32 c
) {
435 return reinterpret_cast<const Normalizer2
*>(norm2
)->getCombiningClass(c
);
// C API: wraps s in a UnicodeString constructed with (length<0, s, length) and
// returns Normalizer2::isNormalized() for it.
// Validation: s==NULL with length!=0, or length < -1, sets U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the values returned on the early-exit paths are not visible in
// this listing.
438 U_CAPI UBool U_EXPORT2
439 unorm2_isNormalized(const UNormalizer2
*norm2
,
440 const UChar
*s
, int32_t length
,
441 UErrorCode
*pErrorCode
) {
442 if(U_FAILURE(*pErrorCode
)) {
445 if((s
==NULL
&& length
!=0) || length
<-1) {
446 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
449 UnicodeString
sString(length
<0, s
, length
);
450 return ((const Normalizer2
*)norm2
)->isNormalized(sString
, *pErrorCode
);
// C API: wraps s in a UnicodeString constructed with (length<0, s, length) and
// returns Normalizer2::quickCheck() for it.
// Validation: s==NULL with length!=0, or length < -1, sets U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the values returned on the early-exit paths are not visible in
// this listing.
453 U_CAPI UNormalizationCheckResult U_EXPORT2
454 unorm2_quickCheck(const UNormalizer2
*norm2
,
455 const UChar
*s
, int32_t length
,
456 UErrorCode
*pErrorCode
) {
457 if(U_FAILURE(*pErrorCode
)) {
460 if((s
==NULL
&& length
!=0) || length
<-1) {
461 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
464 UnicodeString
sString(length
<0, s
, length
);
465 return ((const Normalizer2
*)norm2
)->quickCheck(sString
, *pErrorCode
);
// C API: wraps s in a UnicodeString constructed with (length<0, s, length) and
// returns Normalizer2::spanQuickCheckYes() for it.
// Validation: s==NULL with length!=0, or length < -1, sets U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the values returned on the early-exit paths are not visible in
// this listing.
468 U_CAPI
int32_t U_EXPORT2
469 unorm2_spanQuickCheckYes(const UNormalizer2
*norm2
,
470 const UChar
*s
, int32_t length
,
471 UErrorCode
*pErrorCode
) {
472 if(U_FAILURE(*pErrorCode
)) {
475 if((s
==NULL
&& length
!=0) || length
<-1) {
476 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
479 UnicodeString
sString(length
<0, s
, length
);
480 return ((const Normalizer2
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
);
// C API: casts norm2 to const Normalizer2 * and returns hasBoundaryBefore(c).
// norm2 is used without a visible NULL check.
483 U_CAPI UBool U_EXPORT2
484 unorm2_hasBoundaryBefore(const UNormalizer2
*norm2
, UChar32 c
) {
485 return ((const Normalizer2
*)norm2
)->hasBoundaryBefore(c
);
// C API: casts norm2 to const Normalizer2 * and returns hasBoundaryAfter(c).
// norm2 is used without a visible NULL check.
488 U_CAPI UBool U_EXPORT2
489 unorm2_hasBoundaryAfter(const UNormalizer2
*norm2
, UChar32 c
) {
490 return ((const Normalizer2
*)norm2
)->hasBoundaryAfter(c
);
// C API: casts norm2 to const Normalizer2 * and returns isInert(c).
// norm2 is used without a visible NULL check.
493 U_CAPI UBool U_EXPORT2
494 unorm2_isInert(const UNormalizer2
*norm2
, UChar32 c
) {
495 return ((const Normalizer2
*)norm2
)->isInert(c
);
498 // Some properties APIs ---------------------------------------------------- ***
// Property API: looks up the NFD Normalizer2 instance with a local error code
// and, when that succeeds, returns its getCombiningClass(c).
// NOTE(review): the value returned when the instance lookup fails is not visible
// in this listing.
500 U_CAPI
uint8_t U_EXPORT2
501 u_getCombiningClass(UChar32 c
) {
502 UErrorCode errorCode
=U_ZERO_ERROR
;
503 const Normalizer2
*nfd
=Normalizer2::getNFDInstance(errorCode
);
504 if(U_SUCCESS(errorCode
)) {
505 return nfd
->getCombiningClass(c
);
// Looks up the NFC Normalizer2Impl with a local error code and, when that
// succeeds, returns its getFCD16(c).
// NOTE(review): the return type/linkage line and the value returned when the
// lookup fails are not visible in this listing.
512 unorm_getFCD16(UChar32 c
) {
513 UErrorCode errorCode
=U_ZERO_ERROR
;
514 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
515 if(U_SUCCESS(errorCode
)) {
516 return impl
->getFCD16(c
);
522 #endif // !UCONFIG_NO_NORMALIZATION