/*
*******************************************************************************
*   Copyright (C) 2009-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  normalizer2.cpp
*   tab size:   8 (not used)
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_NORMALIZATION
21 #include "unicode/normalizer2.h"
22 #include "unicode/unistr.h"
23 #include "unicode/unorm.h"
26 #include "norm2allmodes.h"
27 #include "normalizer2impl.h"
31 using icu::Normalizer2Impl
;
33 // NFC/NFD data machine-generated by gennorm2 --csource
34 #include "norm2_nfc_data.h"
38 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
40 Normalizer2::~Normalizer2() {}
43 Normalizer2::getRawDecomposition(UChar32
, UnicodeString
&) const {
48 Normalizer2::composePair(UChar32
, UChar32
) const {
53 Normalizer2::getCombiningClass(UChar32
/*c*/) const {
57 // Normalizer2 implementation for the old UNORM_NONE.
58 class NoopNormalizer2
: public Normalizer2
{
59 virtual ~NoopNormalizer2();
61 virtual UnicodeString
&
62 normalize(const UnicodeString
&src
,
64 UErrorCode
&errorCode
) const {
65 if(U_SUCCESS(errorCode
)) {
69 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
74 virtual UnicodeString
&
75 normalizeSecondAndAppend(UnicodeString
&first
,
76 const UnicodeString
&second
,
77 UErrorCode
&errorCode
) const {
78 if(U_SUCCESS(errorCode
)) {
82 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
87 virtual UnicodeString
&
88 append(UnicodeString
&first
,
89 const UnicodeString
&second
,
90 UErrorCode
&errorCode
) const {
91 if(U_SUCCESS(errorCode
)) {
95 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
101 getDecomposition(UChar32
, UnicodeString
&) const {
104 // No need to override the default getRawDecomposition().
106 isNormalized(const UnicodeString
&, UErrorCode
&) const {
109 virtual UNormalizationCheckResult
110 quickCheck(const UnicodeString
&, UErrorCode
&) const {
114 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&) const {
117 virtual UBool
hasBoundaryBefore(UChar32
) const { return TRUE
; }
118 virtual UBool
hasBoundaryAfter(UChar32
) const { return TRUE
; }
119 virtual UBool
isInert(UChar32
) const { return TRUE
; }
122 NoopNormalizer2::~NoopNormalizer2() {}
124 Normalizer2WithImpl::~Normalizer2WithImpl() {}
126 DecomposeNormalizer2::~DecomposeNormalizer2() {}
128 ComposeNormalizer2::~ComposeNormalizer2() {}
130 FCDNormalizer2::~FCDNormalizer2() {}
132 // instance cache ---------------------------------------------------------- ***
134 Norm2AllModes::~Norm2AllModes() {
139 Norm2AllModes::createInstance(Normalizer2Impl
*impl
, UErrorCode
&errorCode
) {
140 if(U_FAILURE(errorCode
)) {
144 Norm2AllModes
*allModes
=new Norm2AllModes(impl
);
146 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
154 Norm2AllModes::createNFCInstance(UErrorCode
&errorCode
) {
155 if(U_FAILURE(errorCode
)) {
158 Normalizer2Impl
*impl
=new Normalizer2Impl
;
160 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
163 impl
->init(norm2_nfc_data_indexes
, &norm2_nfc_data_trie
,
164 norm2_nfc_data_extraData
, norm2_nfc_data_smallFCD
);
165 return createInstance(impl
, errorCode
);
169 static UBool U_CALLCONV
uprv_normalizer2_cleanup();
172 static Norm2AllModes
*nfcSingleton
;
173 static Normalizer2
*noopSingleton
;
175 static icu::UInitOnce nfcInitOnce
= U_INITONCE_INITIALIZER
;
176 static icu::UInitOnce noopInitOnce
= U_INITONCE_INITIALIZER
;
178 // UInitOnce singleton initialization functions
179 static void U_CALLCONV
initNFCSingleton(UErrorCode
&errorCode
) {
180 nfcSingleton
=Norm2AllModes::createNFCInstance(errorCode
);
181 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
184 static void U_CALLCONV
initNoopSingleton(UErrorCode
&errorCode
) {
185 if(U_FAILURE(errorCode
)) {
188 noopSingleton
=new NoopNormalizer2
;
189 if(noopSingleton
==NULL
) {
190 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
193 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
198 static UBool U_CALLCONV
uprv_normalizer2_cleanup() {
201 delete noopSingleton
;
202 noopSingleton
= NULL
;
204 noopInitOnce
.reset();
210 const Norm2AllModes
*
211 Norm2AllModes::getNFCInstance(UErrorCode
&errorCode
) {
212 if(U_FAILURE(errorCode
)) { return NULL
; }
213 umtx_initOnce(nfcInitOnce
, &initNFCSingleton
, errorCode
);
218 Normalizer2::getNFCInstance(UErrorCode
&errorCode
) {
219 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
220 return allModes
!=NULL
? &allModes
->comp
: NULL
;
224 Normalizer2::getNFDInstance(UErrorCode
&errorCode
) {
225 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
226 return allModes
!=NULL
? &allModes
->decomp
: NULL
;
229 const Normalizer2
*Normalizer2Factory::getFCDInstance(UErrorCode
&errorCode
) {
230 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
231 return allModes
!=NULL
? &allModes
->fcd
: NULL
;
234 const Normalizer2
*Normalizer2Factory::getFCCInstance(UErrorCode
&errorCode
) {
235 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
236 return allModes
!=NULL
? &allModes
->fcc
: NULL
;
239 const Normalizer2
*Normalizer2Factory::getNoopInstance(UErrorCode
&errorCode
) {
240 if(U_FAILURE(errorCode
)) { return NULL
; }
241 umtx_initOnce(noopInitOnce
, &initNoopSingleton
, errorCode
);
242 return noopSingleton
;
245 const Normalizer2Impl
*
246 Normalizer2Factory::getNFCImpl(UErrorCode
&errorCode
) {
247 const Norm2AllModes
*allModes
=Norm2AllModes::getNFCInstance(errorCode
);
248 return allModes
!=NULL
? allModes
->impl
: NULL
;
251 const Normalizer2Impl
*
252 Normalizer2Factory::getImpl(const Normalizer2
*norm2
) {
253 return &((Normalizer2WithImpl
*)norm2
)->impl
;
258 // C API ------------------------------------------------------------------- ***
262 U_CAPI
const UNormalizer2
* U_EXPORT2
263 unorm2_getNFCInstance(UErrorCode
*pErrorCode
) {
264 return (const UNormalizer2
*)Normalizer2::getNFCInstance(*pErrorCode
);
267 U_CAPI
const UNormalizer2
* U_EXPORT2
268 unorm2_getNFDInstance(UErrorCode
*pErrorCode
) {
269 return (const UNormalizer2
*)Normalizer2::getNFDInstance(*pErrorCode
);
272 U_CAPI
void U_EXPORT2
273 unorm2_close(UNormalizer2
*norm2
) {
274 delete (Normalizer2
*)norm2
;
277 U_CAPI
int32_t U_EXPORT2
278 unorm2_normalize(const UNormalizer2
*norm2
,
279 const UChar
*src
, int32_t length
,
280 UChar
*dest
, int32_t capacity
,
281 UErrorCode
*pErrorCode
) {
282 if(U_FAILURE(*pErrorCode
)) {
285 if( (src
==NULL
? length
!=0 : length
<-1) ||
286 (dest
==NULL
? capacity
!=0 : capacity
<0) ||
287 (src
==dest
&& src
!=NULL
)
289 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
292 UnicodeString
destString(dest
, 0, capacity
);
293 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
295 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
296 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
298 // Avoid duplicate argument checking and support NUL-terminated src.
299 ReorderingBuffer
buffer(n2wi
->impl
, destString
);
300 if(buffer
.init(length
, *pErrorCode
)) {
301 n2wi
->normalize(src
, length
>=0 ? src
+length
: NULL
, buffer
, *pErrorCode
);
304 UnicodeString
srcString(length
<0, src
, length
);
305 n2
->normalize(srcString
, destString
, *pErrorCode
);
308 return destString
.extract(dest
, capacity
, *pErrorCode
);
312 normalizeSecondAndAppend(const UNormalizer2
*norm2
,
313 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
314 const UChar
*second
, int32_t secondLength
,
316 UErrorCode
*pErrorCode
) {
317 if(U_FAILURE(*pErrorCode
)) {
320 if( (second
==NULL
? secondLength
!=0 : secondLength
<-1) ||
321 (first
==NULL
? (firstCapacity
!=0 || firstLength
!=0) :
322 (firstCapacity
<0 || firstLength
<-1)) ||
323 (first
==second
&& first
!=NULL
)
325 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
328 UnicodeString
firstString(first
, firstLength
, firstCapacity
);
329 firstLength
=firstString
.length(); // In case it was -1.
330 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
331 if(secondLength
!=0) {
332 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
333 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
335 // Avoid duplicate argument checking and support NUL-terminated src.
336 UnicodeString safeMiddle
;
338 ReorderingBuffer
buffer(n2wi
->impl
, firstString
);
339 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) { // destCapacity>=-1
340 n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength
: NULL
,
341 doNormalize
, safeMiddle
, buffer
, *pErrorCode
);
343 } // The ReorderingBuffer destructor finalizes firstString.
344 if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) {
345 // Restore the modified suffix of the first string.
346 // This does not restore first[] array contents between firstLength and firstCapacity.
347 // (That might be uninitialized memory, as far as we know.)
348 if(first
!=NULL
) { /* don't dereference NULL */
349 safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length());
350 if(firstLength
<firstCapacity
) {
351 first
[firstLength
]=0; // NUL-terminate in case it was originally.
356 UnicodeString
secondString(secondLength
<0, second
, secondLength
);
358 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
);
360 n2
->append(firstString
, secondString
, *pErrorCode
);
364 return firstString
.extract(first
, firstCapacity
, *pErrorCode
);
367 U_CAPI
int32_t U_EXPORT2
368 unorm2_normalizeSecondAndAppend(const UNormalizer2
*norm2
,
369 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
370 const UChar
*second
, int32_t secondLength
,
371 UErrorCode
*pErrorCode
) {
372 return normalizeSecondAndAppend(norm2
,
373 first
, firstLength
, firstCapacity
,
374 second
, secondLength
,
378 U_CAPI
int32_t U_EXPORT2
379 unorm2_append(const UNormalizer2
*norm2
,
380 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
381 const UChar
*second
, int32_t secondLength
,
382 UErrorCode
*pErrorCode
) {
383 return normalizeSecondAndAppend(norm2
,
384 first
, firstLength
, firstCapacity
,
385 second
, secondLength
,
389 U_CAPI
int32_t U_EXPORT2
390 unorm2_getDecomposition(const UNormalizer2
*norm2
,
391 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
392 UErrorCode
*pErrorCode
) {
393 if(U_FAILURE(*pErrorCode
)) {
396 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
397 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
400 UnicodeString
destString(decomposition
, 0, capacity
);
401 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getDecomposition(c
, destString
)) {
402 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
408 U_CAPI
int32_t U_EXPORT2
409 unorm2_getRawDecomposition(const UNormalizer2
*norm2
,
410 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
411 UErrorCode
*pErrorCode
) {
412 if(U_FAILURE(*pErrorCode
)) {
415 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
416 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
419 UnicodeString
destString(decomposition
, 0, capacity
);
420 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getRawDecomposition(c
, destString
)) {
421 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
427 U_CAPI UChar32 U_EXPORT2
428 unorm2_composePair(const UNormalizer2
*norm2
, UChar32 a
, UChar32 b
) {
429 return reinterpret_cast<const Normalizer2
*>(norm2
)->composePair(a
, b
);
432 U_CAPI
uint8_t U_EXPORT2
433 unorm2_getCombiningClass(const UNormalizer2
*norm2
, UChar32 c
) {
434 return reinterpret_cast<const Normalizer2
*>(norm2
)->getCombiningClass(c
);
437 U_CAPI UBool U_EXPORT2
438 unorm2_isNormalized(const UNormalizer2
*norm2
,
439 const UChar
*s
, int32_t length
,
440 UErrorCode
*pErrorCode
) {
441 if(U_FAILURE(*pErrorCode
)) {
444 if((s
==NULL
&& length
!=0) || length
<-1) {
445 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
448 UnicodeString
sString(length
<0, s
, length
);
449 return ((const Normalizer2
*)norm2
)->isNormalized(sString
, *pErrorCode
);
452 U_CAPI UNormalizationCheckResult U_EXPORT2
453 unorm2_quickCheck(const UNormalizer2
*norm2
,
454 const UChar
*s
, int32_t length
,
455 UErrorCode
*pErrorCode
) {
456 if(U_FAILURE(*pErrorCode
)) {
459 if((s
==NULL
&& length
!=0) || length
<-1) {
460 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
463 UnicodeString
sString(length
<0, s
, length
);
464 return ((const Normalizer2
*)norm2
)->quickCheck(sString
, *pErrorCode
);
467 U_CAPI
int32_t U_EXPORT2
468 unorm2_spanQuickCheckYes(const UNormalizer2
*norm2
,
469 const UChar
*s
, int32_t length
,
470 UErrorCode
*pErrorCode
) {
471 if(U_FAILURE(*pErrorCode
)) {
474 if((s
==NULL
&& length
!=0) || length
<-1) {
475 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
478 UnicodeString
sString(length
<0, s
, length
);
479 return ((const Normalizer2
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
);
482 U_CAPI UBool U_EXPORT2
483 unorm2_hasBoundaryBefore(const UNormalizer2
*norm2
, UChar32 c
) {
484 return ((const Normalizer2
*)norm2
)->hasBoundaryBefore(c
);
487 U_CAPI UBool U_EXPORT2
488 unorm2_hasBoundaryAfter(const UNormalizer2
*norm2
, UChar32 c
) {
489 return ((const Normalizer2
*)norm2
)->hasBoundaryAfter(c
);
492 U_CAPI UBool U_EXPORT2
493 unorm2_isInert(const UNormalizer2
*norm2
, UChar32 c
) {
494 return ((const Normalizer2
*)norm2
)->isInert(c
);
497 // Some properties APIs ---------------------------------------------------- ***
499 U_CAPI
uint8_t U_EXPORT2
500 u_getCombiningClass(UChar32 c
) {
501 UErrorCode errorCode
=U_ZERO_ERROR
;
502 const Normalizer2
*nfd
=Normalizer2::getNFDInstance(errorCode
);
503 if(U_SUCCESS(errorCode
)) {
504 return nfd
->getCombiningClass(c
);
511 unorm_getFCD16(UChar32 c
) {
512 UErrorCode errorCode
=U_ZERO_ERROR
;
513 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
514 if(U_SUCCESS(errorCode
)) {
515 return impl
->getFCD16(c
);
521 #endif // !UCONFIG_NO_NORMALIZATION