2 *******************************************************************************
4 * Copyright (C) 2009-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: normalizer2.cpp
10 * tab size: 8 (not used)
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_NORMALIZATION
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
28 #include "normalizer2impl.h"
35 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
// Out-of-line definition of the Normalizer2 destructor; its body is empty.
37 Normalizer2::~Normalizer2() {}
// Base-class getRawDecomposition(): both parameters are unnamed, so this
// version cannot read the code point or write to the output string.
// NOTE(review): return type and body are not visible in this excerpt.
40 Normalizer2::getRawDecomposition(UChar32
, UnicodeString
&) const {
// Base-class composePair(): both code point parameters are unnamed and unused.
// NOTE(review): return type and body are not visible in this excerpt.
45 Normalizer2::composePair(UChar32
, UChar32
) const {
// Base-class getCombiningClass(): the parameter name is commented out, so it is unused.
// NOTE(review): return type and body are not visible in this excerpt.
50 Normalizer2::getCombiningClass(UChar32
/*c*/) const {
54 // Normalizer2 implementation for the old UNORM_NONE.
// The boundary/inert predicates below return TRUE unconditionally; the
// string-producing methods first test the incoming UErrorCode.
// NOTE(review): several method bodies and return types are not visible in this excerpt.
55 class NoopNormalizer2
: public Normalizer2
{
56 virtual ~NoopNormalizer2();
// normalize(): only acts while errorCode signals success; one path inside
// that test reports U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the condition guarding the error assignment is not visible here.
58 virtual UnicodeString
&
59 normalize(const UnicodeString
&src
,
61 UErrorCode
&errorCode
) const {
62 if(U_SUCCESS(errorCode
)) {
66 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// normalizeSecondAndAppend(): same visible shape as normalize() above --
// a success test followed by a path that sets U_ILLEGAL_ARGUMENT_ERROR.
71 virtual UnicodeString
&
72 normalizeSecondAndAppend(UnicodeString
&first
,
73 const UnicodeString
&second
,
74 UErrorCode
&errorCode
) const {
75 if(U_SUCCESS(errorCode
)) {
79 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// append(): same visible shape as normalizeSecondAndAppend() above.
84 virtual UnicodeString
&
85 append(UnicodeString
&first
,
86 const UnicodeString
&second
,
87 UErrorCode
&errorCode
) const {
88 if(U_SUCCESS(errorCode
)) {
92 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// getDecomposition(): both parameters are unnamed, so neither is used.
98 getDecomposition(UChar32
, UnicodeString
&) const {
101 // No need to override the default getRawDecomposition().
// isNormalized(): both parameters are unnamed, so neither is inspected.
103 isNormalized(const UnicodeString
&, UErrorCode
&) const {
// quickCheck(): both parameters are unnamed, so neither is inspected.
106 virtual UNormalizationCheckResult
107 quickCheck(const UnicodeString
&, UErrorCode
&) const {
// spanQuickCheckYes(): only the string parameter is named; the error code is unused.
111 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&) const {
// The three predicates below ignore their code point and always answer TRUE.
114 virtual UBool
hasBoundaryBefore(UChar32
) const { return TRUE
; }
115 virtual UBool
hasBoundaryAfter(UChar32
) const { return TRUE
; }
116 virtual UBool
isInert(UChar32
) const { return TRUE
; }
// Out-of-line destructor definition; the body is empty.
119 NoopNormalizer2::~NoopNormalizer2() {}
121 // Intermediate class:
122 // Has Normalizer2Impl and does boilerplate argument checking and setup.
// Subclasses supply the three pure virtual pointer-range operations declared
// below: normalize(), normalizeAndAppend() and spanQuickCheckYes().
123 class Normalizer2WithImpl
: public Normalizer2
{
// Binds the impl reference member to the supplied Normalizer2Impl.
125 Normalizer2WithImpl(const Normalizer2Impl
&ni
) : impl(ni
) {}
126 virtual ~Normalizer2WithImpl();
// normalize() on UnicodeString arguments: rejects a failing errorCode, then
// rejects dest aliasing src or a NULL source buffer with
// U_ILLEGAL_ARGUMENT_ERROR, and otherwise runs the pointer-range overload
// through a ReorderingBuffer that writes into dest.
129 virtual UnicodeString
&
130 normalize(const UnicodeString
&src
,
132 UErrorCode
&errorCode
) const {
133 if(U_FAILURE(errorCode
)) {
137 const UChar
*sArray
=src
.getBuffer();
138 if(&dest
==&src
|| sArray
==NULL
) {
139 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
144 ReorderingBuffer
buffer(impl
, dest
);
// The buffer is initialized with the source length as its capacity hint.
145 if(buffer
.init(src
.length(), errorCode
)) {
146 normalize(sArray
, sArray
+src
.length(), buffer
, errorCode
);
// Pure virtual worker: normalizes [src, limit) into the given ReorderingBuffer.
151 normalize(const UChar
*src
, const UChar
*limit
,
152 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
154 // normalize and append
// Forwards to the four-argument overload with TRUE as the third argument.
155 virtual UnicodeString
&
156 normalizeSecondAndAppend(UnicodeString
&first
,
157 const UnicodeString
&second
,
158 UErrorCode
&errorCode
) const {
159 return normalizeSecondAndAppend(first
, second
, TRUE
, errorCode
);
// append(): forwards to the same overload with FALSE as the third argument.
161 virtual UnicodeString
&
162 append(UnicodeString
&first
,
163 const UnicodeString
&second
,
164 UErrorCode
&errorCode
) const {
165 return normalizeSecondAndAppend(first
, second
, FALSE
, errorCode
);
// Shared implementation behind normalizeSecondAndAppend() and append().
// Checks `first` with uprv_checkCanGetBuffer(), rejects `first` aliasing
// `second` or a NULL second buffer, then appends through a ReorderingBuffer
// that writes into `first`.
168 normalizeSecondAndAppend(UnicodeString
&first
,
169 const UnicodeString
&second
,
171 UErrorCode
&errorCode
) const {
172 uprv_checkCanGetBuffer(first
, errorCode
);
173 if(U_FAILURE(errorCode
)) {
176 const UChar
*secondArray
=second
.getBuffer();
177 if(&first
==&second
|| secondArray
==NULL
) {
178 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// Remember the original length so the tail can be restored on failure.
181 int32_t firstLength
=first
.length();
182 UnicodeString safeMiddle
;
184 ReorderingBuffer
buffer(impl
, first
);
185 if(buffer
.init(firstLength
+second
.length(), errorCode
)) {
186 normalizeAndAppend(secondArray
, secondArray
+second
.length(), doNormalize
,
187 safeMiddle
, buffer
, errorCode
);
189 } // The ReorderingBuffer destructor finalizes the first string.
190 if(U_FAILURE(errorCode
)) {
191 // Restore the modified suffix of the first string.
192 first
.replace(firstLength
-safeMiddle
.length(), 0x7fffffff, safeMiddle
);
// Pure virtual worker: appends [src, limit) to the buffer, passing doNormalize
// and the safeMiddle string through to the subclass.
197 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
198 UnicodeString
&safeMiddle
,
199 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
// getDecomposition(): asks impl.getDecomposition() for c; the result is
// stored either as a copy of the local buffer or as a read-only alias of d
// (see the two setTo() calls).
// NOTE(review): the conditions selecting between them are not visible here.
201 getDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
204 const UChar
*d
=impl
.getDecomposition(c
, buffer
, length
);
209 decomposition
.setTo(buffer
, length
); // copy the string (Jamos from Hangul syllable c)
211 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
// getRawDecomposition(): same visible shape as getDecomposition(), using
// impl.getRawDecomposition().
216 getRawDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
219 const UChar
*d
=impl
.getRawDecomposition(c
, buffer
, length
);
224 decomposition
.setTo(buffer
, length
); // copy the string (algorithmic decomposition)
226 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
// composePair(): delegates directly to impl.composePair(a, b).
231 composePair(UChar32 a
, UChar32 b
) const {
232 return impl
.composePair(a
, b
);
// getCombiningClass(): looks up the norm16 value for c and maps it via impl.getCC().
236 getCombiningClass(UChar32 c
) const {
237 return impl
.getCC(impl
.getNorm16(c
));
// isNormalized(): the string counts as normalized when the pointer-range
// spanQuickCheckYes() reaches the end of the buffer.
// NOTE(review): the guard for the U_ILLEGAL_ARGUMENT_ERROR path is not visible here.
242 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
243 if(U_FAILURE(errorCode
)) {
246 const UChar
*sArray
=s
.getBuffer();
248 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
251 const UChar
*sLimit
=sArray
+s
.length();
252 return sLimit
==spanQuickCheckYes(sArray
, sLimit
, errorCode
);
// quickCheck(): maps this class's own isNormalized() result to UNORM_YES or UNORM_NO.
254 virtual UNormalizationCheckResult
255 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
256 return Normalizer2WithImpl::isNormalized(s
, errorCode
) ? UNORM_YES
: UNORM_NO
;
// spanQuickCheckYes() on a UnicodeString: converts the end pointer returned
// by the pointer-range overload into an int32_t offset from the buffer start.
259 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
260 if(U_FAILURE(errorCode
)) {
263 const UChar
*sArray
=s
.getBuffer();
265 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
268 return (int32_t)(spanQuickCheckYes(sArray
, sArray
+s
.length(), errorCode
)-sArray
);
// Pure virtual worker: returns a pointer into [src, limit].
270 virtual const UChar
*
271 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const = 0;
// Default getQuickCheck(): the code point parameter is unnamed and unused.
// NOTE(review): the body is not visible in this excerpt.
273 virtual UNormalizationCheckResult
getQuickCheck(UChar32
) const {
// Reference to the implementation object; set by the constructor.
277 const Normalizer2Impl
&impl
;
// Out-of-line destructor definition; the body is empty.
280 Normalizer2WithImpl::~Normalizer2WithImpl() {}
// Normalizer2WithImpl subclass whose operations delegate to the impl's
// decompose*/isDecomp*/hasDecompBoundary functions.
282 class DecomposeNormalizer2
: public Normalizer2WithImpl
{
284 DecomposeNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
285 virtual ~DecomposeNormalizer2();
// normalize(): decomposes [src, limit) into the buffer (passed by address).
289 normalize(const UChar
*src
, const UChar
*limit
,
290 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
291 impl
.decompose(src
, limit
, &buffer
, errorCode
);
293 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// normalizeAndAppend(): passes all arguments through to impl.decomposeAndAppend().
295 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
296 UnicodeString
&safeMiddle
,
297 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
298 impl
.decomposeAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
// spanQuickCheckYes(): calls impl.decompose() with a NULL buffer and returns its pointer result.
300 virtual const UChar
*
301 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
302 return impl
.decompose(src
, limit
, NULL
, errorCode
);
304 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// getQuickCheck(): UNORM_YES when impl.isDecompYes() accepts c's norm16 value, else UNORM_NO.
305 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const {
306 return impl
.isDecompYes(impl
.getNorm16(c
)) ? UNORM_YES
: UNORM_NO
;
// Boundary tests share impl.hasDecompBoundary(); TRUE selects "before", FALSE "after".
308 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasDecompBoundary(c
, TRUE
); }
309 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasDecompBoundary(c
, FALSE
); }
310 virtual UBool
isInert(UChar32 c
) const { return impl
.isDecompInert(c
); }
// Out-of-line destructor definition; the body is empty.
313 DecomposeNormalizer2::~DecomposeNormalizer2() {}
// Normalizer2WithImpl subclass whose operations delegate to the impl's
// compose*/hasCompBoundary* functions. The onlyContiguous flag, set from the
// constructor's fcc argument, is passed along on those calls.
315 class ComposeNormalizer2
: public Normalizer2WithImpl
{
317 ComposeNormalizer2(const Normalizer2Impl
&ni
, UBool fcc
) :
318 Normalizer2WithImpl(ni
), onlyContiguous(fcc
) {}
319 virtual ~ComposeNormalizer2();
// normalize(): composes [src, limit) into the buffer; the fourth argument is TRUE here.
323 normalize(const UChar
*src
, const UChar
*limit
,
324 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
325 impl
.compose(src
, limit
, onlyContiguous
, TRUE
, buffer
, errorCode
);
327 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// normalizeAndAppend(): passes all arguments plus onlyContiguous to impl.composeAndAppend().
329 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
330 UnicodeString
&safeMiddle
,
331 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
332 impl
.composeAndAppend(src
, limit
, doNormalize
, onlyContiguous
, safeMiddle
, buffer
, errorCode
);
// isNormalized(): runs impl.compose() with FALSE as the fourth argument into a
// small scratch ReorderingBuffer and returns that call's result.
// NOTE(review): the declaration of `temp` and the guard for the error path
// are not visible in this excerpt.
336 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
337 if(U_FAILURE(errorCode
)) {
340 const UChar
*sArray
=s
.getBuffer();
342 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
346 ReorderingBuffer
buffer(impl
, temp
);
347 if(!buffer
.init(5, errorCode
)) { // small destCapacity for substring normalization
350 return impl
.compose(sArray
, sArray
+s
.length(), onlyContiguous
, FALSE
, buffer
, errorCode
);
// quickCheck(): starts from UNORM_YES and lets impl.composeQuickCheck() update
// qcResult through the pointer argument.
// NOTE(review): the return statement is not visible in this excerpt.
352 virtual UNormalizationCheckResult
353 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
354 if(U_FAILURE(errorCode
)) {
357 const UChar
*sArray
=s
.getBuffer();
359 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
362 UNormalizationCheckResult qcResult
=UNORM_YES
;
363 impl
.composeQuickCheck(sArray
, sArray
+s
.length(), onlyContiguous
, &qcResult
);
// spanQuickCheckYes(): same impl call with a NULL result pointer; the error
// code parameter is unnamed and unused.
366 virtual const UChar
*
367 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&) const {
368 return impl
.composeQuickCheck(src
, limit
, onlyContiguous
, NULL
);
370 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// getQuickCheck(): maps c's norm16 value through impl.getCompQuickCheck().
371 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const {
372 return impl
.getCompQuickCheck(impl
.getNorm16(c
));
374 virtual UBool
hasBoundaryBefore(UChar32 c
) const {
375 return impl
.hasCompBoundaryBefore(c
);
// hasBoundaryAfter() and isInert() share impl.hasCompBoundaryAfter(); they
// differ only in the last argument (FALSE vs. TRUE).
377 virtual UBool
hasBoundaryAfter(UChar32 c
) const {
378 return impl
.hasCompBoundaryAfter(c
, onlyContiguous
, FALSE
);
380 virtual UBool
isInert(UChar32 c
) const {
381 return impl
.hasCompBoundaryAfter(c
, onlyContiguous
, TRUE
);
// Fixed at construction from the fcc argument.
384 const UBool onlyContiguous
;
// Out-of-line destructor definition; the body is empty.
387 ComposeNormalizer2::~ComposeNormalizer2() {}
// Normalizer2WithImpl subclass whose operations delegate to the impl's
// makeFCD*/hasFCDBoundary*/isFCDInert functions.
389 class FCDNormalizer2
: public Normalizer2WithImpl
{
391 FCDNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
392 virtual ~FCDNormalizer2();
// normalize(): runs impl.makeFCD() over [src, limit) with the buffer passed by address.
396 normalize(const UChar
*src
, const UChar
*limit
,
397 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
398 impl
.makeFCD(src
, limit
, &buffer
, errorCode
);
400 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// normalizeAndAppend(): passes all arguments through to impl.makeFCDAndAppend().
402 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
403 UnicodeString
&safeMiddle
,
404 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
405 impl
.makeFCDAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
// spanQuickCheckYes(): calls impl.makeFCD() with a NULL buffer and returns its pointer result.
407 virtual const UChar
*
408 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
409 return impl
.makeFCD(src
, limit
, NULL
, errorCode
);
411 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// Boundary and inertness predicates forward directly to the impl's FCD variants.
412 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasFCDBoundaryBefore(c
); }
413 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasFCDBoundaryAfter(c
); }
414 virtual UBool
isInert(UChar32 c
) const { return impl
.isFCDInert(c
); }
// Out-of-line destructor definition; the body is empty.
417 FCDNormalizer2::~FCDNormalizer2() {}
419 // instance cache ---------------------------------------------------------- ***
// Bundles one Normalizer2Impl with the normalizer objects constructed from it.
421 struct Norm2AllModes
: public UMemory
{
// Factory; see the out-of-line definition.
422 static Norm2AllModes
*createInstance(const char *packageName
,
424 UErrorCode
&errorCode
);
// Every normalizer is constructed from the same impl member; comp and fcc are
// both ComposeNormalizer2 and differ only in the flag passed (FALSE vs. TRUE).
425 Norm2AllModes() : comp(impl
, FALSE
), decomp(impl
), fcd(impl
), fcc(impl
, TRUE
) {}
// impl is declared ahead of the normalizers that take a reference to it.
427 Normalizer2Impl impl
;
428 ComposeNormalizer2 comp
;
429 DecomposeNormalizer2 decomp
;
431 ComposeNormalizer2 fcc
;
// Allocates a Norm2AllModes, loads its impl via impl.load(packageName, name,
// errorCode), and returns the object on success or NULL on failure.
// A failed allocation is reported as U_MEMORY_ALLOCATION_ERROR.
// NOTE(review): the `name` parameter and the early-return bodies are not
// visible in this excerpt.
435 Norm2AllModes::createInstance(const char *packageName
,
437 UErrorCode
&errorCode
) {
438 if(U_FAILURE(errorCode
)) {
// Held in a LocalPointer until ownership is released with orphan() below.
441 LocalPointer
<Norm2AllModes
> allModes(new Norm2AllModes
);
442 if(allModes
.isNull()) {
443 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
446 allModes
->impl
.load(packageName
, name
, errorCode
);
// Ownership is handed to the caller only when loading succeeded.
447 return U_SUCCESS(errorCode
) ? allModes
.orphan() : NULL
;
// Forward declaration; the definition appears after initSingletons(), which
// registers it.
451 static UBool U_CALLCONV
uprv_normalizer2_cleanup();
// File-level singletons, assigned by initSingletons().
455 static Norm2AllModes
*nfcSingleton
;
456 static Norm2AllModes
*nfkcSingleton
;
457 static Norm2AllModes
*nfkc_cfSingleton
;
458 static Normalizer2
*noopSingleton
;
// Hashtable used by Normalizer2::getInstance(); opened there on demand.
459 static UHashtable
*cache
=NULL
;
// One UInitOnce per singleton, passed to umtx_initOnce() by the getters.
461 static icu::UInitOnce nfcInitOnce
= U_INITONCE_INITIALIZER
;
462 static icu::UInitOnce nfkcInitOnce
= U_INITONCE_INITIALIZER
;
463 static icu::UInitOnce nfkc_cfInitOnce
= U_INITONCE_INITIALIZER
;
464 static icu::UInitOnce noopInitOnce
= U_INITONCE_INITIALIZER
;
466 // UInitOnce singleton initialization function
// Selects the singleton to create by comparing `what` against "nfc", "nfkc",
// "nfkc_cf" and "noop"; the first three are loaded through
// Norm2AllModes::createInstance() with a NULL package name. Afterwards
// uprv_normalizer2_cleanup is registered with ucln_common_registerCleanup().
// NOTE(review): no check of the `new NoopNormalizer2` result is visible here --
// confirm that an allocation failure is reported through errorCode.
467 static void U_CALLCONV
initSingletons(const char *what
, UErrorCode
&errorCode
) {
468 if (uprv_strcmp(what
, "nfc") == 0) {
469 nfcSingleton
= Norm2AllModes::createInstance(NULL
, "nfc", errorCode
);
470 } else if (uprv_strcmp(what
, "nfkc") == 0) {
471 nfkcSingleton
= Norm2AllModes::createInstance(NULL
, "nfkc", errorCode
);
472 } else if (uprv_strcmp(what
, "nfkc_cf") == 0) {
473 nfkc_cfSingleton
= Norm2AllModes::createInstance(NULL
, "nfkc_cf", errorCode
);
474 } else if (uprv_strcmp(what
, "noop") == 0) {
475 noopSingleton
= new NoopNormalizer2
;
477 U_ASSERT(FALSE
); // Unknown singleton
479 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
// Deleter taking an untyped pointer: casts it to Norm2AllModes* and deletes it.
// Installed as the cache's value deleter in Normalizer2::getInstance().
484 static void U_CALLCONV
deleteNorm2AllModes(void *allModes
) {
485 delete (Norm2AllModes
*)allModes
;
// Cleanup callback registered by initSingletons(). The visible lines delete
// the nfkc, nfkc_cf and noop singletons, set their pointers to NULL, and
// reset the matching UInitOnce objects.
// NOTE(review): handling of nfcSingleton, nfcInitOnce and the cache, and the
// return value, are not visible in this excerpt.
488 static UBool U_CALLCONV
uprv_normalizer2_cleanup() {
491 delete nfkcSingleton
;
492 nfkcSingleton
= NULL
;
493 delete nfkc_cfSingleton
;
494 nfkc_cfSingleton
= NULL
;
495 delete noopSingleton
;
496 noopSingleton
= NULL
;
500 nfkcInitOnce
.reset();
501 nfkc_cfInitOnce
.reset();
502 noopInitOnce
.reset();
// Calls umtx_initOnce() with nfcInitOnce to run initSingletons("nfc"), then
// returns the address of nfcSingleton->comp, or NULL while nfcSingleton is NULL.
508 const Normalizer2
*Normalizer2Factory::getNFCInstance(UErrorCode
&errorCode
) {
509 umtx_initOnce(nfcInitOnce
, &initSingletons
, "nfc", errorCode
);
510 return nfcSingleton
!=NULL
? &nfcSingleton
->comp
: NULL
;
// Uses the "nfc" singleton (same nfcInitOnce as getNFCInstance) and returns
// the address of its decomp member, or NULL while nfcSingleton is NULL.
513 const Normalizer2
*Normalizer2Factory::getNFDInstance(UErrorCode
&errorCode
) {
514 umtx_initOnce(nfcInitOnce
, &initSingletons
, "nfc", errorCode
);
515 return nfcSingleton
!=NULL
? &nfcSingleton
->decomp
: NULL
;
// Uses the "nfc" singleton and returns the address of its fcd member, or NULL
// while nfcSingleton is NULL.
518 const Normalizer2
*Normalizer2Factory::getFCDInstance(UErrorCode
&errorCode
) {
519 umtx_initOnce(nfcInitOnce
, &initSingletons
, "nfc", errorCode
);
520 return nfcSingleton
!=NULL
? &nfcSingleton
->fcd
: NULL
;
// Uses the "nfc" singleton and returns the address of its fcc member, or NULL
// while nfcSingleton is NULL.
523 const Normalizer2
*Normalizer2Factory::getFCCInstance(UErrorCode
&errorCode
) {
524 umtx_initOnce(nfcInitOnce
, &initSingletons
, "nfc", errorCode
);
525 return nfcSingleton
!=NULL
? &nfcSingleton
->fcc
: NULL
;
// Calls umtx_initOnce() with nfkcInitOnce to run initSingletons("nfkc"), then
// returns the address of nfkcSingleton->comp, or NULL while nfkcSingleton is NULL.
528 const Normalizer2
*Normalizer2Factory::getNFKCInstance(UErrorCode
&errorCode
) {
529 umtx_initOnce(nfkcInitOnce
, &initSingletons
, "nfkc", errorCode
);
530 return nfkcSingleton
!=NULL
? &nfkcSingleton
->comp
: NULL
;
// Uses the "nfkc" singleton and returns the address of its decomp member, or
// NULL while nfkcSingleton is NULL.
533 const Normalizer2
*Normalizer2Factory::getNFKDInstance(UErrorCode
&errorCode
) {
534 umtx_initOnce(nfkcInitOnce
, &initSingletons
, "nfkc", errorCode
);
535 return nfkcSingleton
!=NULL
? &nfkcSingleton
->decomp
: NULL
;
// Calls umtx_initOnce() with nfkc_cfInitOnce to run initSingletons("nfkc_cf"),
// then returns the address of nfkc_cfSingleton->comp, or NULL while that
// singleton is NULL.
538 const Normalizer2
*Normalizer2Factory::getNFKC_CFInstance(UErrorCode
&errorCode
) {
539 umtx_initOnce(nfkc_cfInitOnce
, &initSingletons
, "nfkc_cf", errorCode
);
540 return nfkc_cfSingleton
!=NULL
? &nfkc_cfSingleton
->comp
: NULL
;
// Calls umtx_initOnce() with noopInitOnce to run initSingletons("noop") and
// returns noopSingleton as is (no NULL test, unlike the other getters).
543 const Normalizer2
*Normalizer2Factory::getNoopInstance(UErrorCode
&errorCode
) {
544 umtx_initOnce(noopInitOnce
, &initSingletons
, "noop", errorCode
);
545 return noopSingleton
;
// Maps a UNormalizationMode to one of the per-mode factory getters; the
// default branch (annotated UNORM_NONE) returns the no-op instance.
// NOTE(review): the switch header and the individual case labels are not
// visible in this excerpt, so the mode-to-getter pairing must be confirmed
// against the full file.
549 Normalizer2Factory::getInstance(UNormalizationMode mode
, UErrorCode
&errorCode
) {
550 if(U_FAILURE(errorCode
)) {
555 return getNFDInstance(errorCode
);
557 return getNFKDInstance(errorCode
);
559 return getNFCInstance(errorCode
);
561 return getNFKCInstance(errorCode
);
563 return getFCDInstance(errorCode
);
564 default: // UNORM_NONE
565 return getNoopInstance(errorCode
);
// Initializes the "nfc" singleton via umtx_initOnce() and returns the address
// of its impl member, or NULL while nfcSingleton is NULL.
569 const Normalizer2Impl
*
570 Normalizer2Factory::getNFCImpl(UErrorCode
&errorCode
) {
571 umtx_initOnce(nfcInitOnce
, &initSingletons
, "nfc", errorCode
);
572 return nfcSingleton
!=NULL
? &nfcSingleton
->impl
: NULL
;
// Initializes the "nfkc" singleton via umtx_initOnce() and returns the address
// of its impl member, or NULL while nfkcSingleton is NULL.
575 const Normalizer2Impl
*
576 Normalizer2Factory::getNFKCImpl(UErrorCode
&errorCode
) {
577 umtx_initOnce(nfkcInitOnce
, &initSingletons
, "nfkc", errorCode
);
578 return nfkcSingleton
!=NULL
? &nfkcSingleton
->impl
: NULL
;
// Initializes the "nfkc_cf" singleton via umtx_initOnce() and returns the
// address of its impl member, or NULL while nfkc_cfSingleton is NULL.
581 const Normalizer2Impl
*
582 Normalizer2Factory::getNFKC_CFImpl(UErrorCode
&errorCode
) {
583 umtx_initOnce(nfkc_cfInitOnce
, &initSingletons
, "nfkc_cf", errorCode
);
584 return nfkc_cfSingleton
!=NULL
? &nfkc_cfSingleton
->impl
: NULL
;
// Returns the address of the impl member of norm2, using an unchecked C-style
// cast to Normalizer2WithImpl*.
// NOTE(review): this presumes norm2 really is a Normalizer2WithImpl; nothing
// here verifies that -- confirm against callers.
587 const Normalizer2Impl
*
588 Normalizer2Factory::getImpl(const Normalizer2
*norm2
) {
589 return &((Normalizer2WithImpl
*)norm2
)->impl
;
// Public entry point; forwards to Normalizer2Factory::getNFCInstance().
593 Normalizer2::getNFCInstance(UErrorCode
&errorCode
) {
594 return Normalizer2Factory::getNFCInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFDInstance().
598 Normalizer2::getNFDInstance(UErrorCode
&errorCode
) {
599 return Normalizer2Factory::getNFDInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFKCInstance().
603 Normalizer2::getNFKCInstance(UErrorCode
&errorCode
) {
604 return Normalizer2Factory::getNFKCInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFKDInstance().
608 Normalizer2::getNFKDInstance(UErrorCode
&errorCode
) {
609 return Normalizer2Factory::getNFKDInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFKC_CFInstance().
613 Normalizer2::getNFKCCasefoldInstance(UErrorCode
&errorCode
) {
614 return Normalizer2Factory::getNFKC_CFInstance(errorCode
);
// Returns the normalizer for (packageName, name, mode).
// A NULL or empty name is rejected with U_ILLEGAL_ARGUMENT_ERROR. With a NULL
// packageName, the names "nfc", "nfkc" and "nfkc_cf" resolve to the built-in
// singletons. Otherwise the Norm2AllModes object is looked up in -- or created
// and inserted into -- the file-level `cache` hashtable keyed by name. The
// final step returns the address of the member matching `mode`.
// NOTE(review): the `name` parameter declaration, any locking around the cache
// accesses, and several branch/return lines are not visible in this excerpt.
618 Normalizer2::getInstance(const char *packageName
,
620 UNormalization2Mode mode
,
621 UErrorCode
&errorCode
) {
622 if(U_FAILURE(errorCode
)) {
625 if(name
==NULL
|| *name
==0) {
626 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
629 Norm2AllModes
*allModes
=NULL
;
// Built-in data: initialize and use the matching singleton.
630 if(packageName
==NULL
) {
631 if(0==uprv_strcmp(name
, "nfc")) {
632 umtx_initOnce(nfcInitOnce
, &initSingletons
, "nfc", errorCode
);
633 allModes
=nfcSingleton
;
634 } else if(0==uprv_strcmp(name
, "nfkc")) {
635 umtx_initOnce(nfkcInitOnce
, &initSingletons
, "nfkc", errorCode
);
636 allModes
=nfkcSingleton
;
637 } else if(0==uprv_strcmp(name
, "nfkc_cf")) {
638 umtx_initOnce(nfkc_cfInitOnce
, &initSingletons
, "nfkc_cf", errorCode
);
639 allModes
=nfkc_cfSingleton
;
// No singleton matched: fall back to the cache.
642 if(allModes
==NULL
&& U_SUCCESS(errorCode
)) {
646 allModes
=(Norm2AllModes
*)uhash_get(cache
, name
);
// Build a new instance, owned by a LocalPointer until it is stored in the cache.
650 LocalPointer
<Norm2AllModes
> localAllModes(
651 Norm2AllModes::createInstance(packageName
, name
, errorCode
));
652 if(U_SUCCESS(errorCode
)) {
// The cache is opened with char-string hashing/comparison; its key and value
// deleters are uprv_free and deleteNorm2AllModes.
655 cache
=uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, &errorCode
);
656 if(U_FAILURE(errorCode
)) {
659 uhash_setKeyDeleter(cache
, uprv_free
);
660 uhash_setValueDeleter(cache
, deleteNorm2AllModes
);
// Look the name up again before inserting.
662 void *temp
=uhash_get(cache
, name
);
// The key is a heap copy of name, including its terminating NUL.
664 int32_t keyLength
=uprv_strlen(name
)+1;
665 char *nameCopy
=(char *)uprv_malloc(keyLength
);
667 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
670 uprv_memcpy(nameCopy
, name
, keyLength
);
// orphan() releases the new object from the LocalPointer as it is stored.
671 uhash_put(cache
, nameCopy
, allModes
=localAllModes
.orphan(), &errorCode
);
// Otherwise the entry found by the second lookup is used.
674 allModes
=(Norm2AllModes
*)temp
;
// Select the normalizer matching the requested mode.
679 if(allModes
!=NULL
&& U_SUCCESS(errorCode
)) {
682 return &allModes
->comp
;
683 case UNORM2_DECOMPOSE
:
684 return &allModes
->decomp
;
686 return &allModes
->fcd
;
687 case UNORM2_COMPOSE_CONTIGUOUS
:
688 return &allModes
->fcc
;
698 // C API ------------------------------------------------------------------- ***
// C API: returns Normalizer2::getNFCInstance() cast to the C handle type.
702 U_CAPI
const UNormalizer2
* U_EXPORT2
703 unorm2_getNFCInstance(UErrorCode
*pErrorCode
) {
704 return (const UNormalizer2
*)Normalizer2::getNFCInstance(*pErrorCode
);
// C API: returns Normalizer2::getNFDInstance() cast to the C handle type.
707 U_CAPI
const UNormalizer2
* U_EXPORT2
708 unorm2_getNFDInstance(UErrorCode
*pErrorCode
) {
709 return (const UNormalizer2
*)Normalizer2::getNFDInstance(*pErrorCode
);
// C API: returns Normalizer2::getNFKCInstance() cast to the C handle type.
712 U_CAPI
const UNormalizer2
* U_EXPORT2
713 unorm2_getNFKCInstance(UErrorCode
*pErrorCode
) {
714 return (const UNormalizer2
*)Normalizer2::getNFKCInstance(*pErrorCode
);
// C API: returns Normalizer2::getNFKDInstance() cast to the C handle type.
717 U_CAPI
const UNormalizer2
* U_EXPORT2
718 unorm2_getNFKDInstance(UErrorCode
*pErrorCode
) {
719 return (const UNormalizer2
*)Normalizer2::getNFKDInstance(*pErrorCode
);
// C API: returns Normalizer2::getNFKCCasefoldInstance() cast to the C handle type.
722 U_CAPI
const UNormalizer2
* U_EXPORT2
723 unorm2_getNFKCCasefoldInstance(UErrorCode
*pErrorCode
) {
724 return (const UNormalizer2
*)Normalizer2::getNFKCCasefoldInstance(*pErrorCode
);
// C API: forwards packageName, name and mode to Normalizer2::getInstance() and
// casts the result to the C handle type.
// NOTE(review): the `name` parameter declaration is not visible in this excerpt.
727 U_CAPI
const UNormalizer2
* U_EXPORT2
728 unorm2_getInstance(const char *packageName
,
730 UNormalization2Mode mode
,
731 UErrorCode
*pErrorCode
) {
732 return (const UNormalizer2
*)Normalizer2::getInstance(packageName
, name
, mode
, *pErrorCode
);
// C API: deletes the object behind the handle after casting it to Normalizer2*.
735 U_CAPI
void U_EXPORT2
736 unorm2_close(UNormalizer2
*norm2
) {
737 delete (Normalizer2
*)norm2
;
// C API: normalizes src[0..length) into dest[0..capacity) and returns the
// result of destString.extract().
// Argument checks (reported as U_ILLEGAL_ARGUMENT_ERROR): a NULL src needs
// length 0, otherwise length must be >= -1; a NULL dest needs capacity 0,
// otherwise capacity must be >= 0; src and dest must not be the same non-NULL
// pointer.
// NOTE(review): the tests selecting between the Normalizer2WithImpl path and
// the generic path (including any NULL check on n2wi) are not visible here.
740 U_CAPI
int32_t U_EXPORT2
741 unorm2_normalize(const UNormalizer2
*norm2
,
742 const UChar
*src
, int32_t length
,
743 UChar
*dest
, int32_t capacity
,
744 UErrorCode
*pErrorCode
) {
745 if(U_FAILURE(*pErrorCode
)) {
748 if( (src
==NULL
? length
!=0 : length
<-1) ||
749 (dest
==NULL
? capacity
!=0 : capacity
<0) ||
750 (src
==dest
&& src
!=NULL
)
752 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// Wrap the caller's dest array: initial length 0, capacity as given.
755 UnicodeString
destString(dest
, 0, capacity
);
756 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
758 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
759 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
761 // Avoid duplicate argument checking and support NUL-terminated src.
762 ReorderingBuffer
buffer(n2wi
->impl
, destString
);
763 if(buffer
.init(length
, *pErrorCode
)) {
// A negative length passes NULL as the limit pointer.
764 n2wi
->normalize(src
, length
>=0 ? src
+length
: NULL
, buffer
, *pErrorCode
);
// Generic path: wrap src in a UnicodeString and use the virtual normalize().
767 UnicodeString
srcString(length
<0, src
, length
);
768 n2
->normalize(srcString
, destString
, *pErrorCode
);
771 return destString
.extract(dest
, capacity
, *pErrorCode
);
// Helper called by unorm2_normalizeSecondAndAppend() and unorm2_append().
// Validates the raw arrays, wraps `first` in a UnicodeString, appends `second`
// either through a ReorderingBuffer (Normalizer2WithImpl path) or through the
// virtual normalizeSecondAndAppend()/append() methods, and returns the result
// of firstString.extract().
// NOTE(review): the doNormalize parameter declaration and the tests selecting
// between the two paths are not visible in this excerpt.
775 normalizeSecondAndAppend(const UNormalizer2
*norm2
,
776 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
777 const UChar
*second
, int32_t secondLength
,
779 UErrorCode
*pErrorCode
) {
780 if(U_FAILURE(*pErrorCode
)) {
// Reject inconsistent pointer/length combinations and first==second aliasing.
783 if( (second
==NULL
? secondLength
!=0 : secondLength
<-1) ||
784 (first
==NULL
? (firstCapacity
!=0 || firstLength
!=0) :
785 (firstCapacity
<0 || firstLength
<-1)) ||
786 (first
==second
&& first
!=NULL
)
788 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
791 UnicodeString
firstString(first
, firstLength
, firstCapacity
);
792 firstLength
=firstString
.length(); // In case it was -1.
793 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
794 if(secondLength
!=0) {
795 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
796 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
798 // Avoid duplicate argument checking and support NUL-terminated src.
799 UnicodeString safeMiddle
;
801 ReorderingBuffer
buffer(n2wi
->impl
, firstString
);
802 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) { // destCapacity>=-1
// A negative secondLength passes NULL as the limit pointer.
803 n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength
: NULL
,
804 doNormalize
, safeMiddle
, buffer
, *pErrorCode
);
806 } // The ReorderingBuffer destructor finalizes firstString.
// On failure, or when the result no longer fits in firstCapacity, write the
// saved safeMiddle text back into the caller's array.
807 if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) {
808 // Restore the modified suffix of the first string.
809 // This does not restore first[] array contents between firstLength and firstCapacity.
810 // (That might be uninitialized memory, as far as we know.)
811 if(first
!=NULL
) { /* don't dereference NULL */
812 safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length());
813 if(firstLength
<firstCapacity
) {
814 first
[firstLength
]=0; // NUL-terminate in case it was originally.
// Generic path: wrap `second` and call the virtual methods.
819 UnicodeString
secondString(secondLength
<0, second
, secondLength
);
821 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
);
823 n2
->append(firstString
, secondString
, *pErrorCode
);
827 return firstString
.extract(first
, firstCapacity
, *pErrorCode
);
// C API: forwards its arguments to the file-local normalizeSecondAndAppend() helper.
// NOTE(review): the trailing arguments of the call are not visible in this excerpt.
830 U_CAPI
int32_t U_EXPORT2
831 unorm2_normalizeSecondAndAppend(const UNormalizer2
*norm2
,
832 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
833 const UChar
*second
, int32_t secondLength
,
834 UErrorCode
*pErrorCode
) {
835 return normalizeSecondAndAppend(norm2
,
836 first
, firstLength
, firstCapacity
,
837 second
, secondLength
,
// C API: forwards its arguments to the file-local normalizeSecondAndAppend() helper.
// NOTE(review): the trailing arguments of the call are not visible in this excerpt.
841 U_CAPI
int32_t U_EXPORT2
842 unorm2_append(const UNormalizer2
*norm2
,
843 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
844 const UChar
*second
, int32_t secondLength
,
845 UErrorCode
*pErrorCode
) {
846 return normalizeSecondAndAppend(norm2
,
847 first
, firstLength
, firstCapacity
,
848 second
, secondLength
,
// C API: writes the decomposition of c into decomposition[0..capacity).
// A NULL array requires capacity 0, otherwise capacity must be >= 0; violations
// are reported as U_ILLEGAL_ARGUMENT_ERROR. When the C++ getDecomposition()
// returns true, the string is extracted into the caller's array and the
// result of extract() is returned.
// NOTE(review): the return value for the other outcomes is not visible here.
852 U_CAPI
int32_t U_EXPORT2
853 unorm2_getDecomposition(const UNormalizer2
*norm2
,
854 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
855 UErrorCode
*pErrorCode
) {
856 if(U_FAILURE(*pErrorCode
)) {
859 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
860 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// Wrap the caller's array: initial length 0, capacity as given.
863 UnicodeString
destString(decomposition
, 0, capacity
);
864 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getDecomposition(c
, destString
)) {
865 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
// C API: same visible shape as unorm2_getDecomposition(), but calls the C++
// getRawDecomposition().
// NOTE(review): the return value for the other outcomes is not visible here.
871 U_CAPI
int32_t U_EXPORT2
872 unorm2_getRawDecomposition(const UNormalizer2
*norm2
,
873 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
874 UErrorCode
*pErrorCode
) {
875 if(U_FAILURE(*pErrorCode
)) {
878 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
879 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// Wrap the caller's array: initial length 0, capacity as given.
882 UnicodeString
destString(decomposition
, 0, capacity
);
883 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getRawDecomposition(c
, destString
)) {
884 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
// C API: casts the handle to Normalizer2 and returns its composePair(a, b).
890 U_CAPI UChar32 U_EXPORT2
891 unorm2_composePair(const UNormalizer2
*norm2
, UChar32 a
, UChar32 b
) {
892 return reinterpret_cast<const Normalizer2
*>(norm2
)->composePair(a
, b
);
// C API: casts the handle to Normalizer2 and returns its getCombiningClass(c).
895 U_CAPI
uint8_t U_EXPORT2
896 unorm2_getCombiningClass(const UNormalizer2
*norm2
, UChar32 c
) {
897 return reinterpret_cast<const Normalizer2
*>(norm2
)->getCombiningClass(c
);
// C API: rejects a NULL s with non-zero length, or length < -1, with
// U_ILLEGAL_ARGUMENT_ERROR; otherwise wraps s in a UnicodeString (first
// constructor argument is length<0) and returns the C++ isNormalized() result.
900 U_CAPI UBool U_EXPORT2
901 unorm2_isNormalized(const UNormalizer2
*norm2
,
902 const UChar
*s
, int32_t length
,
903 UErrorCode
*pErrorCode
) {
904 if(U_FAILURE(*pErrorCode
)) {
907 if((s
==NULL
&& length
!=0) || length
<-1) {
908 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
911 UnicodeString
sString(length
<0, s
, length
);
912 return ((const Normalizer2
*)norm2
)->isNormalized(sString
, *pErrorCode
);
// C API: same argument validation and wrapping as unorm2_isNormalized();
// returns the C++ quickCheck() result.
915 U_CAPI UNormalizationCheckResult U_EXPORT2
916 unorm2_quickCheck(const UNormalizer2
*norm2
,
917 const UChar
*s
, int32_t length
,
918 UErrorCode
*pErrorCode
) {
919 if(U_FAILURE(*pErrorCode
)) {
922 if((s
==NULL
&& length
!=0) || length
<-1) {
923 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
926 UnicodeString
sString(length
<0, s
, length
);
927 return ((const Normalizer2
*)norm2
)->quickCheck(sString
, *pErrorCode
);
// C API: same argument validation and wrapping as unorm2_isNormalized();
// returns the C++ spanQuickCheckYes() result.
930 U_CAPI
int32_t U_EXPORT2
931 unorm2_spanQuickCheckYes(const UNormalizer2
*norm2
,
932 const UChar
*s
, int32_t length
,
933 UErrorCode
*pErrorCode
) {
934 if(U_FAILURE(*pErrorCode
)) {
937 if((s
==NULL
&& length
!=0) || length
<-1) {
938 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
941 UnicodeString
sString(length
<0, s
, length
);
942 return ((const Normalizer2
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
);
// C API: casts the handle to Normalizer2 and returns its hasBoundaryBefore(c).
945 U_CAPI UBool U_EXPORT2
946 unorm2_hasBoundaryBefore(const UNormalizer2
*norm2
, UChar32 c
) {
947 return ((const Normalizer2
*)norm2
)->hasBoundaryBefore(c
);
// C API: casts the handle to Normalizer2 and returns its hasBoundaryAfter(c).
950 U_CAPI UBool U_EXPORT2
951 unorm2_hasBoundaryAfter(const UNormalizer2
*norm2
, UChar32 c
) {
952 return ((const Normalizer2
*)norm2
)->hasBoundaryAfter(c
);
// C API: casts the handle to Normalizer2 and returns its isInert(c).
955 U_CAPI UBool U_EXPORT2
956 unorm2_isInert(const UNormalizer2
*norm2
, UChar32 c
) {
957 return ((const Normalizer2
*)norm2
)->isInert(c
);
960 // Some properties APIs ---------------------------------------------------- ***
// Returns the combining class of c as reported by the NFD instance obtained
// from Normalizer2Factory::getNFDInstance(), using a local error code.
// NOTE(review): the value returned when obtaining the instance fails is not
// visible in this excerpt.
962 U_CAPI
uint8_t U_EXPORT2
963 u_getCombiningClass(UChar32 c
) {
964 UErrorCode errorCode
=U_ZERO_ERROR
;
965 const Normalizer2
*nfd
=Normalizer2Factory::getNFDInstance(errorCode
);
966 if(U_SUCCESS(errorCode
)) {
967 return nfd
->getCombiningClass(c
);
// Returns the quick-check value of c for the given mode. Modes outside the
// open range (UNORM_NONE, UNORM_FCD) take an early path; for the rest the
// instance from Normalizer2Factory::getInstance() is cast to
// Normalizer2WithImpl and its getQuickCheck(c) is returned.
// NOTE(review): the values returned on the early path and on failure are not
// visible in this excerpt.
973 U_CFUNC UNormalizationCheckResult
974 unorm_getQuickCheck(UChar32 c
, UNormalizationMode mode
) {
975 if(mode
<=UNORM_NONE
|| UNORM_FCD
<=mode
) {
978 UErrorCode errorCode
=U_ZERO_ERROR
;
979 const Normalizer2
*norm2
=Normalizer2Factory::getInstance(mode
, errorCode
);
980 if(U_SUCCESS(errorCode
)) {
// The cast is unchecked; the range test above excludes UNORM_NONE, the mode
// that the factory's default branch is annotated with.
981 return ((const Normalizer2WithImpl
*)norm2
)->getQuickCheck(c
);
// Returns impl->getFCD16(c) from the NFC implementation obtained through
// Normalizer2Factory::getNFCImpl(), using a local error code.
// NOTE(review): the return type and the value returned on failure are not
// visible in this excerpt.
988 unorm_getFCD16(UChar32 c
) {
989 UErrorCode errorCode
=U_ZERO_ERROR
;
990 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
991 if(U_SUCCESS(errorCode
)) {
992 return impl
->getFCD16(c
);
998 #endif // !UCONFIG_NO_NORMALIZATION