2 *******************************************************************************
4 * Copyright (C) 2009-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: normalizer2.cpp
10 * tab size: 8 (not used)
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_NORMALIZATION
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
28 #include "normalizer2impl.h"
34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
// Out-of-line definition of the Normalizer2 destructor; the body is empty.
36 Normalizer2::~Normalizer2() {}
// Base-class getRawDecomposition(). Both parameters (a UChar32 and a
// UnicodeString reference) are unnamed, so this definition cannot use them.
// NOTE(review): the return type and the body are not visible in this listing.
39 Normalizer2::getRawDecomposition(UChar32
, UnicodeString
&) const {
// Base-class composePair(). Both UChar32 parameters are unnamed, so this
// definition cannot use them.
// NOTE(review): the return type and the body are not visible in this listing.
44 Normalizer2::composePair(UChar32
, UChar32
) const {
// Base-class getCombiningClass(). The code point parameter is commented out
// (/*c*/), so this definition does not depend on the character.
// NOTE(review): the return type and the body are not visible in this listing.
49 Normalizer2::getCombiningClass(UChar32
/*c*/) const {
// Invokes the UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION macro for Normalizer2.
// NOTE(review): the macro is defined outside this file; presumably it emits
// the UObject boilerplate for a class without ICU-style RTTI -- confirm.
53 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2
)
55 // Normalizer2 implementation for the old UNORM_NONE.
// Normalizer2 subclass whose visible boundary/inert queries always answer
// TRUE and whose string operations shown here only consult errorCode.
// NOTE(review): this listing omits lines (access specifier, some parameters,
// return types/statements and closing braces); the comments below describe
// only the code that is visible.
56 class NoopNormalizer2
: public Normalizer2
{
57 virtual ~NoopNormalizer2();
// normalize(): does its work only while errorCode reports success. One
// visible branch sets U_ILLEGAL_ARGUMENT_ERROR; the condition guarding that
// assignment is not visible here.
59 virtual UnicodeString
&
60 normalize(const UnicodeString
&src
,
62 UErrorCode
&errorCode
) const {
63 if(U_SUCCESS(errorCode
)) {
67 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// normalizeSecondAndAppend(): same visible shape as normalize() above, with
// (first, second) string arguments.
72 virtual UnicodeString
&
73 normalizeSecondAndAppend(UnicodeString
&first
,
74 const UnicodeString
&second
,
75 UErrorCode
&errorCode
) const {
76 if(U_SUCCESS(errorCode
)) {
80 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// append(): same visible shape as normalizeSecondAndAppend() above.
85 virtual UnicodeString
&
86 append(UnicodeString
&first
,
87 const UnicodeString
&second
,
88 UErrorCode
&errorCode
) const {
89 if(U_SUCCESS(errorCode
)) {
93 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// getDecomposition(): both parameters are unnamed, so the arguments are
// ignored. The body is not visible here.
99 getDecomposition(UChar32
, UnicodeString
&) const {
102 // No need to override the default getRawDecomposition().
// isNormalized(): both parameters are unnamed (ignored); body not visible.
104 isNormalized(const UnicodeString
&, UErrorCode
&) const {
// quickCheck(): both parameters are unnamed (ignored); body not visible.
107 virtual UNormalizationCheckResult
108 quickCheck(const UnicodeString
&, UErrorCode
&) const {
// spanQuickCheckYes(): only the string parameter is named; the error code is
// ignored. The body is not visible here.
112 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&) const {
// The three per-code-point queries below return TRUE for every input.
115 virtual UBool
hasBoundaryBefore(UChar32
) const { return TRUE
; }
116 virtual UBool
hasBoundaryAfter(UChar32
) const { return TRUE
; }
117 virtual UBool
isInert(UChar32
) const { return TRUE
; }
// Out-of-line definition of the NoopNormalizer2 destructor; the body is empty.
120 NoopNormalizer2::~NoopNormalizer2() {}
122 // Intermediate class:
123 // Has Normalizer2Impl and does boilerplate argument checking and setup.
// Normalizer2 subclass that holds a reference to a Normalizer2Impl and
// implements the UnicodeString-based entry points in terms of pointer-based
// pure virtuals that concrete subclasses must provide.
// NOTE(review): this listing omits lines (access specifiers, some parameters,
// return types/statements and closing braces); comments describe only the
// visible code.
124 class Normalizer2WithImpl
: public Normalizer2
{
// Stores the given implementation by reference in the impl member.
126 Normalizer2WithImpl(const Normalizer2Impl
&ni
) : impl(ni
) {}
127 virtual ~Normalizer2WithImpl();
// normalize(src, dest, errorCode): checks errorCode for failure first, then
// rejects dest aliasing src or a NULL src buffer with
// U_ILLEGAL_ARGUMENT_ERROR. Otherwise it wraps dest in a ReorderingBuffer
// sized from src.length() and delegates to the pointer-based normalize().
130 virtual UnicodeString
&
131 normalize(const UnicodeString
&src
,
133 UErrorCode
&errorCode
) const {
134 if(U_FAILURE(errorCode
)) {
138 const UChar
*sArray
=src
.getBuffer();
139 if(&dest
==&src
|| sArray
==NULL
) {
140 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
145 ReorderingBuffer
buffer(impl
, dest
);
146 if(buffer
.init(src
.length(), errorCode
)) {
147 normalize(sArray
, sArray
+src
.length(), buffer
, errorCode
);
// Pure virtual: normalize the range [src, limit) into buffer.
152 normalize(const UChar
*src
, const UChar
*limit
,
153 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
155 // normalize and append
// Forwards to the four-argument overload with doNormalize=TRUE.
156 virtual UnicodeString
&
157 normalizeSecondAndAppend(UnicodeString
&first
,
158 const UnicodeString
&second
,
159 UErrorCode
&errorCode
) const {
160 return normalizeSecondAndAppend(first
, second
, TRUE
, errorCode
);
// Forwards to the four-argument overload with doNormalize=FALSE.
162 virtual UnicodeString
&
163 append(UnicodeString
&first
,
164 const UnicodeString
&second
,
165 UErrorCode
&errorCode
) const {
166 return normalizeSecondAndAppend(first
, second
, FALSE
, errorCode
);
// Shared worker for normalizeSecondAndAppend()/append(). After
// uprv_checkCanGetBuffer(first) and the failure check, it rejects first
// aliasing second or a NULL second buffer, then appends via
// normalizeAndAppend() into a ReorderingBuffer over first. If errorCode ends
// up as a failure, the tail of first is replaced with safeMiddle, starting
// at firstLength-safeMiddle.length().
// NOTE(review): the doNormalize parameter is used below but its declaration
// line is not visible in this listing.
169 normalizeSecondAndAppend(UnicodeString
&first
,
170 const UnicodeString
&second
,
172 UErrorCode
&errorCode
) const {
173 uprv_checkCanGetBuffer(first
, errorCode
);
174 if(U_FAILURE(errorCode
)) {
177 const UChar
*secondArray
=second
.getBuffer();
178 if(&first
==&second
|| secondArray
==NULL
) {
179 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
182 int32_t firstLength
=first
.length();
183 UnicodeString safeMiddle
;
185 ReorderingBuffer
buffer(impl
, first
);
186 if(buffer
.init(firstLength
+second
.length(), errorCode
)) {
187 normalizeAndAppend(secondArray
, secondArray
+second
.length(), doNormalize
,
188 safeMiddle
, buffer
, errorCode
);
190 } // The ReorderingBuffer destructor finalizes the first string.
191 if(U_FAILURE(errorCode
)) {
192 // Restore the modified suffix of the first string.
193 first
.replace(firstLength
-safeMiddle
.length(), 0x7fffffff, safeMiddle
);
// Pure virtual: append [src, limit) to buffer, normalizing when doNormalize
// is set; safeMiddle is an in/out string that the caller above uses to
// restore first on failure.
198 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
199 UnicodeString
&safeMiddle
,
200 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
// getDecomposition(): asks impl for the decomposition of c. Two visible
// outcomes: copy from the local buffer, or alias impl's data read-only.
// NOTE(review): declarations of buffer/length and the branch conditions are
// not visible in this listing.
202 getDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
205 const UChar
*d
=impl
.getDecomposition(c
, buffer
, length
);
210 decomposition
.setTo(buffer
, length
); // copy the string (Jamos from Hangul syllable c)
212 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
// getRawDecomposition(): same shape as getDecomposition() above, but uses
// impl.getRawDecomposition().
217 getRawDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
220 const UChar
*d
=impl
.getRawDecomposition(c
, buffer
, length
);
225 decomposition
.setTo(buffer
, length
); // copy the string (algorithmic decomposition)
227 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
// composePair(): forwards directly to impl.composePair(a, b).
232 composePair(UChar32 a
, UChar32 b
) const {
233 return impl
.composePair(a
, b
);
// getCombiningClass(): looks up the norm16 value of c and converts it with
// impl.getCC().
237 getCombiningClass(UChar32 c
) const {
238 return impl
.getCC(impl
.getNorm16(c
));
// isNormalized(): true when the pointer-based spanQuickCheckYes() reaches
// the end of the string. One visible path sets U_ILLEGAL_ARGUMENT_ERROR; its
// guarding condition is not visible here.
243 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
244 if(U_FAILURE(errorCode
)) {
247 const UChar
*sArray
=s
.getBuffer();
249 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
252 const UChar
*sLimit
=sArray
+s
.length();
253 return sLimit
==spanQuickCheckYes(sArray
, sLimit
, errorCode
);
// quickCheck(): maps this class's own isNormalized() (qualified call, so not
// virtually dispatched) to UNORM_YES / UNORM_NO.
255 virtual UNormalizationCheckResult
256 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
257 return Normalizer2WithImpl::isNormalized(s
, errorCode
) ? UNORM_YES
: UNORM_NO
;
// spanQuickCheckYes(UnicodeString): converts the end pointer returned by the
// pointer-based overload into an int32_t offset from the start of s.
260 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
261 if(U_FAILURE(errorCode
)) {
264 const UChar
*sArray
=s
.getBuffer();
266 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
269 return (int32_t)(spanQuickCheckYes(sArray
, sArray
+s
.length(), errorCode
)-sArray
);
// Pure virtual: pointer-based span check over [src, limit); returns a
// pointer into that range.
271 virtual const UChar
*
272 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const = 0;
// getQuickCheck(): default for subclasses that do not override it; the code
// point parameter is unnamed. The body is not visible here.
274 virtual UNormalizationCheckResult
getQuickCheck(UChar32
) const {
// The normalization data/implementation shared by all methods above.
278 const Normalizer2Impl
&impl
;
// Out-of-line definition of the Normalizer2WithImpl destructor; empty body.
281 Normalizer2WithImpl::~Normalizer2WithImpl() {}
// Normalizer2WithImpl subclass whose operations map onto the impl's
// decompose*() family of functions.
// NOTE(review): this listing omits lines (access specifiers, some return
// types and closing braces); comments describe only the visible code.
283 class DecomposeNormalizer2
: public Normalizer2WithImpl
{
285 DecomposeNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
286 virtual ~DecomposeNormalizer2();
// Pointer-based normalize(): decomposes [src, limit) into buffer.
290 normalize(const UChar
*src
, const UChar
*limit
,
291 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
292 impl
.decompose(src
, limit
, &buffer
, errorCode
);
294 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// Forwards all arguments unchanged to impl.decomposeAndAppend().
296 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
297 UnicodeString
&safeMiddle
,
298 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
299 impl
.decomposeAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
// Calls impl.decompose() with a NULL buffer and returns the pointer it
// yields.
301 virtual const UChar
*
302 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
303 return impl
.decompose(src
, limit
, NULL
, errorCode
);
305 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// UNORM_YES when impl.isDecompYes() accepts c's norm16 value, else UNORM_NO.
306 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const {
307 return impl
.isDecompYes(impl
.getNorm16(c
)) ? UNORM_YES
: UNORM_NO
;
// Boundary queries: hasDecompBoundary(c, TRUE) for "before", (c, FALSE) for
// "after"; isInert() maps to impl.isDecompInert().
309 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasDecompBoundary(c
, TRUE
); }
310 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasDecompBoundary(c
, FALSE
); }
311 virtual UBool
isInert(UChar32 c
) const { return impl
.isDecompInert(c
); }
// Out-of-line definition of the DecomposeNormalizer2 destructor; empty body.
314 DecomposeNormalizer2::~DecomposeNormalizer2() {}
// Normalizer2WithImpl subclass whose operations map onto the impl's
// compose*() family. The onlyContiguous flag (constructor argument "fcc") is
// passed through to every compose-related impl call.
// NOTE(review): this listing omits lines (access specifiers, some return
// types/statements and closing braces); comments describe only the visible
// code.
316 class ComposeNormalizer2
: public Normalizer2WithImpl
{
318 ComposeNormalizer2(const Normalizer2Impl
&ni
, UBool fcc
) :
319 Normalizer2WithImpl(ni
), onlyContiguous(fcc
) {}
320 virtual ~ComposeNormalizer2();
// Pointer-based normalize(): composes [src, limit) into buffer, passing TRUE
// as impl.compose()'s fourth argument.
324 normalize(const UChar
*src
, const UChar
*limit
,
325 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
326 impl
.compose(src
, limit
, onlyContiguous
, TRUE
, buffer
, errorCode
);
328 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// Forwards to impl.composeAndAppend(), inserting onlyContiguous.
330 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
331 UnicodeString
&safeMiddle
,
332 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
333 impl
.composeAndAppend(src
, limit
, doNormalize
, onlyContiguous
, safeMiddle
, buffer
, errorCode
);
// isNormalized(): runs impl.compose() with FALSE as the fourth argument and
// a small scratch ReorderingBuffer, returning compose()'s result.
// NOTE(review): the declaration of temp and the condition guarding the
// U_ILLEGAL_ARGUMENT_ERROR assignment are not visible in this listing.
337 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
338 if(U_FAILURE(errorCode
)) {
341 const UChar
*sArray
=s
.getBuffer();
343 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
347 ReorderingBuffer
buffer(impl
, temp
);
348 if(!buffer
.init(5, errorCode
)) { // small destCapacity for substring normalization
351 return impl
.compose(sArray
, sArray
+s
.length(), onlyContiguous
, FALSE
, buffer
, errorCode
);
// quickCheck(): starts from UNORM_YES and lets impl.composeQuickCheck()
// update the result through the qcResult pointer.
// NOTE(review): the return statement is not visible in this listing.
353 virtual UNormalizationCheckResult
354 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
355 if(U_FAILURE(errorCode
)) {
358 const UChar
*sArray
=s
.getBuffer();
360 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
363 UNormalizationCheckResult qcResult
=UNORM_YES
;
364 impl
.composeQuickCheck(sArray
, sArray
+s
.length(), onlyContiguous
, &qcResult
);
// Pointer-based span check: composeQuickCheck() with a NULL result pointer;
// the error code parameter is unnamed and unused.
367 virtual const UChar
*
368 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&) const {
369 return impl
.composeQuickCheck(src
, limit
, onlyContiguous
, NULL
);
371 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// Returns impl.getCompQuickCheck() applied to c's norm16 value.
372 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const {
373 return impl
.getCompQuickCheck(impl
.getNorm16(c
));
375 virtual UBool
hasBoundaryBefore(UChar32 c
) const {
376 return impl
.hasCompBoundaryBefore(c
);
// hasBoundaryAfter() and isInert() share impl.hasCompBoundaryAfter() and
// differ only in the last argument (FALSE vs. TRUE).
378 virtual UBool
hasBoundaryAfter(UChar32 c
) const {
379 return impl
.hasCompBoundaryAfter(c
, onlyContiguous
, FALSE
);
381 virtual UBool
isInert(UChar32 c
) const {
382 return impl
.hasCompBoundaryAfter(c
, onlyContiguous
, TRUE
);
// Set once from the constructor's fcc argument.
385 const UBool onlyContiguous
;
// Out-of-line definition of the ComposeNormalizer2 destructor; empty body.
388 ComposeNormalizer2::~ComposeNormalizer2() {}
// Normalizer2WithImpl subclass whose operations map onto the impl's
// makeFCD*() / *FCD*() functions.
// NOTE(review): this listing omits lines (access specifiers, some return
// types and closing braces); comments describe only the visible code.
390 class FCDNormalizer2
: public Normalizer2WithImpl
{
392 FCDNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
393 virtual ~FCDNormalizer2();
// Pointer-based normalize(): runs impl.makeFCD() writing into buffer.
397 normalize(const UChar
*src
, const UChar
*limit
,
398 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
399 impl
.makeFCD(src
, limit
, &buffer
, errorCode
);
401 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
// Forwards all arguments unchanged to impl.makeFCDAndAppend().
403 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
404 UnicodeString
&safeMiddle
,
405 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
406 impl
.makeFCDAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
// Calls impl.makeFCD() with a NULL buffer and returns the pointer it yields.
408 virtual const UChar
*
409 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
410 return impl
.makeFCD(src
, limit
, NULL
, errorCode
);
412 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
// Per-code-point queries forward to the matching FCD functions of impl.
413 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasFCDBoundaryBefore(c
); }
414 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasFCDBoundaryAfter(c
); }
415 virtual UBool
isInert(UChar32 c
) const { return impl
.isFCDInert(c
); }
// Out-of-line definition of the FCDNormalizer2 destructor; empty body.
418 FCDNormalizer2::~FCDNormalizer2() {}
420 // instance cache ---------------------------------------------------------- ***
// Bundles one Normalizer2Impl with the normalizer objects built on top of
// it, so that all modes of one data file share a single impl.
// NOTE(review): the constructor initializes an fcd member, but its
// declaration line (and the closing brace) is not visible in this listing.
422 struct Norm2AllModes
: public UMemory
{
// Factory; defined after the struct. One parameter line is not visible here.
423 static Norm2AllModes
*createInstance(const char *packageName
,
425 UErrorCode
&errorCode
);
// comp is built with the flag FALSE and fcc with TRUE; decomp and fcd take
// only the impl reference.
426 Norm2AllModes() : comp(impl
, FALSE
), decomp(impl
), fcd(impl
), fcc(impl
, TRUE
) {}
// impl is declared first, ahead of the members that hold a reference to it.
428 Normalizer2Impl impl
;
429 ComposeNormalizer2 comp
;
430 DecomposeNormalizer2 decomp
;
432 ComposeNormalizer2 fcc
;
// Allocates a Norm2AllModes, loads its impl from (packageName, name), and
// returns the orphaned pointer on success or NULL on failure; the
// LocalPointer then deletes the object. A failed allocation sets
// U_MEMORY_ALLOCATION_ERROR.
// NOTE(review): the name parameter is used below but its declaration line,
// the return type, and the early-return statements are not visible in this
// listing.
436 Norm2AllModes::createInstance(const char *packageName
,
438 UErrorCode
&errorCode
) {
439 if(U_FAILURE(errorCode
)) {
442 LocalPointer
<Norm2AllModes
> allModes(new Norm2AllModes
);
443 if(allModes
.isNull()) {
444 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
447 allModes
->impl
.load(packageName
, name
, errorCode
);
448 return U_SUCCESS(errorCode
) ? allModes
.orphan() : NULL
;
// Forward declaration: the singleton wrappers below register this function
// with ucln_common_registerCleanup() before its definition appears.
452 static UBool U_CALLCONV
uprv_normalizer2_cleanup();
// Wrapper around a TriStateSingleton that yields the Norm2AllModes for one
// data file name.
// NOTE(review): the name member is initialized and read below, but its
// declaration, the access specifiers and the closing braces are not visible
// in this listing.
455 class Norm2AllModesSingleton
: public TriStateSingletonWrapper
<Norm2AllModes
> {
457 Norm2AllModesSingleton(TriStateSingleton
&s
, const char *n
) :
458 TriStateSingletonWrapper
<Norm2AllModes
>(s
), name(n
) {}
// Delegates to the base wrapper, passing createInstance as the creation
// callback and name as its context.
459 Norm2AllModes
*getInstance(UErrorCode
&errorCode
) {
460 return TriStateSingletonWrapper
<Norm2AllModes
>::getInstance(createInstance
, name
, errorCode
);
// Creation callback: registers the cleanup function, then loads the data
// file named by context from the default (NULL) package.
463 static void *createInstance(const void *context
, UErrorCode
&errorCode
) {
464 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
465 return Norm2AllModes::createInstance(NULL
, (const char *)context
, errorCode
);
// Singleton objects for the "nfc", "nfkc" and "nfkc_cf" data files, as
// paired with those names elsewhere in this file. The macro itself is
// defined outside this file.
471 STATIC_TRI_STATE_SINGLETON(nfcSingleton
);
472 STATIC_TRI_STATE_SINGLETON(nfkcSingleton
);
473 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton
);
// Wrapper around a SimpleSingleton that yields a single NoopNormalizer2.
// NOTE(review): access specifiers, the NULL check around the allocation, the
// return statement and the closing braces are not visible in this listing.
475 class Norm2Singleton
: public SimpleSingletonWrapper
<Normalizer2
> {
477 Norm2Singleton(SimpleSingleton
&s
) : SimpleSingletonWrapper
<Normalizer2
>(s
) {}
// Delegates to the base wrapper with createInstance and a NULL context.
478 Normalizer2
*getInstance(UErrorCode
&errorCode
) {
479 return SimpleSingletonWrapper
<Normalizer2
>::getInstance(createInstance
, NULL
, errorCode
);
// Creation callback: allocates the NoopNormalizer2; one visible path reports
// U_MEMORY_ALLOCATION_ERROR, and the cleanup function is registered.
482 static void *createInstance(const void *, UErrorCode
&errorCode
) {
483 Normalizer2
*noop
=new NoopNormalizer2
;
485 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
487 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
// Singleton object used by Norm2Singleton for the no-op normalizer.
492 STATIC_SIMPLE_SINGLETON(noopSingleton
);
// Hash table of Norm2AllModes keyed by data file name; NULL until
// Normalizer2::getInstance() opens it.
494 static UHashtable
*cache
=NULL
;
// Value deleter installed on the cache: deletes the pointer as a
// Norm2AllModes.
498 static void U_CALLCONV
deleteNorm2AllModes(void *allModes
) {
499 delete (Norm2AllModes
*)allModes
;
// Library cleanup hook: deletes the instances held by the three
// Norm2AllModes singletons and by the no-op singleton. The name argument is
// NULL because it is not needed for deletion.
// NOTE(review): any handling of the cache and the return statement are not
// visible in this listing.
502 static UBool U_CALLCONV
uprv_normalizer2_cleanup() {
503 Norm2AllModesSingleton(nfcSingleton
, NULL
).deleteInstance();
504 Norm2AllModesSingleton(nfkcSingleton
, NULL
).deleteInstance();
505 Norm2AllModesSingleton(nfkc_cfSingleton
, NULL
).deleteInstance();
506 Norm2Singleton(noopSingleton
).deleteInstance();
// Returns the comp normalizer of the "nfc" Norm2AllModes singleton, or NULL
// when the singleton could not be obtained.
514 const Normalizer2
*Normalizer2Factory::getNFCInstance(UErrorCode
&errorCode
) {
515 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
516 return allModes
!=NULL
? &allModes
->comp
: NULL
;
// Returns the decomp normalizer of the "nfc" Norm2AllModes singleton, or
// NULL when the singleton could not be obtained.
519 const Normalizer2
*Normalizer2Factory::getNFDInstance(UErrorCode
&errorCode
) {
520 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
521 return allModes
!=NULL
? &allModes
->decomp
: NULL
;
// Returns the fcd normalizer of the "nfc" Norm2AllModes singleton, or NULL
// when the singleton could not be obtained.
524 const Normalizer2
*Normalizer2Factory::getFCDInstance(UErrorCode
&errorCode
) {
525 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
526 return allModes
!=NULL
? &allModes
->fcd
: NULL
;
// Returns the fcc normalizer of the "nfc" Norm2AllModes singleton, or NULL
// when the singleton could not be obtained.
529 const Normalizer2
*Normalizer2Factory::getFCCInstance(UErrorCode
&errorCode
) {
530 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
531 return allModes
!=NULL
? &allModes
->fcc
: NULL
;
// Returns the comp normalizer of the "nfkc" Norm2AllModes singleton, or NULL
// when the singleton could not be obtained.
534 const Normalizer2
*Normalizer2Factory::getNFKCInstance(UErrorCode
&errorCode
) {
535 Norm2AllModes
*allModes
=
536 Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
537 return allModes
!=NULL
? &allModes
->comp
: NULL
;
// Returns the decomp normalizer of the "nfkc" Norm2AllModes singleton, or
// NULL when the singleton could not be obtained.
540 const Normalizer2
*Normalizer2Factory::getNFKDInstance(UErrorCode
&errorCode
) {
541 Norm2AllModes
*allModes
=
542 Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
543 return allModes
!=NULL
? &allModes
->decomp
: NULL
;
// Returns the comp normalizer of the "nfkc_cf" Norm2AllModes singleton, or
// NULL when the singleton could not be obtained.
546 const Normalizer2
*Normalizer2Factory::getNFKC_CFInstance(UErrorCode
&errorCode
) {
547 Norm2AllModes
*allModes
=
548 Norm2AllModesSingleton(nfkc_cfSingleton
, "nfkc_cf").getInstance(errorCode
);
549 return allModes
!=NULL
? &allModes
->comp
: NULL
;
// Returns the shared no-op normalizer held by noopSingleton.
552 const Normalizer2
*Normalizer2Factory::getNoopInstance(UErrorCode
&errorCode
) {
553 return Norm2Singleton(noopSingleton
).getInstance(errorCode
);
// Maps a UNormalizationMode to one of the factory getters above. The default
// branch (commented as UNORM_NONE) yields the no-op instance.
// NOTE(review): the return type, the switch statement and its case labels
// are not visible in this listing; only the returned getters are. The order
// shown is NFD, NFKD, NFC, NFKC, FCD, then default.
557 Normalizer2Factory::getInstance(UNormalizationMode mode
, UErrorCode
&errorCode
) {
558 if(U_FAILURE(errorCode
)) {
563 return getNFDInstance(errorCode
);
565 return getNFKDInstance(errorCode
);
567 return getNFCInstance(errorCode
);
569 return getNFKCInstance(errorCode
);
571 return getFCDInstance(errorCode
);
572 default: // UNORM_NONE
573 return getNoopInstance(errorCode
);
// Returns the Normalizer2Impl of the "nfc" Norm2AllModes singleton, or NULL
// when the singleton could not be obtained.
577 const Normalizer2Impl
*
578 Normalizer2Factory::getNFCImpl(UErrorCode
&errorCode
) {
579 Norm2AllModes
*allModes
=
580 Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
581 return allModes
!=NULL
? &allModes
->impl
: NULL
;
// Returns the Normalizer2Impl of the "nfkc" Norm2AllModes singleton, or NULL
// when the singleton could not be obtained.
584 const Normalizer2Impl
*
585 Normalizer2Factory::getNFKCImpl(UErrorCode
&errorCode
) {
586 Norm2AllModes
*allModes
=
587 Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
588 return allModes
!=NULL
? &allModes
->impl
: NULL
;
// Returns the Normalizer2Impl of the "nfkc_cf" Norm2AllModes singleton, or
// NULL when the singleton could not be obtained.
591 const Normalizer2Impl
*
592 Normalizer2Factory::getNFKC_CFImpl(UErrorCode
&errorCode
) {
593 Norm2AllModes
*allModes
=
594 Norm2AllModesSingleton(nfkc_cfSingleton
, "nfkc_cf").getInstance(errorCode
);
595 return allModes
!=NULL
? &allModes
->impl
: NULL
;
// Returns the address of the impl member of norm2. The cast is an unchecked
// C-style downcast, so the caller must pass an object that really is a
// Normalizer2WithImpl.
598 const Normalizer2Impl
*
599 Normalizer2Factory::getImpl(const Normalizer2
*norm2
) {
600 return &((Normalizer2WithImpl
*)norm2
)->impl
;
// Public entry point; forwards to Normalizer2Factory::getNFCInstance().
// NOTE(review): the return type line is not visible in this listing.
604 Normalizer2::getNFCInstance(UErrorCode
&errorCode
) {
605 return Normalizer2Factory::getNFCInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFDInstance().
// NOTE(review): the return type line is not visible in this listing.
609 Normalizer2::getNFDInstance(UErrorCode
&errorCode
) {
610 return Normalizer2Factory::getNFDInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFKCInstance().
// NOTE(review): the return type line is not visible in this listing.
614 Normalizer2::getNFKCInstance(UErrorCode
&errorCode
) {
615 return Normalizer2Factory::getNFKCInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFKDInstance().
// NOTE(review): the return type line is not visible in this listing.
619 Normalizer2::getNFKDInstance(UErrorCode
&errorCode
) {
620 return Normalizer2Factory::getNFKDInstance(errorCode
);
// Public entry point; forwards to Normalizer2Factory::getNFKC_CFInstance().
// NOTE(review): the return type line is not visible in this listing.
624 Normalizer2::getNFKCCasefoldInstance(UErrorCode
&errorCode
) {
625 return Normalizer2Factory::getNFKC_CFInstance(errorCode
);
// Returns the normalizer for (packageName, name, mode).
//  - A NULL or empty name is rejected with U_ILLEGAL_ARGUMENT_ERROR.
//  - With a NULL packageName, the names "nfc", "nfkc" and "nfkc_cf" are
//    served from the built-in singletons.
//  - Otherwise the Norm2AllModes is looked up in, or created and added to,
//    the file-level cache keyed by a heap copy of name.
//  - Finally the requested mode selects comp, decomp, fcd or fcc.
// NOTE(review): this listing omits many lines here, including the name
// parameter declaration, the return type, any locking around the cache, the
// NULL checks, the switch header, two case labels and the final return. Read
// the control flow between the visible statements with that in mind.
629 Normalizer2::getInstance(const char *packageName
,
631 UNormalization2Mode mode
,
632 UErrorCode
&errorCode
) {
633 if(U_FAILURE(errorCode
)) {
636 if(name
==NULL
|| *name
==0) {
637 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
640 Norm2AllModes
*allModes
=NULL
;
// Built-in data files are only considered when no package is given.
641 if(packageName
==NULL
) {
642 if(0==uprv_strcmp(name
, "nfc")) {
643 allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
644 } else if(0==uprv_strcmp(name
, "nfkc")) {
645 allModes
=Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
646 } else if(0==uprv_strcmp(name
, "nfkc_cf")) {
647 allModes
=Norm2AllModesSingleton(nfkc_cfSingleton
, "nfkc_cf").getInstance(errorCode
);
// Not a built-in instance: consult the cache.
650 if(allModes
==NULL
&& U_SUCCESS(errorCode
)) {
654 allModes
=(Norm2AllModes
*)uhash_get(cache
, name
);
// Create a new instance; localAllModes owns it until it is put in the cache.
658 LocalPointer
<Norm2AllModes
> localAllModes(
659 Norm2AllModes::createInstance(packageName
, name
, errorCode
));
660 if(U_SUCCESS(errorCode
)) {
// Open the cache with string keys; it owns both keys (uprv_free) and values
// (deleteNorm2AllModes).
663 cache
=uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, &errorCode
);
664 if(U_FAILURE(errorCode
)) {
667 uhash_setKeyDeleter(cache
, uprv_free
);
668 uhash_setValueDeleter(cache
, deleteNorm2AllModes
);
// Second lookup after creating the instance; when it finds an entry, that
// entry is used (see the assignment from temp below).
670 void *temp
=uhash_get(cache
, name
);
// The key is a malloc'ed copy of name including the terminating NUL.
672 int32_t keyLength
=uprv_strlen(name
)+1;
673 char *nameCopy
=(char *)uprv_malloc(keyLength
);
675 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
678 uprv_memcpy(nameCopy
, name
, keyLength
);
// Ownership of the new instance passes from localAllModes to the cache.
679 uhash_put(cache
, nameCopy
, allModes
=localAllModes
.orphan(), &errorCode
);
682 allModes
=(Norm2AllModes
*)temp
;
// Select the normalizer for the requested mode.
687 if(allModes
!=NULL
&& U_SUCCESS(errorCode
)) {
690 return &allModes
->comp
;
691 case UNORM2_DECOMPOSE
:
692 return &allModes
->decomp
;
694 return &allModes
->fcd
;
695 case UNORM2_COMPOSE_CONTIGUOUS
:
696 return &allModes
->fcc
;
706 // C API ------------------------------------------------------------------- ***
// C API wrapper: returns Normalizer2::getNFCInstance() cast to the
// UNormalizer2 handle type. pErrorCode is dereferenced without a NULL check.
710 U_DRAFT
const UNormalizer2
* U_EXPORT2
711 unorm2_getNFCInstance(UErrorCode
*pErrorCode
) {
712 return (const UNormalizer2
*)Normalizer2::getNFCInstance(*pErrorCode
);
// C API wrapper: returns Normalizer2::getNFDInstance() cast to the
// UNormalizer2 handle type. pErrorCode is dereferenced without a NULL check.
715 U_DRAFT
const UNormalizer2
* U_EXPORT2
716 unorm2_getNFDInstance(UErrorCode
*pErrorCode
) {
717 return (const UNormalizer2
*)Normalizer2::getNFDInstance(*pErrorCode
);
// C API wrapper: returns Normalizer2::getNFKCInstance() cast to the
// UNormalizer2 handle type. pErrorCode is dereferenced without a NULL check.
720 U_DRAFT
const UNormalizer2
* U_EXPORT2
721 unorm2_getNFKCInstance(UErrorCode
*pErrorCode
) {
722 return (const UNormalizer2
*)Normalizer2::getNFKCInstance(*pErrorCode
);
// C API wrapper: returns Normalizer2::getNFKDInstance() cast to the
// UNormalizer2 handle type. pErrorCode is dereferenced without a NULL check.
725 U_DRAFT
const UNormalizer2
* U_EXPORT2
726 unorm2_getNFKDInstance(UErrorCode
*pErrorCode
) {
727 return (const UNormalizer2
*)Normalizer2::getNFKDInstance(*pErrorCode
);
// C API wrapper: returns Normalizer2::getNFKCCasefoldInstance() cast to the
// UNormalizer2 handle type. pErrorCode is dereferenced without a NULL check.
730 U_DRAFT
const UNormalizer2
* U_EXPORT2
731 unorm2_getNFKCCasefoldInstance(UErrorCode
*pErrorCode
) {
732 return (const UNormalizer2
*)Normalizer2::getNFKCCasefoldInstance(*pErrorCode
);
// C API wrapper: forwards (packageName, name, mode) to
// Normalizer2::getInstance() and casts the result to the UNormalizer2 handle
// type.
// NOTE(review): the name parameter is used below but its declaration line is
// not visible in this listing.
735 U_DRAFT
const UNormalizer2
* U_EXPORT2
736 unorm2_getInstance(const char *packageName
,
738 UNormalization2Mode mode
,
739 UErrorCode
*pErrorCode
) {
740 return (const UNormalizer2
*)Normalizer2::getInstance(packageName
, name
, mode
, *pErrorCode
);
// C API wrapper: deletes the handle as a Normalizer2 (through its virtual
// destructor).
743 U_DRAFT
void U_EXPORT2
744 unorm2_close(UNormalizer2
*norm2
) {
745 delete (Normalizer2
*)norm2
;
// C API: normalizes src[0..length) (length -1 is accepted by the argument
// check; per the comment below it denotes NUL-terminated input) into
// dest[0..capacity) and returns the result of UnicodeString::extract().
// Argument errors (NULL buffer with non-zero size, negative sizes, or
// src==dest) set U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the early returns, the length!=0 guard, and the
// if/else that chooses between the two code paths below are not visible in
// this listing.
748 U_DRAFT
int32_t U_EXPORT2
749 unorm2_normalize(const UNormalizer2
*norm2
,
750 const UChar
*src
, int32_t length
,
751 UChar
*dest
, int32_t capacity
,
752 UErrorCode
*pErrorCode
) {
753 if(U_FAILURE(*pErrorCode
)) {
756 if( (src
==NULL
? length
!=0 : length
<-1) ||
757 (dest
==NULL
? capacity
!=0 : capacity
<0) ||
758 (src
==dest
&& src
!=NULL
)
760 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
// Wrap the caller's buffer: empty string with the given capacity.
763 UnicodeString
destString(dest
, 0, capacity
);
764 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
766 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
767 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
// Path using n2wi: call the pointer-based normalize() directly; a negative
// length is passed on as a NULL limit.
769 // Avoid duplicate argument checking and support NUL-terminated src.
770 ReorderingBuffer
buffer(n2wi
->impl
, destString
);
771 if(buffer
.init(length
, *pErrorCode
)) {
772 n2wi
->normalize(src
, length
>=0 ? src
+length
: NULL
, buffer
, *pErrorCode
);
// Path using n2: alias src in a read-only UnicodeString and use the generic
// virtual normalize().
775 UnicodeString
srcString(length
<0, src
, length
);
776 n2
->normalize(srcString
, destString
, *pErrorCode
);
779 return destString
.extract(dest
, capacity
, *pErrorCode
);
// Shared worker for unorm2_normalizeSecondAndAppend() and unorm2_append().
// Validates the buffers, wraps first[] in a UnicodeString, appends second
// (through the pointer-based normalizeAndAppend() on the n2wi path, or the
// generic virtuals on the n2 path), and returns the result of extract().
// On failure, or when the result no longer fits firstCapacity, the modified
// tail of first[] is restored from safeMiddle.
// NOTE(review): the doNormalize parameter declaration, the return type, the
// early returns and the if/else lines that pick between code paths are not
// visible in this listing.
783 normalizeSecondAndAppend(const UNormalizer2
*norm2
,
784 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
785 const UChar
*second
, int32_t secondLength
,
787 UErrorCode
*pErrorCode
) {
788 if(U_FAILURE(*pErrorCode
)) {
// Reject NULL buffers with non-zero sizes, negative sizes, and first==second.
791 if( (second
==NULL
? secondLength
!=0 : secondLength
<-1) ||
792 (first
==NULL
? (firstCapacity
!=0 || firstLength
!=0) :
793 (firstCapacity
<0 || firstLength
<-1)) ||
794 (first
==second
&& first
!=NULL
)
796 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
799 UnicodeString
firstString(first
, firstLength
, firstCapacity
);
800 firstLength
=firstString
.length(); // In case it was -1.
801 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
802 if(secondLength
!=0) {
803 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
804 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
806 // Avoid duplicate argument checking and support NUL-terminated src.
807 UnicodeString safeMiddle
;
809 ReorderingBuffer
buffer(n2wi
->impl
, firstString
);
810 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) { // destCapacity>=-1
811 n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength
: NULL
,
812 doNormalize
, safeMiddle
, buffer
, *pErrorCode
);
814 } // The ReorderingBuffer destructor finalizes firstString.
815 if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) {
816 // Restore the modified suffix of the first string.
817 // This does not restore first[] array contents between firstLength and firstCapacity.
818 // (That might be uninitialized memory, as far as we know.)
819 if(first
!=NULL
) { /* don't dereference NULL */
820 safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length());
821 if(firstLength
<firstCapacity
) {
822 first
[firstLength
]=0; // NUL-terminate in case it was originally.
// Path using n2: alias second read-only and use the virtual API, either
// normalizeSecondAndAppend() or append().
827 UnicodeString
secondString(secondLength
<0, second
, secondLength
);
829 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
);
831 n2
->append(firstString
, secondString
, *pErrorCode
);
835 return firstString
.extract(first
, firstCapacity
, *pErrorCode
);
// C API: forwards to the file-local normalizeSecondAndAppend() worker.
// NOTE(review): the final arguments of the call (the doNormalize flag and
// the error code) are not visible in this listing.
838 U_DRAFT
int32_t U_EXPORT2
839 unorm2_normalizeSecondAndAppend(const UNormalizer2
*norm2
,
840 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
841 const UChar
*second
, int32_t secondLength
,
842 UErrorCode
*pErrorCode
) {
843 return normalizeSecondAndAppend(norm2
,
844 first
, firstLength
, firstCapacity
,
845 second
, secondLength
,
// C API: forwards to the file-local normalizeSecondAndAppend() worker.
// NOTE(review): the final arguments of the call (the doNormalize flag and
// the error code) are not visible in this listing.
849 U_DRAFT
int32_t U_EXPORT2
850 unorm2_append(const UNormalizer2
*norm2
,
851 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
852 const UChar
*second
, int32_t secondLength
,
853 UErrorCode
*pErrorCode
) {
854 return normalizeSecondAndAppend(norm2
,
855 first
, firstLength
, firstCapacity
,
856 second
, secondLength
,
// C API: writes the decomposition of c into decomposition[0..capacity).
// A NULL buffer with non-zero capacity, or a negative capacity, sets
// U_ILLEGAL_ARGUMENT_ERROR. When Normalizer2::getDecomposition() reports
// true, the string is extracted into the caller's buffer and extract()'s
// result is returned.
// NOTE(review): the early returns and the value returned when there is no
// decomposition are not visible in this listing.
860 U_DRAFT
int32_t U_EXPORT2
861 unorm2_getDecomposition(const UNormalizer2
*norm2
,
862 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
863 UErrorCode
*pErrorCode
) {
864 if(U_FAILURE(*pErrorCode
)) {
867 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
868 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
871 UnicodeString
destString(decomposition
, 0, capacity
);
872 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getDecomposition(c
, destString
)) {
873 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
// C API: same shape as unorm2_getDecomposition(), but calls
// Normalizer2::getRawDecomposition(). A NULL buffer with non-zero capacity,
// or a negative capacity, sets U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the early returns and the value returned when there is no
// decomposition are not visible in this listing.
879 U_DRAFT
int32_t U_EXPORT2
880 unorm2_getRawDecomposition(const UNormalizer2
*norm2
,
881 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
882 UErrorCode
*pErrorCode
) {
883 if(U_FAILURE(*pErrorCode
)) {
886 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
887 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
890 UnicodeString
destString(decomposition
, 0, capacity
);
891 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getRawDecomposition(c
, destString
)) {
892 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
// C API wrapper: forwards to Normalizer2::composePair(a, b) on the handle.
898 U_DRAFT UChar32 U_EXPORT2
899 unorm2_composePair(const UNormalizer2
*norm2
, UChar32 a
, UChar32 b
) {
900 return reinterpret_cast<const Normalizer2
*>(norm2
)->composePair(a
, b
);
// C API wrapper: forwards to Normalizer2::getCombiningClass(c) on the handle.
903 U_DRAFT
uint8_t U_EXPORT2
904 unorm2_getCombiningClass(const UNormalizer2
*norm2
, UChar32 c
) {
905 return reinterpret_cast<const Normalizer2
*>(norm2
)->getCombiningClass(c
);
// C API: rejects a NULL s with non-zero length, or a length below -1, with
// U_ILLEGAL_ARGUMENT_ERROR. Otherwise it aliases s in a read-only
// UnicodeString (the first constructor argument is true when length<0) and
// calls Normalizer2::isNormalized().
// NOTE(review): the early-return statements are not visible in this listing.
908 U_DRAFT UBool U_EXPORT2
909 unorm2_isNormalized(const UNormalizer2
*norm2
,
910 const UChar
*s
, int32_t length
,
911 UErrorCode
*pErrorCode
) {
912 if(U_FAILURE(*pErrorCode
)) {
915 if((s
==NULL
&& length
!=0) || length
<-1) {
916 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
919 UnicodeString
sString(length
<0, s
, length
);
920 return ((const Normalizer2
*)norm2
)->isNormalized(sString
, *pErrorCode
);
// C API: same argument validation and string aliasing as
// unorm2_isNormalized(), then calls Normalizer2::quickCheck().
// NOTE(review): the early-return statements are not visible in this listing.
923 U_DRAFT UNormalizationCheckResult U_EXPORT2
924 unorm2_quickCheck(const UNormalizer2
*norm2
,
925 const UChar
*s
, int32_t length
,
926 UErrorCode
*pErrorCode
) {
927 if(U_FAILURE(*pErrorCode
)) {
930 if((s
==NULL
&& length
!=0) || length
<-1) {
931 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
934 UnicodeString
sString(length
<0, s
, length
);
935 return ((const Normalizer2
*)norm2
)->quickCheck(sString
, *pErrorCode
);
// C API: same argument validation and string aliasing as
// unorm2_isNormalized(), then calls Normalizer2::spanQuickCheckYes() and
// returns its int32_t result.
// NOTE(review): the early-return statements are not visible in this listing.
938 U_DRAFT
int32_t U_EXPORT2
939 unorm2_spanQuickCheckYes(const UNormalizer2
*norm2
,
940 const UChar
*s
, int32_t length
,
941 UErrorCode
*pErrorCode
) {
942 if(U_FAILURE(*pErrorCode
)) {
945 if((s
==NULL
&& length
!=0) || length
<-1) {
946 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
949 UnicodeString
sString(length
<0, s
, length
);
950 return ((const Normalizer2
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
);
// C API wrapper: forwards to Normalizer2::hasBoundaryBefore(c) on the handle.
953 U_DRAFT UBool U_EXPORT2
954 unorm2_hasBoundaryBefore(const UNormalizer2
*norm2
, UChar32 c
) {
955 return ((const Normalizer2
*)norm2
)->hasBoundaryBefore(c
);
// C API wrapper: forwards to Normalizer2::hasBoundaryAfter(c) on the handle.
958 U_DRAFT UBool U_EXPORT2
959 unorm2_hasBoundaryAfter(const UNormalizer2
*norm2
, UChar32 c
) {
960 return ((const Normalizer2
*)norm2
)->hasBoundaryAfter(c
);
// C API wrapper: forwards to Normalizer2::isInert(c) on the handle.
963 U_DRAFT UBool U_EXPORT2
964 unorm2_isInert(const UNormalizer2
*norm2
, UChar32 c
) {
965 return ((const Normalizer2
*)norm2
)->isInert(c
);
968 // Some properties APIs ---------------------------------------------------- ***
// Property API: obtains the NFD instance with a local error code and, on
// success, returns its combining class for c.
// NOTE(review): the value returned when the instance cannot be obtained is
// not visible in this listing.
970 U_CAPI
uint8_t U_EXPORT2
971 u_getCombiningClass(UChar32 c
) {
972 UErrorCode errorCode
=U_ZERO_ERROR
;
973 const Normalizer2
*nfd
=Normalizer2Factory::getNFDInstance(errorCode
);
974 if(U_SUCCESS(errorCode
)) {
975 return nfd
->getCombiningClass(c
);
// Returns the quick-check value of c for the given mode. Modes at or below
// UNORM_NONE and at or above UNORM_FCD take the first branch; for the
// remaining modes the factory instance is cast to Normalizer2WithImpl
// (unchecked C-style cast) and getQuickCheck(c) is called on it.
// NOTE(review): the values returned from the range check and from the
// failure path are not visible in this listing.
981 U_CFUNC UNormalizationCheckResult
982 unorm_getQuickCheck(UChar32 c
, UNormalizationMode mode
) {
983 if(mode
<=UNORM_NONE
|| UNORM_FCD
<=mode
) {
986 UErrorCode errorCode
=U_ZERO_ERROR
;
987 const Normalizer2
*norm2
=Normalizer2Factory::getInstance(mode
, errorCode
);
988 if(U_SUCCESS(errorCode
)) {
989 return ((const Normalizer2WithImpl
*)norm2
)->getQuickCheck(c
);
// Returns impl->getFCD16(c) from the NFC implementation when it can be
// obtained.
// NOTE(review): the return type and the value returned on failure are not
// visible in this listing.
996 unorm_getFCD16(UChar32 c
) {
997 UErrorCode errorCode
=U_ZERO_ERROR
;
998 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
999 if(U_SUCCESS(errorCode
)) {
1000 return impl
->getFCD16(c
);
1006 #endif // !UCONFIG_NO_NORMALIZATION