/*
*******************************************************************************
*   Copyright (C) 2009-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  normalizer2.cpp
*   tab size:   8 (not used)
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_NORMALIZATION
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
28 #include "normalizer2impl.h"
// Public API dispatch via Normalizer2 subclasses -------------------------- ***
36 Normalizer2::~Normalizer2() {}
39 Normalizer2::getRawDecomposition(UChar32
, UnicodeString
&) const {
44 Normalizer2::composePair(UChar32
, UChar32
) const {
49 Normalizer2::getCombiningClass(UChar32
/*c*/) const {
// Normalizer2 implementation for the old UNORM_NONE.
54 class NoopNormalizer2
: public Normalizer2
{
55 virtual ~NoopNormalizer2();
57 virtual UnicodeString
&
58 normalize(const UnicodeString
&src
,
60 UErrorCode
&errorCode
) const {
61 if(U_SUCCESS(errorCode
)) {
65 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
70 virtual UnicodeString
&
71 normalizeSecondAndAppend(UnicodeString
&first
,
72 const UnicodeString
&second
,
73 UErrorCode
&errorCode
) const {
74 if(U_SUCCESS(errorCode
)) {
78 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
83 virtual UnicodeString
&
84 append(UnicodeString
&first
,
85 const UnicodeString
&second
,
86 UErrorCode
&errorCode
) const {
87 if(U_SUCCESS(errorCode
)) {
91 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
97 getDecomposition(UChar32
, UnicodeString
&) const {
100 // No need to override the default getRawDecomposition().
102 isNormalized(const UnicodeString
&, UErrorCode
&) const {
105 virtual UNormalizationCheckResult
106 quickCheck(const UnicodeString
&, UErrorCode
&) const {
110 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&) const {
113 virtual UBool
hasBoundaryBefore(UChar32
) const { return TRUE
; }
114 virtual UBool
hasBoundaryAfter(UChar32
) const { return TRUE
; }
115 virtual UBool
isInert(UChar32
) const { return TRUE
; }
118 NoopNormalizer2::~NoopNormalizer2() {}
// Intermediate class:
// Has Normalizer2Impl and does boilerplate argument checking and setup.
122 class Normalizer2WithImpl
: public Normalizer2
{
124 Normalizer2WithImpl(const Normalizer2Impl
&ni
) : impl(ni
) {}
125 virtual ~Normalizer2WithImpl();
128 virtual UnicodeString
&
129 normalize(const UnicodeString
&src
,
131 UErrorCode
&errorCode
) const {
132 if(U_FAILURE(errorCode
)) {
136 const UChar
*sArray
=src
.getBuffer();
137 if(&dest
==&src
|| sArray
==NULL
) {
138 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
143 ReorderingBuffer
buffer(impl
, dest
);
144 if(buffer
.init(src
.length(), errorCode
)) {
145 normalize(sArray
, sArray
+src
.length(), buffer
, errorCode
);
150 normalize(const UChar
*src
, const UChar
*limit
,
151 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
153 // normalize and append
154 virtual UnicodeString
&
155 normalizeSecondAndAppend(UnicodeString
&first
,
156 const UnicodeString
&second
,
157 UErrorCode
&errorCode
) const {
158 return normalizeSecondAndAppend(first
, second
, TRUE
, errorCode
);
160 virtual UnicodeString
&
161 append(UnicodeString
&first
,
162 const UnicodeString
&second
,
163 UErrorCode
&errorCode
) const {
164 return normalizeSecondAndAppend(first
, second
, FALSE
, errorCode
);
167 normalizeSecondAndAppend(UnicodeString
&first
,
168 const UnicodeString
&second
,
170 UErrorCode
&errorCode
) const {
171 uprv_checkCanGetBuffer(first
, errorCode
);
172 if(U_FAILURE(errorCode
)) {
175 const UChar
*secondArray
=second
.getBuffer();
176 if(&first
==&second
|| secondArray
==NULL
) {
177 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
180 int32_t firstLength
=first
.length();
181 UnicodeString safeMiddle
;
183 ReorderingBuffer
buffer(impl
, first
);
184 if(buffer
.init(firstLength
+second
.length(), errorCode
)) {
185 normalizeAndAppend(secondArray
, secondArray
+second
.length(), doNormalize
,
186 safeMiddle
, buffer
, errorCode
);
188 } // The ReorderingBuffer destructor finalizes the first string.
189 if(U_FAILURE(errorCode
)) {
190 // Restore the modified suffix of the first string.
191 first
.replace(firstLength
-safeMiddle
.length(), 0x7fffffff, safeMiddle
);
196 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
197 UnicodeString
&safeMiddle
,
198 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const = 0;
200 getDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
203 const UChar
*d
=impl
.getDecomposition(c
, buffer
, length
);
208 decomposition
.setTo(buffer
, length
); // copy the string (Jamos from Hangul syllable c)
210 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
215 getRawDecomposition(UChar32 c
, UnicodeString
&decomposition
) const {
218 const UChar
*d
=impl
.getRawDecomposition(c
, buffer
, length
);
223 decomposition
.setTo(buffer
, length
); // copy the string (algorithmic decomposition)
225 decomposition
.setTo(FALSE
, d
, length
); // read-only alias
230 composePair(UChar32 a
, UChar32 b
) const {
231 return impl
.composePair(a
, b
);
235 getCombiningClass(UChar32 c
) const {
236 return impl
.getCC(impl
.getNorm16(c
));
241 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
242 if(U_FAILURE(errorCode
)) {
245 const UChar
*sArray
=s
.getBuffer();
247 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
250 const UChar
*sLimit
=sArray
+s
.length();
251 return sLimit
==spanQuickCheckYes(sArray
, sLimit
, errorCode
);
253 virtual UNormalizationCheckResult
254 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
255 return Normalizer2WithImpl::isNormalized(s
, errorCode
) ? UNORM_YES
: UNORM_NO
;
258 spanQuickCheckYes(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
259 if(U_FAILURE(errorCode
)) {
262 const UChar
*sArray
=s
.getBuffer();
264 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
267 return (int32_t)(spanQuickCheckYes(sArray
, sArray
+s
.length(), errorCode
)-sArray
);
269 virtual const UChar
*
270 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const = 0;
272 virtual UNormalizationCheckResult
getQuickCheck(UChar32
) const {
276 const Normalizer2Impl
&impl
;
279 Normalizer2WithImpl::~Normalizer2WithImpl() {}
281 class DecomposeNormalizer2
: public Normalizer2WithImpl
{
283 DecomposeNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
284 virtual ~DecomposeNormalizer2();
288 normalize(const UChar
*src
, const UChar
*limit
,
289 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
290 impl
.decompose(src
, limit
, &buffer
, errorCode
);
292 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
294 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
295 UnicodeString
&safeMiddle
,
296 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
297 impl
.decomposeAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
299 virtual const UChar
*
300 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
301 return impl
.decompose(src
, limit
, NULL
, errorCode
);
303 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
304 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const {
305 return impl
.isDecompYes(impl
.getNorm16(c
)) ? UNORM_YES
: UNORM_NO
;
307 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasDecompBoundary(c
, TRUE
); }
308 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasDecompBoundary(c
, FALSE
); }
309 virtual UBool
isInert(UChar32 c
) const { return impl
.isDecompInert(c
); }
312 DecomposeNormalizer2::~DecomposeNormalizer2() {}
314 class ComposeNormalizer2
: public Normalizer2WithImpl
{
316 ComposeNormalizer2(const Normalizer2Impl
&ni
, UBool fcc
) :
317 Normalizer2WithImpl(ni
), onlyContiguous(fcc
) {}
318 virtual ~ComposeNormalizer2();
322 normalize(const UChar
*src
, const UChar
*limit
,
323 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
324 impl
.compose(src
, limit
, onlyContiguous
, TRUE
, buffer
, errorCode
);
326 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
328 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
329 UnicodeString
&safeMiddle
,
330 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
331 impl
.composeAndAppend(src
, limit
, doNormalize
, onlyContiguous
, safeMiddle
, buffer
, errorCode
);
335 isNormalized(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
336 if(U_FAILURE(errorCode
)) {
339 const UChar
*sArray
=s
.getBuffer();
341 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
345 ReorderingBuffer
buffer(impl
, temp
);
346 if(!buffer
.init(5, errorCode
)) { // small destCapacity for substring normalization
349 return impl
.compose(sArray
, sArray
+s
.length(), onlyContiguous
, FALSE
, buffer
, errorCode
);
351 virtual UNormalizationCheckResult
352 quickCheck(const UnicodeString
&s
, UErrorCode
&errorCode
) const {
353 if(U_FAILURE(errorCode
)) {
356 const UChar
*sArray
=s
.getBuffer();
358 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
361 UNormalizationCheckResult qcResult
=UNORM_YES
;
362 impl
.composeQuickCheck(sArray
, sArray
+s
.length(), onlyContiguous
, &qcResult
);
365 virtual const UChar
*
366 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&) const {
367 return impl
.composeQuickCheck(src
, limit
, onlyContiguous
, NULL
);
369 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
370 virtual UNormalizationCheckResult
getQuickCheck(UChar32 c
) const {
371 return impl
.getCompQuickCheck(impl
.getNorm16(c
));
373 virtual UBool
hasBoundaryBefore(UChar32 c
) const {
374 return impl
.hasCompBoundaryBefore(c
);
376 virtual UBool
hasBoundaryAfter(UChar32 c
) const {
377 return impl
.hasCompBoundaryAfter(c
, onlyContiguous
, FALSE
);
379 virtual UBool
isInert(UChar32 c
) const {
380 return impl
.hasCompBoundaryAfter(c
, onlyContiguous
, TRUE
);
383 const UBool onlyContiguous
;
386 ComposeNormalizer2::~ComposeNormalizer2() {}
388 class FCDNormalizer2
: public Normalizer2WithImpl
{
390 FCDNormalizer2(const Normalizer2Impl
&ni
) : Normalizer2WithImpl(ni
) {}
391 virtual ~FCDNormalizer2();
395 normalize(const UChar
*src
, const UChar
*limit
,
396 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
397 impl
.makeFCD(src
, limit
, &buffer
, errorCode
);
399 using Normalizer2WithImpl::normalize
; // Avoid warning about hiding base class function.
401 normalizeAndAppend(const UChar
*src
, const UChar
*limit
, UBool doNormalize
,
402 UnicodeString
&safeMiddle
,
403 ReorderingBuffer
&buffer
, UErrorCode
&errorCode
) const {
404 impl
.makeFCDAndAppend(src
, limit
, doNormalize
, safeMiddle
, buffer
, errorCode
);
406 virtual const UChar
*
407 spanQuickCheckYes(const UChar
*src
, const UChar
*limit
, UErrorCode
&errorCode
) const {
408 return impl
.makeFCD(src
, limit
, NULL
, errorCode
);
410 using Normalizer2WithImpl::spanQuickCheckYes
; // Avoid warning about hiding base class function.
411 virtual UBool
hasBoundaryBefore(UChar32 c
) const { return impl
.hasFCDBoundaryBefore(c
); }
412 virtual UBool
hasBoundaryAfter(UChar32 c
) const { return impl
.hasFCDBoundaryAfter(c
); }
413 virtual UBool
isInert(UChar32 c
) const { return impl
.isFCDInert(c
); }
416 FCDNormalizer2::~FCDNormalizer2() {}
// instance cache ---------------------------------------------------------- ***
420 struct Norm2AllModes
: public UMemory
{
421 static Norm2AllModes
*createInstance(const char *packageName
,
423 UErrorCode
&errorCode
);
424 Norm2AllModes() : comp(impl
, FALSE
), decomp(impl
), fcd(impl
), fcc(impl
, TRUE
) {}
426 Normalizer2Impl impl
;
427 ComposeNormalizer2 comp
;
428 DecomposeNormalizer2 decomp
;
430 ComposeNormalizer2 fcc
;
434 Norm2AllModes::createInstance(const char *packageName
,
436 UErrorCode
&errorCode
) {
437 if(U_FAILURE(errorCode
)) {
440 LocalPointer
<Norm2AllModes
> allModes(new Norm2AllModes
);
441 if(allModes
.isNull()) {
442 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
445 allModes
->impl
.load(packageName
, name
, errorCode
);
446 return U_SUCCESS(errorCode
) ? allModes
.orphan() : NULL
;
450 static UBool U_CALLCONV
uprv_normalizer2_cleanup();
453 class Norm2AllModesSingleton
: public TriStateSingletonWrapper
<Norm2AllModes
> {
455 Norm2AllModesSingleton(TriStateSingleton
&s
, const char *n
) :
456 TriStateSingletonWrapper
<Norm2AllModes
>(s
), name(n
) {}
457 Norm2AllModes
*getInstance(UErrorCode
&errorCode
) {
458 return TriStateSingletonWrapper
<Norm2AllModes
>::getInstance(createInstance
, name
, errorCode
);
461 static void *createInstance(const void *context
, UErrorCode
&errorCode
) {
462 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
463 return Norm2AllModes::createInstance(NULL
, (const char *)context
, errorCode
);
469 STATIC_TRI_STATE_SINGLETON(nfcSingleton
);
470 STATIC_TRI_STATE_SINGLETON(nfkcSingleton
);
471 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton
);
473 class Norm2Singleton
: public SimpleSingletonWrapper
<Normalizer2
> {
475 Norm2Singleton(SimpleSingleton
&s
) : SimpleSingletonWrapper
<Normalizer2
>(s
) {}
476 Normalizer2
*getInstance(UErrorCode
&errorCode
) {
477 return SimpleSingletonWrapper
<Normalizer2
>::getInstance(createInstance
, NULL
, errorCode
);
480 static void *createInstance(const void *, UErrorCode
&errorCode
) {
481 Normalizer2
*noop
=new NoopNormalizer2
;
483 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
485 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
);
490 STATIC_SIMPLE_SINGLETON(noopSingleton
);
492 static UHashtable
*cache
=NULL
;
496 static void U_CALLCONV
deleteNorm2AllModes(void *allModes
) {
497 delete (Norm2AllModes
*)allModes
;
500 static UBool U_CALLCONV
uprv_normalizer2_cleanup() {
501 Norm2AllModesSingleton(nfcSingleton
, NULL
).deleteInstance();
502 Norm2AllModesSingleton(nfkcSingleton
, NULL
).deleteInstance();
503 Norm2AllModesSingleton(nfkc_cfSingleton
, NULL
).deleteInstance();
504 Norm2Singleton(noopSingleton
).deleteInstance();
512 const Normalizer2
*Normalizer2Factory::getNFCInstance(UErrorCode
&errorCode
) {
513 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
514 return allModes
!=NULL
? &allModes
->comp
: NULL
;
517 const Normalizer2
*Normalizer2Factory::getNFDInstance(UErrorCode
&errorCode
) {
518 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
519 return allModes
!=NULL
? &allModes
->decomp
: NULL
;
522 const Normalizer2
*Normalizer2Factory::getFCDInstance(UErrorCode
&errorCode
) {
523 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
524 return allModes
!=NULL
? &allModes
->fcd
: NULL
;
527 const Normalizer2
*Normalizer2Factory::getFCCInstance(UErrorCode
&errorCode
) {
528 Norm2AllModes
*allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
529 return allModes
!=NULL
? &allModes
->fcc
: NULL
;
532 const Normalizer2
*Normalizer2Factory::getNFKCInstance(UErrorCode
&errorCode
) {
533 Norm2AllModes
*allModes
=
534 Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
535 return allModes
!=NULL
? &allModes
->comp
: NULL
;
538 const Normalizer2
*Normalizer2Factory::getNFKDInstance(UErrorCode
&errorCode
) {
539 Norm2AllModes
*allModes
=
540 Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
541 return allModes
!=NULL
? &allModes
->decomp
: NULL
;
544 const Normalizer2
*Normalizer2Factory::getNFKC_CFInstance(UErrorCode
&errorCode
) {
545 Norm2AllModes
*allModes
=
546 Norm2AllModesSingleton(nfkc_cfSingleton
, "nfkc_cf").getInstance(errorCode
);
547 return allModes
!=NULL
? &allModes
->comp
: NULL
;
550 const Normalizer2
*Normalizer2Factory::getNoopInstance(UErrorCode
&errorCode
) {
551 return Norm2Singleton(noopSingleton
).getInstance(errorCode
);
555 Normalizer2Factory::getInstance(UNormalizationMode mode
, UErrorCode
&errorCode
) {
556 if(U_FAILURE(errorCode
)) {
561 return getNFDInstance(errorCode
);
563 return getNFKDInstance(errorCode
);
565 return getNFCInstance(errorCode
);
567 return getNFKCInstance(errorCode
);
569 return getFCDInstance(errorCode
);
570 default: // UNORM_NONE
571 return getNoopInstance(errorCode
);
575 const Normalizer2Impl
*
576 Normalizer2Factory::getNFCImpl(UErrorCode
&errorCode
) {
577 Norm2AllModes
*allModes
=
578 Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
579 return allModes
!=NULL
? &allModes
->impl
: NULL
;
582 const Normalizer2Impl
*
583 Normalizer2Factory::getNFKCImpl(UErrorCode
&errorCode
) {
584 Norm2AllModes
*allModes
=
585 Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
586 return allModes
!=NULL
? &allModes
->impl
: NULL
;
589 const Normalizer2Impl
*
590 Normalizer2Factory::getNFKC_CFImpl(UErrorCode
&errorCode
) {
591 Norm2AllModes
*allModes
=
592 Norm2AllModesSingleton(nfkc_cfSingleton
, "nfkc_cf").getInstance(errorCode
);
593 return allModes
!=NULL
? &allModes
->impl
: NULL
;
596 const Normalizer2Impl
*
597 Normalizer2Factory::getImpl(const Normalizer2
*norm2
) {
598 return &((Normalizer2WithImpl
*)norm2
)->impl
;
602 Normalizer2::getNFCInstance(UErrorCode
&errorCode
) {
603 return Normalizer2Factory::getNFCInstance(errorCode
);
607 Normalizer2::getNFDInstance(UErrorCode
&errorCode
) {
608 return Normalizer2Factory::getNFDInstance(errorCode
);
612 Normalizer2::getNFKCInstance(UErrorCode
&errorCode
) {
613 return Normalizer2Factory::getNFKCInstance(errorCode
);
617 Normalizer2::getNFKDInstance(UErrorCode
&errorCode
) {
618 return Normalizer2Factory::getNFKDInstance(errorCode
);
622 Normalizer2::getNFKCCasefoldInstance(UErrorCode
&errorCode
) {
623 return Normalizer2Factory::getNFKC_CFInstance(errorCode
);
627 Normalizer2::getInstance(const char *packageName
,
629 UNormalization2Mode mode
,
630 UErrorCode
&errorCode
) {
631 if(U_FAILURE(errorCode
)) {
634 if(name
==NULL
|| *name
==0) {
635 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
638 Norm2AllModes
*allModes
=NULL
;
639 if(packageName
==NULL
) {
640 if(0==uprv_strcmp(name
, "nfc")) {
641 allModes
=Norm2AllModesSingleton(nfcSingleton
, "nfc").getInstance(errorCode
);
642 } else if(0==uprv_strcmp(name
, "nfkc")) {
643 allModes
=Norm2AllModesSingleton(nfkcSingleton
, "nfkc").getInstance(errorCode
);
644 } else if(0==uprv_strcmp(name
, "nfkc_cf")) {
645 allModes
=Norm2AllModesSingleton(nfkc_cfSingleton
, "nfkc_cf").getInstance(errorCode
);
648 if(allModes
==NULL
&& U_SUCCESS(errorCode
)) {
652 allModes
=(Norm2AllModes
*)uhash_get(cache
, name
);
656 LocalPointer
<Norm2AllModes
> localAllModes(
657 Norm2AllModes::createInstance(packageName
, name
, errorCode
));
658 if(U_SUCCESS(errorCode
)) {
661 cache
=uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, &errorCode
);
662 if(U_FAILURE(errorCode
)) {
665 uhash_setKeyDeleter(cache
, uprv_free
);
666 uhash_setValueDeleter(cache
, deleteNorm2AllModes
);
668 void *temp
=uhash_get(cache
, name
);
670 int32_t keyLength
=uprv_strlen(name
)+1;
671 char *nameCopy
=(char *)uprv_malloc(keyLength
);
673 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
676 uprv_memcpy(nameCopy
, name
, keyLength
);
677 uhash_put(cache
, nameCopy
, allModes
=localAllModes
.orphan(), &errorCode
);
680 allModes
=(Norm2AllModes
*)temp
;
685 if(allModes
!=NULL
&& U_SUCCESS(errorCode
)) {
688 return &allModes
->comp
;
689 case UNORM2_DECOMPOSE
:
690 return &allModes
->decomp
;
692 return &allModes
->fcd
;
693 case UNORM2_COMPOSE_CONTIGUOUS
:
694 return &allModes
->fcc
;
// C API ------------------------------------------------------------------- ***
708 U_CAPI
const UNormalizer2
* U_EXPORT2
709 unorm2_getNFCInstance(UErrorCode
*pErrorCode
) {
710 return (const UNormalizer2
*)Normalizer2::getNFCInstance(*pErrorCode
);
713 U_CAPI
const UNormalizer2
* U_EXPORT2
714 unorm2_getNFDInstance(UErrorCode
*pErrorCode
) {
715 return (const UNormalizer2
*)Normalizer2::getNFDInstance(*pErrorCode
);
718 U_CAPI
const UNormalizer2
* U_EXPORT2
719 unorm2_getNFKCInstance(UErrorCode
*pErrorCode
) {
720 return (const UNormalizer2
*)Normalizer2::getNFKCInstance(*pErrorCode
);
723 U_CAPI
const UNormalizer2
* U_EXPORT2
724 unorm2_getNFKDInstance(UErrorCode
*pErrorCode
) {
725 return (const UNormalizer2
*)Normalizer2::getNFKDInstance(*pErrorCode
);
728 U_CAPI
const UNormalizer2
* U_EXPORT2
729 unorm2_getNFKCCasefoldInstance(UErrorCode
*pErrorCode
) {
730 return (const UNormalizer2
*)Normalizer2::getNFKCCasefoldInstance(*pErrorCode
);
733 U_CAPI
const UNormalizer2
* U_EXPORT2
734 unorm2_getInstance(const char *packageName
,
736 UNormalization2Mode mode
,
737 UErrorCode
*pErrorCode
) {
738 return (const UNormalizer2
*)Normalizer2::getInstance(packageName
, name
, mode
, *pErrorCode
);
741 U_CAPI
void U_EXPORT2
742 unorm2_close(UNormalizer2
*norm2
) {
743 delete (Normalizer2
*)norm2
;
746 U_CAPI
int32_t U_EXPORT2
747 unorm2_normalize(const UNormalizer2
*norm2
,
748 const UChar
*src
, int32_t length
,
749 UChar
*dest
, int32_t capacity
,
750 UErrorCode
*pErrorCode
) {
751 if(U_FAILURE(*pErrorCode
)) {
754 if( (src
==NULL
? length
!=0 : length
<-1) ||
755 (dest
==NULL
? capacity
!=0 : capacity
<0) ||
756 (src
==dest
&& src
!=NULL
)
758 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
761 UnicodeString
destString(dest
, 0, capacity
);
762 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
764 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
765 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
767 // Avoid duplicate argument checking and support NUL-terminated src.
768 ReorderingBuffer
buffer(n2wi
->impl
, destString
);
769 if(buffer
.init(length
, *pErrorCode
)) {
770 n2wi
->normalize(src
, length
>=0 ? src
+length
: NULL
, buffer
, *pErrorCode
);
773 UnicodeString
srcString(length
<0, src
, length
);
774 n2
->normalize(srcString
, destString
, *pErrorCode
);
777 return destString
.extract(dest
, capacity
, *pErrorCode
);
781 normalizeSecondAndAppend(const UNormalizer2
*norm2
,
782 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
783 const UChar
*second
, int32_t secondLength
,
785 UErrorCode
*pErrorCode
) {
786 if(U_FAILURE(*pErrorCode
)) {
789 if( (second
==NULL
? secondLength
!=0 : secondLength
<-1) ||
790 (first
==NULL
? (firstCapacity
!=0 || firstLength
!=0) :
791 (firstCapacity
<0 || firstLength
<-1)) ||
792 (first
==second
&& first
!=NULL
)
794 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
797 UnicodeString
firstString(first
, firstLength
, firstCapacity
);
798 firstLength
=firstString
.length(); // In case it was -1.
799 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
800 if(secondLength
!=0) {
801 const Normalizer2
*n2
=(const Normalizer2
*)norm2
;
802 const Normalizer2WithImpl
*n2wi
=dynamic_cast<const Normalizer2WithImpl
*>(n2
);
804 // Avoid duplicate argument checking and support NUL-terminated src.
805 UnicodeString safeMiddle
;
807 ReorderingBuffer
buffer(n2wi
->impl
, firstString
);
808 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) { // destCapacity>=-1
809 n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength
: NULL
,
810 doNormalize
, safeMiddle
, buffer
, *pErrorCode
);
812 } // The ReorderingBuffer destructor finalizes firstString.
813 if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) {
814 // Restore the modified suffix of the first string.
815 // This does not restore first[] array contents between firstLength and firstCapacity.
816 // (That might be uninitialized memory, as far as we know.)
817 if(first
!=NULL
) { /* don't dereference NULL */
818 safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length());
819 if(firstLength
<firstCapacity
) {
820 first
[firstLength
]=0; // NUL-terminate in case it was originally.
825 UnicodeString
secondString(secondLength
<0, second
, secondLength
);
827 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
);
829 n2
->append(firstString
, secondString
, *pErrorCode
);
833 return firstString
.extract(first
, firstCapacity
, *pErrorCode
);
836 U_CAPI
int32_t U_EXPORT2
837 unorm2_normalizeSecondAndAppend(const UNormalizer2
*norm2
,
838 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
839 const UChar
*second
, int32_t secondLength
,
840 UErrorCode
*pErrorCode
) {
841 return normalizeSecondAndAppend(norm2
,
842 first
, firstLength
, firstCapacity
,
843 second
, secondLength
,
847 U_CAPI
int32_t U_EXPORT2
848 unorm2_append(const UNormalizer2
*norm2
,
849 UChar
*first
, int32_t firstLength
, int32_t firstCapacity
,
850 const UChar
*second
, int32_t secondLength
,
851 UErrorCode
*pErrorCode
) {
852 return normalizeSecondAndAppend(norm2
,
853 first
, firstLength
, firstCapacity
,
854 second
, secondLength
,
858 U_CAPI
int32_t U_EXPORT2
859 unorm2_getDecomposition(const UNormalizer2
*norm2
,
860 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
861 UErrorCode
*pErrorCode
) {
862 if(U_FAILURE(*pErrorCode
)) {
865 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
866 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
869 UnicodeString
destString(decomposition
, 0, capacity
);
870 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getDecomposition(c
, destString
)) {
871 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
877 U_CAPI
int32_t U_EXPORT2
878 unorm2_getRawDecomposition(const UNormalizer2
*norm2
,
879 UChar32 c
, UChar
*decomposition
, int32_t capacity
,
880 UErrorCode
*pErrorCode
) {
881 if(U_FAILURE(*pErrorCode
)) {
884 if(decomposition
==NULL
? capacity
!=0 : capacity
<0) {
885 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
888 UnicodeString
destString(decomposition
, 0, capacity
);
889 if(reinterpret_cast<const Normalizer2
*>(norm2
)->getRawDecomposition(c
, destString
)) {
890 return destString
.extract(decomposition
, capacity
, *pErrorCode
);
896 U_CAPI UChar32 U_EXPORT2
897 unorm2_composePair(const UNormalizer2
*norm2
, UChar32 a
, UChar32 b
) {
898 return reinterpret_cast<const Normalizer2
*>(norm2
)->composePair(a
, b
);
901 U_CAPI
uint8_t U_EXPORT2
902 unorm2_getCombiningClass(const UNormalizer2
*norm2
, UChar32 c
) {
903 return reinterpret_cast<const Normalizer2
*>(norm2
)->getCombiningClass(c
);
906 U_CAPI UBool U_EXPORT2
907 unorm2_isNormalized(const UNormalizer2
*norm2
,
908 const UChar
*s
, int32_t length
,
909 UErrorCode
*pErrorCode
) {
910 if(U_FAILURE(*pErrorCode
)) {
913 if((s
==NULL
&& length
!=0) || length
<-1) {
914 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
917 UnicodeString
sString(length
<0, s
, length
);
918 return ((const Normalizer2
*)norm2
)->isNormalized(sString
, *pErrorCode
);
921 U_CAPI UNormalizationCheckResult U_EXPORT2
922 unorm2_quickCheck(const UNormalizer2
*norm2
,
923 const UChar
*s
, int32_t length
,
924 UErrorCode
*pErrorCode
) {
925 if(U_FAILURE(*pErrorCode
)) {
928 if((s
==NULL
&& length
!=0) || length
<-1) {
929 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
932 UnicodeString
sString(length
<0, s
, length
);
933 return ((const Normalizer2
*)norm2
)->quickCheck(sString
, *pErrorCode
);
936 U_CAPI
int32_t U_EXPORT2
937 unorm2_spanQuickCheckYes(const UNormalizer2
*norm2
,
938 const UChar
*s
, int32_t length
,
939 UErrorCode
*pErrorCode
) {
940 if(U_FAILURE(*pErrorCode
)) {
943 if((s
==NULL
&& length
!=0) || length
<-1) {
944 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
947 UnicodeString
sString(length
<0, s
, length
);
948 return ((const Normalizer2
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
);
951 U_CAPI UBool U_EXPORT2
952 unorm2_hasBoundaryBefore(const UNormalizer2
*norm2
, UChar32 c
) {
953 return ((const Normalizer2
*)norm2
)->hasBoundaryBefore(c
);
956 U_CAPI UBool U_EXPORT2
957 unorm2_hasBoundaryAfter(const UNormalizer2
*norm2
, UChar32 c
) {
958 return ((const Normalizer2
*)norm2
)->hasBoundaryAfter(c
);
961 U_CAPI UBool U_EXPORT2
962 unorm2_isInert(const UNormalizer2
*norm2
, UChar32 c
) {
963 return ((const Normalizer2
*)norm2
)->isInert(c
);
// Some properties APIs ---------------------------------------------------- ***
968 U_CAPI
uint8_t U_EXPORT2
969 u_getCombiningClass(UChar32 c
) {
970 UErrorCode errorCode
=U_ZERO_ERROR
;
971 const Normalizer2
*nfd
=Normalizer2Factory::getNFDInstance(errorCode
);
972 if(U_SUCCESS(errorCode
)) {
973 return nfd
->getCombiningClass(c
);
979 U_CFUNC UNormalizationCheckResult
980 unorm_getQuickCheck(UChar32 c
, UNormalizationMode mode
) {
981 if(mode
<=UNORM_NONE
|| UNORM_FCD
<=mode
) {
984 UErrorCode errorCode
=U_ZERO_ERROR
;
985 const Normalizer2
*norm2
=Normalizer2Factory::getInstance(mode
, errorCode
);
986 if(U_SUCCESS(errorCode
)) {
987 return ((const Normalizer2WithImpl
*)norm2
)->getQuickCheck(c
);
994 unorm_getFCD16(UChar32 c
) {
995 UErrorCode errorCode
=U_ZERO_ERROR
;
996 const Normalizer2Impl
*impl
=Normalizer2Factory::getNFCImpl(errorCode
);
997 if(U_SUCCESS(errorCode
)) {
998 return impl
->getFCD16(c
);
1004 #endif // !UCONFIG_NO_NORMALIZATION