2 ******************************************************************************* 
   4 *   Copyright (C) 2009-2016, International Business Machines 
   5 *   Corporation and others.  All Rights Reserved. 
   7 ******************************************************************************* 
   8 *   file name:  normalizer2.cpp 
  10 *   tab size:   8 (not used) 
  13 *   created on: 2009nov22 
  14 *   created by: Markus W. Scherer 
  17 #include "unicode/utypes.h" 
  19 #if !UCONFIG_NO_NORMALIZATION 
  21 #include "unicode/normalizer2.h" 
  22 #include "unicode/unistr.h" 
  23 #include "unicode/unorm.h" 
  26 #include "norm2allmodes.h" 
  27 #include "normalizer2impl.h" 
  31 using icu::Normalizer2Impl
; 
  33 // NFC/NFD data machine-generated by gennorm2 --csource 
  34 #define INCLUDED_FROM_NORMALIZER2_CPP 
  35 #include "norm2_nfc_data.h" 
  39 // Public API dispatch via Normalizer2 subclasses -------------------------- *** 
  41 Normalizer2::~Normalizer2() {} 
  44 Normalizer2::getRawDecomposition(UChar32
, UnicodeString 
&) const { 
  49 Normalizer2::composePair(UChar32
, UChar32
) const { 
  54 Normalizer2::getCombiningClass(UChar32 
/*c*/) const { 
  58 // Normalizer2 implementation for the old UNORM_NONE. 
  59 class NoopNormalizer2 
: public Normalizer2 
{ 
  60     virtual ~NoopNormalizer2(); 
  62     virtual UnicodeString 
& 
  63     normalize(const UnicodeString 
&src
, 
  65               UErrorCode 
&errorCode
) const { 
  66         if(U_SUCCESS(errorCode
)) { 
  70                 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
  75     virtual UnicodeString 
& 
  76     normalizeSecondAndAppend(UnicodeString 
&first
, 
  77                              const UnicodeString 
&second
, 
  78                              UErrorCode 
&errorCode
) const { 
  79         if(U_SUCCESS(errorCode
)) { 
  83                 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
  88     virtual UnicodeString 
& 
  89     append(UnicodeString 
&first
, 
  90            const UnicodeString 
&second
, 
  91            UErrorCode 
&errorCode
) const { 
  92         if(U_SUCCESS(errorCode
)) { 
  96                 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 102     getDecomposition(UChar32
, UnicodeString 
&) const { 
 105     // No need to override the default getRawDecomposition(). 
 107     isNormalized(const UnicodeString 
&, UErrorCode 
&) const { 
 110     virtual UNormalizationCheckResult
 
 111     quickCheck(const UnicodeString 
&, UErrorCode 
&) const { 
 115     spanQuickCheckYes(const UnicodeString 
&s
, UErrorCode 
&) const { 
 118     virtual UBool 
hasBoundaryBefore(UChar32
) const { return TRUE
; } 
 119     virtual UBool 
hasBoundaryAfter(UChar32
) const { return TRUE
; } 
 120     virtual UBool 
isInert(UChar32
) const { return TRUE
; } 
 123 NoopNormalizer2::~NoopNormalizer2() {} 
 125 Normalizer2WithImpl::~Normalizer2WithImpl() {} 
 127 DecomposeNormalizer2::~DecomposeNormalizer2() {} 
 129 ComposeNormalizer2::~ComposeNormalizer2() {} 
 131 FCDNormalizer2::~FCDNormalizer2() {} 
 133 // instance cache ---------------------------------------------------------- *** 
 135 Norm2AllModes::~Norm2AllModes() { 
 140 Norm2AllModes::createInstance(Normalizer2Impl 
*impl
, UErrorCode 
&errorCode
) { 
 141     if(U_FAILURE(errorCode
)) { 
 145     Norm2AllModes 
*allModes
=new Norm2AllModes(impl
); 
 147         errorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 155 Norm2AllModes::createNFCInstance(UErrorCode 
&errorCode
) { 
 156     if(U_FAILURE(errorCode
)) { 
 159     Normalizer2Impl 
*impl
=new Normalizer2Impl
; 
 161         errorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 164     impl
->init(norm2_nfc_data_indexes
, &norm2_nfc_data_trie
, 
 165                norm2_nfc_data_extraData
, norm2_nfc_data_smallFCD
); 
 166     return createInstance(impl
, errorCode
); 
 170 static UBool U_CALLCONV 
uprv_normalizer2_cleanup(); 
 173 static Norm2AllModes 
*nfcSingleton
; 
 174 static Normalizer2   
*noopSingleton
; 
 176 static icu::UInitOnce nfcInitOnce 
= U_INITONCE_INITIALIZER
; 
 177 static icu::UInitOnce noopInitOnce 
= U_INITONCE_INITIALIZER
; 
 179 // UInitOnce singleton initialization functions 
 180 static void U_CALLCONV 
initNFCSingleton(UErrorCode 
&errorCode
) { 
 181     nfcSingleton
=Norm2AllModes::createNFCInstance(errorCode
); 
 182     ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
); 
 185 static void U_CALLCONV 
initNoopSingleton(UErrorCode 
&errorCode
) { 
 186     if(U_FAILURE(errorCode
)) { 
 189     noopSingleton
=new NoopNormalizer2
; 
 190     if(noopSingleton
==NULL
) { 
 191         errorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 194     ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
); 
 199 static UBool U_CALLCONV 
uprv_normalizer2_cleanup() { 
 202     delete noopSingleton
; 
 203     noopSingleton 
= NULL
; 
 205     noopInitOnce
.reset();  
 211 const Norm2AllModes 
* 
 212 Norm2AllModes::getNFCInstance(UErrorCode 
&errorCode
) { 
 213     if(U_FAILURE(errorCode
)) { return NULL
; } 
 214     umtx_initOnce(nfcInitOnce
, &initNFCSingleton
, errorCode
); 
 219 Normalizer2::getNFCInstance(UErrorCode 
&errorCode
) { 
 220     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 221     return allModes
!=NULL 
? &allModes
->comp 
: NULL
; 
 225 Normalizer2::getNFDInstance(UErrorCode 
&errorCode
) { 
 226     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 227     return allModes
!=NULL 
? &allModes
->decomp 
: NULL
; 
 230 const Normalizer2 
*Normalizer2Factory::getFCDInstance(UErrorCode 
&errorCode
) { 
 231     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 232     return allModes
!=NULL 
? &allModes
->fcd 
: NULL
; 
 235 const Normalizer2 
*Normalizer2Factory::getFCCInstance(UErrorCode 
&errorCode
) { 
 236     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 237     return allModes
!=NULL 
? &allModes
->fcc 
: NULL
; 
 240 const Normalizer2 
*Normalizer2Factory::getNoopInstance(UErrorCode 
&errorCode
) { 
 241     if(U_FAILURE(errorCode
)) { return NULL
; } 
 242     umtx_initOnce(noopInitOnce
, &initNoopSingleton
, errorCode
); 
 243     return noopSingleton
; 
 246 const Normalizer2Impl 
* 
 247 Normalizer2Factory::getNFCImpl(UErrorCode 
&errorCode
) { 
 248     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 249     return allModes
!=NULL 
? allModes
->impl 
: NULL
; 
 252 const Normalizer2Impl 
* 
 253 Normalizer2Factory::getImpl(const Normalizer2 
*norm2
) { 
 254     return &((Normalizer2WithImpl 
*)norm2
)->impl
; 
 259 // C API ------------------------------------------------------------------- *** 
 263 U_CAPI 
const UNormalizer2 
* U_EXPORT2
 
 264 unorm2_getNFCInstance(UErrorCode 
*pErrorCode
) { 
 265     return (const UNormalizer2 
*)Normalizer2::getNFCInstance(*pErrorCode
); 
 268 U_CAPI 
const UNormalizer2 
* U_EXPORT2
 
 269 unorm2_getNFDInstance(UErrorCode 
*pErrorCode
) { 
 270     return (const UNormalizer2 
*)Normalizer2::getNFDInstance(*pErrorCode
); 
 273 U_CAPI 
void U_EXPORT2
 
 274 unorm2_close(UNormalizer2 
*norm2
) { 
 275     delete (Normalizer2 
*)norm2
; 
 278 U_CAPI 
int32_t U_EXPORT2
 
 279 unorm2_normalize(const UNormalizer2 
*norm2
, 
 280                  const UChar 
*src
, int32_t length
, 
 281                  UChar 
*dest
, int32_t capacity
, 
 282                  UErrorCode 
*pErrorCode
) { 
 283     if(U_FAILURE(*pErrorCode
)) { 
 286     if( (src
==NULL 
? length
!=0 : length
<-1) || 
 287         (dest
==NULL 
? capacity
!=0 : capacity
<0) || 
 288         (src
==dest 
&& src
!=NULL
) 
 290         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 293     UnicodeString 
destString(dest
, 0, capacity
); 
 294     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 
 296         const Normalizer2 
*n2
=(const Normalizer2 
*)norm2
; 
 297         const Normalizer2WithImpl 
*n2wi
=dynamic_cast<const Normalizer2WithImpl 
*>(n2
); 
 299             // Avoid duplicate argument checking and support NUL-terminated src. 
 300             ReorderingBuffer 
buffer(n2wi
->impl
, destString
); 
 301             if(buffer
.init(length
, *pErrorCode
)) { 
 302                 n2wi
->normalize(src
, length
>=0 ? src
+length 
: NULL
, buffer
, *pErrorCode
); 
 305             UnicodeString 
srcString(length
<0, src
, length
); 
 306             n2
->normalize(srcString
, destString
, *pErrorCode
); 
 309     return destString
.extract(dest
, capacity
, *pErrorCode
); 
 313 normalizeSecondAndAppend(const UNormalizer2 
*norm2
, 
 314                          UChar 
*first
, int32_t firstLength
, int32_t firstCapacity
, 
 315                          const UChar 
*second
, int32_t secondLength
, 
 317                          UErrorCode 
*pErrorCode
) { 
 318     if(U_FAILURE(*pErrorCode
)) { 
 321     if( (second
==NULL 
? secondLength
!=0 : secondLength
<-1) || 
 322         (first
==NULL 
? (firstCapacity
!=0 || firstLength
!=0) : 
 323                        (firstCapacity
<0 || firstLength
<-1)) || 
 324         (first
==second 
&& first
!=NULL
) 
 326         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 329     UnicodeString 
firstString(first
, firstLength
, firstCapacity
); 
 330     firstLength
=firstString
.length();  // In case it was -1. 
 331     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 
 332     if(secondLength
!=0) { 
 333         const Normalizer2 
*n2
=(const Normalizer2 
*)norm2
; 
 334         const Normalizer2WithImpl 
*n2wi
=dynamic_cast<const Normalizer2WithImpl 
*>(n2
); 
 336             // Avoid duplicate argument checking and support NUL-terminated src. 
 337             UnicodeString safeMiddle
; 
 339                 ReorderingBuffer 
buffer(n2wi
->impl
, firstString
); 
 340                 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) {  // destCapacity>=-1 
 341                     n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength 
: NULL
, 
 342                                              doNormalize
, safeMiddle
, buffer
, *pErrorCode
); 
 344             }  // The ReorderingBuffer destructor finalizes firstString. 
 345             if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) { 
 346                 // Restore the modified suffix of the first string. 
 347                 // This does not restore first[] array contents between firstLength and firstCapacity. 
 348                 // (That might be uninitialized memory, as far as we know.) 
 349                 if(first
!=NULL
) { /* don't dereference NULL */ 
 350                   safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length()); 
 351                   if(firstLength
<firstCapacity
) { 
 352                     first
[firstLength
]=0;  // NUL-terminate in case it was originally. 
 357             UnicodeString 
secondString(secondLength
<0, second
, secondLength
); 
 359                 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
); 
 361                 n2
->append(firstString
, secondString
, *pErrorCode
); 
 365     return firstString
.extract(first
, firstCapacity
, *pErrorCode
); 
 368 U_CAPI 
int32_t U_EXPORT2
 
 369 unorm2_normalizeSecondAndAppend(const UNormalizer2 
*norm2
, 
 370                                 UChar 
*first
, int32_t firstLength
, int32_t firstCapacity
, 
 371                                 const UChar 
*second
, int32_t secondLength
, 
 372                                 UErrorCode 
*pErrorCode
) { 
 373     return normalizeSecondAndAppend(norm2
, 
 374                                     first
, firstLength
, firstCapacity
, 
 375                                     second
, secondLength
, 
 379 U_CAPI 
int32_t U_EXPORT2
 
 380 unorm2_append(const UNormalizer2 
*norm2
, 
 381               UChar 
*first
, int32_t firstLength
, int32_t firstCapacity
, 
 382               const UChar 
*second
, int32_t secondLength
, 
 383               UErrorCode 
*pErrorCode
) { 
 384     return normalizeSecondAndAppend(norm2
, 
 385                                     first
, firstLength
, firstCapacity
, 
 386                                     second
, secondLength
, 
 390 U_CAPI 
int32_t U_EXPORT2
 
 391 unorm2_getDecomposition(const UNormalizer2 
*norm2
, 
 392                         UChar32 c
, UChar 
*decomposition
, int32_t capacity
, 
 393                         UErrorCode 
*pErrorCode
) { 
 394     if(U_FAILURE(*pErrorCode
)) { 
 397     if(decomposition
==NULL 
? capacity
!=0 : capacity
<0) { 
 398         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 401     UnicodeString 
destString(decomposition
, 0, capacity
); 
 402     if(reinterpret_cast<const Normalizer2 
*>(norm2
)->getDecomposition(c
, destString
)) { 
 403         return destString
.extract(decomposition
, capacity
, *pErrorCode
); 
 409 U_CAPI 
int32_t U_EXPORT2
 
 410 unorm2_getRawDecomposition(const UNormalizer2 
*norm2
, 
 411                            UChar32 c
, UChar 
*decomposition
, int32_t capacity
, 
 412                            UErrorCode 
*pErrorCode
) { 
 413     if(U_FAILURE(*pErrorCode
)) { 
 416     if(decomposition
==NULL 
? capacity
!=0 : capacity
<0) { 
 417         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 420     UnicodeString 
destString(decomposition
, 0, capacity
); 
 421     if(reinterpret_cast<const Normalizer2 
*>(norm2
)->getRawDecomposition(c
, destString
)) { 
 422         return destString
.extract(decomposition
, capacity
, *pErrorCode
); 
 428 U_CAPI UChar32 U_EXPORT2
 
 429 unorm2_composePair(const UNormalizer2 
*norm2
, UChar32 a
, UChar32 b
) { 
 430     return reinterpret_cast<const Normalizer2 
*>(norm2
)->composePair(a
, b
); 
 433 U_CAPI 
uint8_t U_EXPORT2
 
 434 unorm2_getCombiningClass(const UNormalizer2 
*norm2
, UChar32 c
) { 
 435     return reinterpret_cast<const Normalizer2 
*>(norm2
)->getCombiningClass(c
); 
 438 U_CAPI UBool U_EXPORT2
 
 439 unorm2_isNormalized(const UNormalizer2 
*norm2
, 
 440                     const UChar 
*s
, int32_t length
, 
 441                     UErrorCode 
*pErrorCode
) { 
 442     if(U_FAILURE(*pErrorCode
)) { 
 445     if((s
==NULL 
&& length
!=0) || length
<-1) { 
 446         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 449     UnicodeString 
sString(length
<0, s
, length
); 
 450     return ((const Normalizer2 
*)norm2
)->isNormalized(sString
, *pErrorCode
); 
 453 U_CAPI UNormalizationCheckResult U_EXPORT2
 
 454 unorm2_quickCheck(const UNormalizer2 
*norm2
, 
 455                   const UChar 
*s
, int32_t length
, 
 456                   UErrorCode 
*pErrorCode
) { 
 457     if(U_FAILURE(*pErrorCode
)) { 
 460     if((s
==NULL 
&& length
!=0) || length
<-1) { 
 461         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 464     UnicodeString 
sString(length
<0, s
, length
); 
 465     return ((const Normalizer2 
*)norm2
)->quickCheck(sString
, *pErrorCode
); 
 468 U_CAPI 
int32_t U_EXPORT2
 
 469 unorm2_spanQuickCheckYes(const UNormalizer2 
*norm2
, 
 470                          const UChar 
*s
, int32_t length
, 
 471                          UErrorCode 
*pErrorCode
) { 
 472     if(U_FAILURE(*pErrorCode
)) { 
 475     if((s
==NULL 
&& length
!=0) || length
<-1) { 
 476         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 479     UnicodeString 
sString(length
<0, s
, length
); 
 480     return ((const Normalizer2 
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
); 
 483 U_CAPI UBool U_EXPORT2
 
 484 unorm2_hasBoundaryBefore(const UNormalizer2 
*norm2
, UChar32 c
) { 
 485     return ((const Normalizer2 
*)norm2
)->hasBoundaryBefore(c
); 
 488 U_CAPI UBool U_EXPORT2
 
 489 unorm2_hasBoundaryAfter(const UNormalizer2 
*norm2
, UChar32 c
) { 
 490     return ((const Normalizer2 
*)norm2
)->hasBoundaryAfter(c
); 
 493 U_CAPI UBool U_EXPORT2
 
 494 unorm2_isInert(const UNormalizer2 
*norm2
, UChar32 c
) { 
 495     return ((const Normalizer2 
*)norm2
)->isInert(c
); 
 498 // Some properties APIs ---------------------------------------------------- *** 
 500 U_CAPI 
uint8_t U_EXPORT2
 
 501 u_getCombiningClass(UChar32 c
) { 
 502     UErrorCode errorCode
=U_ZERO_ERROR
; 
 503     const Normalizer2 
*nfd
=Normalizer2::getNFDInstance(errorCode
); 
 504     if(U_SUCCESS(errorCode
)) { 
 505         return nfd
->getCombiningClass(c
); 
 512 unorm_getFCD16(UChar32 c
) { 
 513     UErrorCode errorCode
=U_ZERO_ERROR
; 
 514     const Normalizer2Impl 
*impl
=Normalizer2Factory::getNFCImpl(errorCode
); 
 515     if(U_SUCCESS(errorCode
)) { 
 516         return impl
->getFCD16(c
); 
 522 #endif  // !UCONFIG_NO_NORMALIZATION