2 ******************************************************************************* 
   4 *   Copyright (C) 2009-2014, International Business Machines 
   5 *   Corporation and others.  All Rights Reserved. 
   7 ******************************************************************************* 
   8 *   file name:  normalizer2.cpp 
  10 *   tab size:   8 (not used) 
  13 *   created on: 2009nov22 
  14 *   created by: Markus W. Scherer 
  17 #include "unicode/utypes.h" 
  19 #if !UCONFIG_NO_NORMALIZATION 
  21 #include "unicode/normalizer2.h" 
  22 #include "unicode/unistr.h" 
  23 #include "unicode/unorm.h" 
  26 #include "norm2allmodes.h" 
  27 #include "normalizer2impl.h" 
  31 using icu::Normalizer2Impl
; 
  33 // NFC/NFD data machine-generated by gennorm2 --csource 
  34 #include "norm2_nfc_data.h" 
  38 // Public API dispatch via Normalizer2 subclasses -------------------------- *** 
  // Out-of-line definition of the Normalizer2 destructor; the body is empty.
  40 Normalizer2::~Normalizer2() {} 
  43 Normalizer2::getRawDecomposition(UChar32
, UnicodeString 
&) const { 
  48 Normalizer2::composePair(UChar32
, UChar32
) const { 
  53 Normalizer2::getCombiningClass(UChar32 
/*c*/) const { 
  57 // Normalizer2 implementation for the old UNORM_NONE. 
  58 class NoopNormalizer2 
: public Normalizer2 
{ 
  59     virtual ~NoopNormalizer2(); 
  61     virtual UnicodeString 
& 
  62     normalize(const UnicodeString 
&src
, 
  64               UErrorCode 
&errorCode
) const { 
  65         if(U_SUCCESS(errorCode
)) { 
  69                 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
  74     virtual UnicodeString 
& 
  75     normalizeSecondAndAppend(UnicodeString 
&first
, 
  76                              const UnicodeString 
&second
, 
  77                              UErrorCode 
&errorCode
) const { 
  78         if(U_SUCCESS(errorCode
)) { 
  82                 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
  87     virtual UnicodeString 
& 
  88     append(UnicodeString 
&first
, 
  89            const UnicodeString 
&second
, 
  90            UErrorCode 
&errorCode
) const { 
  91         if(U_SUCCESS(errorCode
)) { 
  95                 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 101     getDecomposition(UChar32
, UnicodeString 
&) const { 
 104     // No need to override the default getRawDecomposition(). 
 106     isNormalized(const UnicodeString 
&, UErrorCode 
&) const { 
 109     virtual UNormalizationCheckResult
 
 110     quickCheck(const UnicodeString 
&, UErrorCode 
&) const { 
 114     spanQuickCheckYes(const UnicodeString 
&s
, UErrorCode 
&) const { 
 117     virtual UBool 
hasBoundaryBefore(UChar32
) const { return TRUE
; } 
 118     virtual UBool 
hasBoundaryAfter(UChar32
) const { return TRUE
; } 
 119     virtual UBool 
isInert(UChar32
) const { return TRUE
; } 
 // Out-of-line definition of the destructor declared inside class NoopNormalizer2; the body is empty.
 122 NoopNormalizer2::~NoopNormalizer2() {} 
 // Out-of-line definition of the Normalizer2WithImpl destructor; the body is empty.
 // NOTE(review): the class itself is declared outside this file (presumably norm2allmodes.h) -- confirm.
 124 Normalizer2WithImpl::~Normalizer2WithImpl() {} 
 // Out-of-line definition of the DecomposeNormalizer2 destructor; the body is empty.
 126 DecomposeNormalizer2::~DecomposeNormalizer2() {} 
 // Out-of-line definition of the ComposeNormalizer2 destructor; the body is empty.
 128 ComposeNormalizer2::~ComposeNormalizer2() {} 
 // Out-of-line definition of the FCDNormalizer2 destructor; the body is empty.
 130 FCDNormalizer2::~FCDNormalizer2() {} 
 132 // instance cache ---------------------------------------------------------- *** 
 134 Norm2AllModes::~Norm2AllModes() { 
 139 Norm2AllModes::createInstance(Normalizer2Impl 
*impl
, UErrorCode 
&errorCode
) { 
 140     if(U_FAILURE(errorCode
)) { 
 144     Norm2AllModes 
*allModes
=new Norm2AllModes(impl
); 
 146         errorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 154 Norm2AllModes::createNFCInstance(UErrorCode 
&errorCode
) { 
 155     if(U_FAILURE(errorCode
)) { 
 158     Normalizer2Impl 
*impl
=new Normalizer2Impl
; 
 160         errorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 163     impl
->init(norm2_nfc_data_indexes
, &norm2_nfc_data_trie
, 
 164                norm2_nfc_data_extraData
, norm2_nfc_data_smallFCD
); 
 165     return createInstance(impl
, errorCode
); 
 169 static UBool U_CALLCONV 
uprv_normalizer2_cleanup(); 
 172 static Norm2AllModes 
*nfcSingleton
; 
 173 static Normalizer2   
*noopSingleton
; 
 175 static icu::UInitOnce nfcInitOnce 
= U_INITONCE_INITIALIZER
; 
 176 static icu::UInitOnce noopInitOnce 
= U_INITONCE_INITIALIZER
; 
 178 // UInitOnce singleton initialization functions 
 179 static void U_CALLCONV 
initNFCSingleton(UErrorCode 
&errorCode
) { 
 180     nfcSingleton
=Norm2AllModes::createNFCInstance(errorCode
); 
 181     ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
); 
 184 static void U_CALLCONV 
initNoopSingleton(UErrorCode 
&errorCode
) { 
 185     if(U_FAILURE(errorCode
)) { 
 188     noopSingleton
=new NoopNormalizer2
; 
 189     if(noopSingleton
==NULL
) { 
 190         errorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 193     ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2
, uprv_normalizer2_cleanup
); 
 198 static UBool U_CALLCONV 
uprv_normalizer2_cleanup() { 
 201     delete noopSingleton
; 
 202     noopSingleton 
= NULL
; 
 204     noopInitOnce
.reset();  
 210 const Norm2AllModes 
* 
 211 Norm2AllModes::getNFCInstance(UErrorCode 
&errorCode
) { 
 212     if(U_FAILURE(errorCode
)) { return NULL
; } 
 213     umtx_initOnce(nfcInitOnce
, &initNFCSingleton
, errorCode
); 
 218 Normalizer2::getNFCInstance(UErrorCode 
&errorCode
) { 
 219     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 220     return allModes
!=NULL 
? &allModes
->comp 
: NULL
; 
 224 Normalizer2::getNFDInstance(UErrorCode 
&errorCode
) { 
 225     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 226     return allModes
!=NULL 
? &allModes
->decomp 
: NULL
; 
 229 const Normalizer2 
*Normalizer2Factory::getFCDInstance(UErrorCode 
&errorCode
) { 
 230     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 231     return allModes
!=NULL 
? &allModes
->fcd 
: NULL
; 
 234 const Normalizer2 
*Normalizer2Factory::getFCCInstance(UErrorCode 
&errorCode
) { 
 235     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 236     return allModes
!=NULL 
? &allModes
->fcc 
: NULL
; 
 239 const Normalizer2 
*Normalizer2Factory::getNoopInstance(UErrorCode 
&errorCode
) { 
 240     if(U_FAILURE(errorCode
)) { return NULL
; } 
 241     umtx_initOnce(noopInitOnce
, &initNoopSingleton
, errorCode
); 
 242     return noopSingleton
; 
 245 const Normalizer2Impl 
* 
 246 Normalizer2Factory::getNFCImpl(UErrorCode 
&errorCode
) { 
 247     const Norm2AllModes 
*allModes
=Norm2AllModes::getNFCInstance(errorCode
); 
 248     return allModes
!=NULL 
? allModes
->impl 
: NULL
; 
 251 const Normalizer2Impl 
* 
 252 Normalizer2Factory::getImpl(const Normalizer2 
*norm2
) { 
 253     return &((Normalizer2WithImpl 
*)norm2
)->impl
; 
 258 // C API ------------------------------------------------------------------- *** 
 262 U_CAPI 
const UNormalizer2 
* U_EXPORT2
 
 263 unorm2_getNFCInstance(UErrorCode 
*pErrorCode
) { 
 264     return (const UNormalizer2 
*)Normalizer2::getNFCInstance(*pErrorCode
); 
 267 U_CAPI 
const UNormalizer2 
* U_EXPORT2
 
 268 unorm2_getNFDInstance(UErrorCode 
*pErrorCode
) { 
 269     return (const UNormalizer2 
*)Normalizer2::getNFDInstance(*pErrorCode
); 
 272 U_CAPI 
void U_EXPORT2
 
 273 unorm2_close(UNormalizer2 
*norm2
) { 
 274     delete (Normalizer2 
*)norm2
; 
 277 U_CAPI 
int32_t U_EXPORT2
 
 278 unorm2_normalize(const UNormalizer2 
*norm2
, 
 279                  const UChar 
*src
, int32_t length
, 
 280                  UChar 
*dest
, int32_t capacity
, 
 281                  UErrorCode 
*pErrorCode
) { 
 282     if(U_FAILURE(*pErrorCode
)) { 
 285     if( (src
==NULL 
? length
!=0 : length
<-1) || 
 286         (dest
==NULL 
? capacity
!=0 : capacity
<0) || 
 287         (src
==dest 
&& src
!=NULL
) 
 289         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 292     UnicodeString 
destString(dest
, 0, capacity
); 
 293     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 
 295         const Normalizer2 
*n2
=(const Normalizer2 
*)norm2
; 
 296         const Normalizer2WithImpl 
*n2wi
=dynamic_cast<const Normalizer2WithImpl 
*>(n2
); 
 298             // Avoid duplicate argument checking and support NUL-terminated src. 
 299             ReorderingBuffer 
buffer(n2wi
->impl
, destString
); 
 300             if(buffer
.init(length
, *pErrorCode
)) { 
 301                 n2wi
->normalize(src
, length
>=0 ? src
+length 
: NULL
, buffer
, *pErrorCode
); 
 304             UnicodeString 
srcString(length
<0, src
, length
); 
 305             n2
->normalize(srcString
, destString
, *pErrorCode
); 
 308     return destString
.extract(dest
, capacity
, *pErrorCode
); 
 312 normalizeSecondAndAppend(const UNormalizer2 
*norm2
, 
 313                          UChar 
*first
, int32_t firstLength
, int32_t firstCapacity
, 
 314                          const UChar 
*second
, int32_t secondLength
, 
 316                          UErrorCode 
*pErrorCode
) { 
 317     if(U_FAILURE(*pErrorCode
)) { 
 320     if( (second
==NULL 
? secondLength
!=0 : secondLength
<-1) || 
 321         (first
==NULL 
? (firstCapacity
!=0 || firstLength
!=0) : 
 322                        (firstCapacity
<0 || firstLength
<-1)) || 
 323         (first
==second 
&& first
!=NULL
) 
 325         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 328     UnicodeString 
firstString(first
, firstLength
, firstCapacity
); 
 329     firstLength
=firstString
.length();  // In case it was -1. 
 330     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 
 331     if(secondLength
!=0) { 
 332         const Normalizer2 
*n2
=(const Normalizer2 
*)norm2
; 
 333         const Normalizer2WithImpl 
*n2wi
=dynamic_cast<const Normalizer2WithImpl 
*>(n2
); 
 335             // Avoid duplicate argument checking and support NUL-terminated src. 
 336             UnicodeString safeMiddle
; 
 338                 ReorderingBuffer 
buffer(n2wi
->impl
, firstString
); 
 339                 if(buffer
.init(firstLength
+secondLength
+1, *pErrorCode
)) {  // destCapacity>=-1 
 340                     n2wi
->normalizeAndAppend(second
, secondLength
>=0 ? second
+secondLength 
: NULL
, 
 341                                              doNormalize
, safeMiddle
, buffer
, *pErrorCode
); 
 343             }  // The ReorderingBuffer destructor finalizes firstString. 
 344             if(U_FAILURE(*pErrorCode
) || firstString
.length()>firstCapacity
) { 
 345                 // Restore the modified suffix of the first string. 
 346                 // This does not restore first[] array contents between firstLength and firstCapacity. 
 347                 // (That might be uninitialized memory, as far as we know.) 
 348                 if(first
!=NULL
) { /* don't dereference NULL */ 
 349                   safeMiddle
.extract(0, 0x7fffffff, first
+firstLength
-safeMiddle
.length()); 
 350                   if(firstLength
<firstCapacity
) { 
 351                     first
[firstLength
]=0;  // NUL-terminate in case it was originally. 
 356             UnicodeString 
secondString(secondLength
<0, second
, secondLength
); 
 358                 n2
->normalizeSecondAndAppend(firstString
, secondString
, *pErrorCode
); 
 360                 n2
->append(firstString
, secondString
, *pErrorCode
); 
 364     return firstString
.extract(first
, firstCapacity
, *pErrorCode
); 
 367 U_CAPI 
int32_t U_EXPORT2
 
 368 unorm2_normalizeSecondAndAppend(const UNormalizer2 
*norm2
, 
 369                                 UChar 
*first
, int32_t firstLength
, int32_t firstCapacity
, 
 370                                 const UChar 
*second
, int32_t secondLength
, 
 371                                 UErrorCode 
*pErrorCode
) { 
 372     return normalizeSecondAndAppend(norm2
, 
 373                                     first
, firstLength
, firstCapacity
, 
 374                                     second
, secondLength
, 
 378 U_CAPI 
int32_t U_EXPORT2
 
 379 unorm2_append(const UNormalizer2 
*norm2
, 
 380               UChar 
*first
, int32_t firstLength
, int32_t firstCapacity
, 
 381               const UChar 
*second
, int32_t secondLength
, 
 382               UErrorCode 
*pErrorCode
) { 
 383     return normalizeSecondAndAppend(norm2
, 
 384                                     first
, firstLength
, firstCapacity
, 
 385                                     second
, secondLength
, 
 389 U_CAPI 
int32_t U_EXPORT2
 
 390 unorm2_getDecomposition(const UNormalizer2 
*norm2
, 
 391                         UChar32 c
, UChar 
*decomposition
, int32_t capacity
, 
 392                         UErrorCode 
*pErrorCode
) { 
 393     if(U_FAILURE(*pErrorCode
)) { 
 396     if(decomposition
==NULL 
? capacity
!=0 : capacity
<0) { 
 397         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 400     UnicodeString 
destString(decomposition
, 0, capacity
); 
 401     if(reinterpret_cast<const Normalizer2 
*>(norm2
)->getDecomposition(c
, destString
)) { 
 402         return destString
.extract(decomposition
, capacity
, *pErrorCode
); 
 408 U_CAPI 
int32_t U_EXPORT2
 
 409 unorm2_getRawDecomposition(const UNormalizer2 
*norm2
, 
 410                            UChar32 c
, UChar 
*decomposition
, int32_t capacity
, 
 411                            UErrorCode 
*pErrorCode
) { 
 412     if(U_FAILURE(*pErrorCode
)) { 
 415     if(decomposition
==NULL 
? capacity
!=0 : capacity
<0) { 
 416         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 419     UnicodeString 
destString(decomposition
, 0, capacity
); 
 420     if(reinterpret_cast<const Normalizer2 
*>(norm2
)->getRawDecomposition(c
, destString
)) { 
 421         return destString
.extract(decomposition
, capacity
, *pErrorCode
); 
 427 U_CAPI UChar32 U_EXPORT2
 
 428 unorm2_composePair(const UNormalizer2 
*norm2
, UChar32 a
, UChar32 b
) { 
 429     return reinterpret_cast<const Normalizer2 
*>(norm2
)->composePair(a
, b
); 
 432 U_CAPI 
uint8_t U_EXPORT2
 
 433 unorm2_getCombiningClass(const UNormalizer2 
*norm2
, UChar32 c
) { 
 434     return reinterpret_cast<const Normalizer2 
*>(norm2
)->getCombiningClass(c
); 
 437 U_CAPI UBool U_EXPORT2
 
 438 unorm2_isNormalized(const UNormalizer2 
*norm2
, 
 439                     const UChar 
*s
, int32_t length
, 
 440                     UErrorCode 
*pErrorCode
) { 
 441     if(U_FAILURE(*pErrorCode
)) { 
 444     if((s
==NULL 
&& length
!=0) || length
<-1) { 
 445         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 448     UnicodeString 
sString(length
<0, s
, length
); 
 449     return ((const Normalizer2 
*)norm2
)->isNormalized(sString
, *pErrorCode
); 
 452 U_CAPI UNormalizationCheckResult U_EXPORT2
 
 453 unorm2_quickCheck(const UNormalizer2 
*norm2
, 
 454                   const UChar 
*s
, int32_t length
, 
 455                   UErrorCode 
*pErrorCode
) { 
 456     if(U_FAILURE(*pErrorCode
)) { 
 459     if((s
==NULL 
&& length
!=0) || length
<-1) { 
 460         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 463     UnicodeString 
sString(length
<0, s
, length
); 
 464     return ((const Normalizer2 
*)norm2
)->quickCheck(sString
, *pErrorCode
); 
 467 U_CAPI 
int32_t U_EXPORT2
 
 468 unorm2_spanQuickCheckYes(const UNormalizer2 
*norm2
, 
 469                          const UChar 
*s
, int32_t length
, 
 470                          UErrorCode 
*pErrorCode
) { 
 471     if(U_FAILURE(*pErrorCode
)) { 
 474     if((s
==NULL 
&& length
!=0) || length
<-1) { 
 475         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 478     UnicodeString 
sString(length
<0, s
, length
); 
 479     return ((const Normalizer2 
*)norm2
)->spanQuickCheckYes(sString
, *pErrorCode
); 
 482 U_CAPI UBool U_EXPORT2
 
 483 unorm2_hasBoundaryBefore(const UNormalizer2 
*norm2
, UChar32 c
) { 
 484     return ((const Normalizer2 
*)norm2
)->hasBoundaryBefore(c
); 
 487 U_CAPI UBool U_EXPORT2
 
 488 unorm2_hasBoundaryAfter(const UNormalizer2 
*norm2
, UChar32 c
) { 
 489     return ((const Normalizer2 
*)norm2
)->hasBoundaryAfter(c
); 
 492 U_CAPI UBool U_EXPORT2
 
 493 unorm2_isInert(const UNormalizer2 
*norm2
, UChar32 c
) { 
 494     return ((const Normalizer2 
*)norm2
)->isInert(c
); 
 497 // Some properties APIs ---------------------------------------------------- *** 
 499 U_CAPI 
uint8_t U_EXPORT2
 
 500 u_getCombiningClass(UChar32 c
) { 
 501     UErrorCode errorCode
=U_ZERO_ERROR
; 
 502     const Normalizer2 
*nfd
=Normalizer2::getNFDInstance(errorCode
); 
 503     if(U_SUCCESS(errorCode
)) { 
 504         return nfd
->getCombiningClass(c
); 
 511 unorm_getFCD16(UChar32 c
) { 
 512     UErrorCode errorCode
=U_ZERO_ERROR
; 
 513     const Normalizer2Impl 
*impl
=Normalizer2Factory::getNFCImpl(errorCode
); 
 514     if(U_SUCCESS(errorCode
)) { 
 515         return impl
->getFCD16(c
); 
 521 #endif  // !UCONFIG_NO_NORMALIZATION