X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/4388f060552cc537e71e957d32f35e9d75a61233..HEAD:/icuSources/common/normalizer2.cpp diff --git a/icuSources/common/normalizer2.cpp b/icuSources/common/normalizer2.cpp index 527731da..6be7e0b2 100644 --- a/icuSources/common/normalizer2.cpp +++ b/icuSources/common/normalizer2.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2009-2011, International Business Machines +* Copyright (C) 2009-2016, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: normalizer2.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -18,16 +20,25 @@ #if !UCONFIG_NO_NORMALIZATION -#include "unicode/localpointer.h" +#include "unicode/edits.h" #include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" #include "unicode/unistr.h" #include "unicode/unorm.h" -#include "cpputils.h" #include "cstring.h" #include "mutex.h" +#include "norm2allmodes.h" #include "normalizer2impl.h" +#include "uassert.h" #include "ucln_cmn.h" -#include "uhash.h" + +using icu::Normalizer2Impl; + +#if NORM2_HARDCODE_NFC_DATA +// NFC/NFD data machine-generated by gennorm2 --csource +#define INCLUDED_FROM_NORMALIZER2_CPP +#include "norm2_nfc_data.h" +#endif U_NAMESPACE_BEGIN @@ -35,6 +46,20 @@ U_NAMESPACE_BEGIN Normalizer2::~Normalizer2() {} +void +Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr) { + errorCode = U_UNSUPPORTED_ERROR; + return; + } + UnicodeString src16 = UnicodeString::fromUTF8(src); + normalize(src16, errorCode).toUTF8(sink); +} + UBool Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { return FALSE; @@ -50,7 +75,10 @@ Normalizer2::getCombiningClass(UChar32 /*c*/) const { return 0; } -UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) +UBool +Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const { + return U_SUCCESS(errorCode) && isNormalized(UnicodeString::fromUTF8(s), errorCode); +} // Normalizer2 implementation for the old UNORM_NONE. class NoopNormalizer2 : public Normalizer2 { @@ -59,7 +87,7 @@ class NoopNormalizer2 : public Normalizer2 { virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { if(U_SUCCESS(errorCode)) { if(&dest!=&src) { dest=src; @@ -69,10 +97,27 @@ class NoopNormalizer2 : public Normalizer2 { } return dest; } + virtual void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { + if(U_SUCCESS(errorCode)) { + if (edits != nullptr) { + if ((options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + edits->addUnchanged(src.length()); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(src.data(), src.length()); + } + sink.Flush(); + } + } + virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { if(U_SUCCESS(errorCode)) { if(&first!=&second) { first.append(second); @@ -85,7 +130,7 @@ class NoopNormalizer2 : public Normalizer2 { virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { if(U_SUCCESS(errorCode)) { if(&first!=&second) { first.append(second); @@ -96,656 +141,191 @@ class NoopNormalizer2 : public Normalizer2 { return first; } virtual UBool - getDecomposition(UChar32, UnicodeString &) const { + getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE { return FALSE; } - // No need to override the default getRawDecomposition(). + // No need to U_OVERRIDE the default getRawDecomposition(). + virtual UBool + isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE { + return U_SUCCESS(errorCode); + } virtual UBool - isNormalized(const UnicodeString &, UErrorCode &) const { - return TRUE; + isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE { + return U_SUCCESS(errorCode); } virtual UNormalizationCheckResult - quickCheck(const UnicodeString &, UErrorCode &) const { + quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE { return UNORM_YES; } virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { + spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE { return s.length(); } - virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } - virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } - virtual UBool isInert(UChar32) const { return TRUE; } + virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; } + virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; } + virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; } }; NoopNormalizer2::~NoopNormalizer2() {} -// Intermediate class: -// Has Normalizer2Impl and does boilerplate argument checking and setup. -class Normalizer2WithImpl : public Normalizer2 { -public: - Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} - virtual ~Normalizer2WithImpl(); - - // normalize - virtual UnicodeString & - normalize(const UnicodeString &src, - UnicodeString &dest, - UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - dest.setToBogus(); - return dest; - } - const UChar *sArray=src.getBuffer(); - if(&dest==&src || sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - dest.setToBogus(); - return dest; - } - dest.remove(); - ReorderingBuffer buffer(impl, dest); - if(buffer.init(src.length(), errorCode)) { - normalize(sArray, sArray+src.length(), buffer, errorCode); - } - return dest; - } - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; - - // normalize and append - virtual UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, TRUE, errorCode); - } - virtual UnicodeString & - append(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, FALSE, errorCode); - } - UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UBool doNormalize, - UErrorCode &errorCode) const { - uprv_checkCanGetBuffer(first, errorCode); - if(U_FAILURE(errorCode)) { - return first; - } - const UChar *secondArray=second.getBuffer(); - if(&first==&second || secondArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return first; - } - int32_t firstLength=first.length(); - UnicodeString safeMiddle; - { - ReorderingBuffer buffer(impl, first); - if(buffer.init(firstLength+second.length(), errorCode)) { - normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, - safeMiddle, buffer, errorCode); - } - } // The ReorderingBuffer destructor finalizes the first string. - if(U_FAILURE(errorCode)) { - // Restore the modified suffix of the first string. - first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); - } - return first; - } - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; - virtual UBool - getDecomposition(UChar32 c, UnicodeString &decomposition) const { - UChar buffer[4]; - int32_t length; - const UChar *d=impl.getDecomposition(c, buffer, length); - if(d==NULL) { - return FALSE; - } - if(d==buffer) { - decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) - } else { - decomposition.setTo(FALSE, d, length); // read-only alias - } - return TRUE; - } - virtual UBool - getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { - UChar buffer[30]; - int32_t length; - const UChar *d=impl.getRawDecomposition(c, buffer, length); - if(d==NULL) { - return FALSE; - } - if(d==buffer) { - decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) - } else { - decomposition.setTo(FALSE, d, length); // read-only alias - } - return TRUE; - } - virtual UChar32 - composePair(UChar32 a, UChar32 b) const { - return impl.composePair(a, b); - } - - virtual uint8_t - getCombiningClass(UChar32 c) const { - return impl.getCC(impl.getNorm16(c)); - } - - // quick checks - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - const UChar *sLimit=sArray+s.length(); - return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); - } - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { - return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; - } - virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return 0; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; - - virtual UNormalizationCheckResult getQuickCheck(UChar32) const { - return UNORM_YES; - } - - const Normalizer2Impl &impl; -}; - Normalizer2WithImpl::~Normalizer2WithImpl() {} -class DecomposeNormalizer2 : public Normalizer2WithImpl { -public: - DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} - virtual ~DecomposeNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.decompose(src, limit, &buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { - return impl.decompose(src, limit, NULL, errorCode); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { - return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; - } - virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } - virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } -}; - DecomposeNormalizer2::~DecomposeNormalizer2() {} -class ComposeNormalizer2 : public Normalizer2WithImpl { -public: - ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : - Normalizer2WithImpl(ni), onlyContiguous(fcc) {} - virtual ~ComposeNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); - } - - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - UnicodeString temp; - ReorderingBuffer buffer(impl, temp); - if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization - return FALSE; - } - return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); - } - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return UNORM_MAYBE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return UNORM_MAYBE; - } - UNormalizationCheckResult qcResult=UNORM_YES; - impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); - return qcResult; - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { - return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { - return impl.getCompQuickCheck(impl.getNorm16(c)); - } - virtual UBool hasBoundaryBefore(UChar32 c) const { - return impl.hasCompBoundaryBefore(c); - } - virtual UBool hasBoundaryAfter(UChar32 c) const { - return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); - } - virtual UBool isInert(UChar32 c) const { - return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); - } - - const UBool onlyContiguous; -}; - ComposeNormalizer2::~ComposeNormalizer2() {} -class FCDNormalizer2 : public Normalizer2WithImpl { -public: - FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} - virtual ~FCDNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.makeFCD(src, limit, &buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { - return impl.makeFCD(src, limit, NULL, errorCode); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } - virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } -}; - FCDNormalizer2::~FCDNormalizer2() {} // instance cache ---------------------------------------------------------- *** -struct Norm2AllModes : public UMemory { - static Norm2AllModes *createInstance(const char *packageName, - const char *name, - UErrorCode &errorCode); - Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} - - Normalizer2Impl impl; - ComposeNormalizer2 comp; - DecomposeNormalizer2 decomp; - FCDNormalizer2 fcd; - ComposeNormalizer2 fcc; -}; - -Norm2AllModes * -Norm2AllModes::createInstance(const char *packageName, - const char *name, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - LocalPointer allModes(new Norm2AllModes); - if(allModes.isNull()) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - allModes->impl.load(packageName, name, errorCode); - return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; -} - U_CDECL_BEGIN static UBool U_CALLCONV uprv_normalizer2_cleanup(); U_CDECL_END -class Norm2AllModesSingleton : public TriStateSingletonWrapper { -public: - Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : - TriStateSingletonWrapper(s), name(n) {} - Norm2AllModes *getInstance(UErrorCode &errorCode) { - return TriStateSingletonWrapper::getInstance(createInstance, name, errorCode); - } -private: - static void *createInstance(const void *context, UErrorCode &errorCode) { - ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); - return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); - } - - const char *name; -}; +static Normalizer2 *noopSingleton; +static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; -STATIC_TRI_STATE_SINGLETON(nfcSingleton); -STATIC_TRI_STATE_SINGLETON(nfkcSingleton); -STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); - -class Norm2Singleton : public SimpleSingletonWrapper { -public: - Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper(s) {} - Normalizer2 *getInstance(UErrorCode &errorCode) { - return SimpleSingletonWrapper::getInstance(createInstance, NULL, errorCode); - } -private: - static void *createInstance(const void *, UErrorCode &errorCode) { - Normalizer2 *noop=new NoopNormalizer2; - if(noop==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } - ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); - return noop; +static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; } -}; - -STATIC_SIMPLE_SINGLETON(noopSingleton); - -static UHashtable *cache=NULL; - -U_CDECL_BEGIN - -static void U_CALLCONV deleteNorm2AllModes(void *allModes) { - delete (Norm2AllModes *)allModes; -} - -static UBool U_CALLCONV uprv_normalizer2_cleanup() { - Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); - Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); - Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); - Norm2Singleton(noopSingleton).deleteInstance(); - uhash_close(cache); - cache=NULL; - return TRUE; -} - -U_CDECL_END - -const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { - Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - return allModes!=NULL ? &allModes->comp : NULL; -} - -const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { - Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - return allModes!=NULL ? &allModes->decomp : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { - Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - return allModes!=NULL ? &allModes->fcd : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { - Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - return allModes!=NULL ? &allModes->fcc : NULL; + noopSingleton=new NoopNormalizer2; + if(noopSingleton==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); } -const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { - Norm2AllModes *allModes= - Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); - return allModes!=NULL ? &allModes->comp : NULL; +const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); + return noopSingleton; } -const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { - Norm2AllModes *allModes= - Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); - return allModes!=NULL ? &allModes->decomp : NULL; +const Normalizer2Impl * +Normalizer2Factory::getImpl(const Normalizer2 *norm2) { + return &((Normalizer2WithImpl *)norm2)->impl; } -const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { - Norm2AllModes *allModes= - Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); - return allModes!=NULL ? &allModes->comp : NULL; +Norm2AllModes::~Norm2AllModes() { + delete impl; } -const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { - return Norm2Singleton(noopSingleton).getInstance(errorCode); +Norm2AllModes * +Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + delete impl; + return NULL; + } + Norm2AllModes *allModes=new Norm2AllModes(impl); + if(allModes==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + delete impl; + return NULL; + } + return allModes; } -const Normalizer2 * -Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { +#if NORM2_HARDCODE_NFC_DATA +Norm2AllModes * +Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } - switch(mode) { - case UNORM_NFD: - return getNFDInstance(errorCode); - case UNORM_NFKD: - return getNFKDInstance(errorCode); - case UNORM_NFC: - return getNFCInstance(errorCode); - case UNORM_NFKC: - return getNFKCInstance(errorCode); - case UNORM_FCD: - return getFCDInstance(errorCode); - default: // UNORM_NONE - return getNoopInstance(errorCode); + Normalizer2Impl *impl=new Normalizer2Impl; + if(impl==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; } + impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie, + norm2_nfc_data_extraData, norm2_nfc_data_smallFCD); + return createInstance(impl, errorCode); } -const Normalizer2Impl * -Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { - Norm2AllModes *allModes= - Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - return allModes!=NULL ? &allModes->impl : NULL; -} +static Norm2AllModes *nfcSingleton; -const Normalizer2Impl * -Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { - Norm2AllModes *allModes= - Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); - return allModes!=NULL ? &allModes->impl : NULL; -} +static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; -const Normalizer2Impl * -Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { - Norm2AllModes *allModes= - Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); - return allModes!=NULL ? &allModes->impl : NULL; +static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) { + nfcSingleton=Norm2AllModes::createNFCInstance(errorCode); + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); } -const Normalizer2Impl * -Normalizer2Factory::getImpl(const Normalizer2 *norm2) { - return &((Normalizer2WithImpl *)norm2)->impl; +const Norm2AllModes * +Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode); + return nfcSingleton; } const Normalizer2 * Normalizer2::getNFCInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFCInstance(errorCode); + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; } const Normalizer2 * Normalizer2::getNFDInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFDInstance(errorCode); + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; } -const Normalizer2 * -Normalizer2::getNFKCInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFKCInstance(errorCode); +const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcd : NULL; } -const Normalizer2 * -Normalizer2::getNFKDInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFKDInstance(errorCode); +const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcc : NULL; } -const Normalizer2 * -Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFKC_CFInstance(errorCode); +const Normalizer2Impl * +Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? allModes->impl : NULL; } +#endif // NORM2_HARDCODE_NFC_DATA -const Normalizer2 * -Normalizer2::getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - if(name==NULL || *name==0) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - Norm2AllModes *allModes=NULL; - if(packageName==NULL) { - if(0==uprv_strcmp(name, "nfc")) { - allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - } else if(0==uprv_strcmp(name, "nfkc")) { - allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); - } else if(0==uprv_strcmp(name, "nfkc_cf")) { - allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); - } - } - if(allModes==NULL && U_SUCCESS(errorCode)) { - { - Mutex lock; - if(cache!=NULL) { - allModes=(Norm2AllModes *)uhash_get(cache, name); - } - } - if(allModes==NULL) { - LocalPointer localAllModes( - Norm2AllModes::createInstance(packageName, name, errorCode)); - if(U_SUCCESS(errorCode)) { - Mutex lock; - if(cache==NULL) { - cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); - if(U_FAILURE(errorCode)) { - return NULL; - } - uhash_setKeyDeleter(cache, uprv_free); - uhash_setValueDeleter(cache, deleteNorm2AllModes); - } - void *temp=uhash_get(cache, name); - if(temp==NULL) { - int32_t keyLength=uprv_strlen(name)+1; - char *nameCopy=(char *)uprv_malloc(keyLength); - if(nameCopy==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(nameCopy, name, keyLength); - uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); - } else { - // race condition - allModes=(Norm2AllModes *)temp; - } - } - } - } - if(allModes!=NULL && U_SUCCESS(errorCode)) { - switch(mode) { - case UNORM2_COMPOSE: - return &allModes->comp; - case UNORM2_DECOMPOSE: - return &allModes->decomp; - case UNORM2_FCD: - return &allModes->fcd; - case UNORM2_COMPOSE_CONTIGUOUS: - return &allModes->fcc; - default: - break; // do nothing - } - } - return NULL; +U_CDECL_BEGIN + +static UBool U_CALLCONV uprv_normalizer2_cleanup() { + delete noopSingleton; + noopSingleton = NULL; + noopInitOnce.reset(); +#if NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + return TRUE; } +U_CDECL_END + U_NAMESPACE_END // C API ------------------------------------------------------------------- *** U_NAMESPACE_USE -U_DRAFT const UNormalizer2 * U_EXPORT2 +U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getNFCInstance(UErrorCode *pErrorCode) { return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); } -U_DRAFT const UNormalizer2 * U_EXPORT2 +U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getNFDInstance(UErrorCode *pErrorCode) { return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); } -U_DRAFT const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); -} - -U_DRAFT const UNormalizer2 * U_EXPORT2 -unorm2_getNFKDInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); -} - -U_DRAFT const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); -} - -U_DRAFT const UNormalizer2 * U_EXPORT2 -unorm2_getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); -} - -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 unorm2_close(UNormalizer2 *norm2) { delete (Normalizer2 *)norm2; } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, @@ -835,7 +415,7 @@ normalizeSecondAndAppend(const UNormalizer2 *norm2, return firstString.extract(first, firstCapacity, *pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, @@ -846,7 +426,7 @@ unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, TRUE, pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, @@ -857,7 +437,7 @@ unorm2_append(const UNormalizer2 *norm2, FALSE, pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode) { @@ -876,7 +456,7 @@ unorm2_getDecomposition(const UNormalizer2 *norm2, } } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode) { @@ -895,17 +475,17 @@ unorm2_getRawDecomposition(const UNormalizer2 *norm2, } } -U_DRAFT UChar32 U_EXPORT2 +U_CAPI UChar32 U_EXPORT2 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { return reinterpret_cast(norm2)->composePair(a, b); } -U_DRAFT uint8_t U_EXPORT2 +U_CAPI uint8_t U_EXPORT2 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { return reinterpret_cast(norm2)->getCombiningClass(c); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode) { @@ -920,7 +500,7 @@ unorm2_isNormalized(const UNormalizer2 *norm2, return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); } -U_DRAFT UNormalizationCheckResult U_EXPORT2 +U_CAPI UNormalizationCheckResult U_EXPORT2 unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode) { @@ -935,7 +515,7 @@ unorm2_quickCheck(const UNormalizer2 *norm2, return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode) { @@ -950,17 +530,17 @@ unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { return ((const Normalizer2 *)norm2)->isInert(c); } @@ -970,7 +550,7 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { U_CAPI uint8_t U_EXPORT2 u_getCombiningClass(UChar32 c) { UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); + const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); if(U_SUCCESS(errorCode)) { return nfd->getCombiningClass(c); } else { @@ -978,20 +558,6 @@ u_getCombiningClass(UChar32 c) { } } -U_CFUNC UNormalizationCheckResult -unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { - if(mode<=UNORM_NONE || UNORM_FCD<=mode) { - return UNORM_YES; - } - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); - if(U_SUCCESS(errorCode)) { - return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); - } else { - return UNORM_MAYBE; - } -} - U_CFUNC uint16_t unorm_getFCD16(UChar32 c) { UErrorCode errorCode=U_ZERO_ERROR;