X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/57a6839dcb3bba09e8228b822b290604668416fe..HEAD:/icuSources/common/normalizer2.cpp diff --git a/icuSources/common/normalizer2.cpp b/icuSources/common/normalizer2.cpp index ed82f2c0..6be7e0b2 100644 --- a/icuSources/common/normalizer2.cpp +++ b/icuSources/common/normalizer2.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2009-2013, International Business Machines +* Copyright (C) 2009-2016, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: normalizer2.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -18,17 +20,25 @@ #if !UCONFIG_NO_NORMALIZATION -#include "unicode/localpointer.h" +#include "unicode/edits.h" #include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" #include "unicode/unistr.h" #include "unicode/unorm.h" -#include "cpputils.h" #include "cstring.h" #include "mutex.h" +#include "norm2allmodes.h" #include "normalizer2impl.h" #include "uassert.h" #include "ucln_cmn.h" -#include "uhash.h" + +using icu::Normalizer2Impl; + +#if NORM2_HARDCODE_NFC_DATA +// NFC/NFD data machine-generated by gennorm2 --csource +#define INCLUDED_FROM_NORMALIZER2_CPP +#include "norm2_nfc_data.h" +#endif U_NAMESPACE_BEGIN @@ -36,6 +46,20 @@ U_NAMESPACE_BEGIN Normalizer2::~Normalizer2() {} +void +Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr) { + errorCode = U_UNSUPPORTED_ERROR; + return; + } + UnicodeString src16 = UnicodeString::fromUTF8(src); + normalize(src16, errorCode).toUTF8(sink); +} + UBool Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { return FALSE; @@ -51,6 +75,11 @@ Normalizer2::getCombiningClass(UChar32 /*c*/) const { return 0; } +UBool +Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const { + return U_SUCCESS(errorCode) && isNormalized(UnicodeString::fromUTF8(s), errorCode); +} + // Normalizer2 implementation for the old UNORM_NONE. class NoopNormalizer2 : public Normalizer2 { virtual ~NoopNormalizer2(); @@ -58,7 +87,7 @@ class NoopNormalizer2 : public Normalizer2 { virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { if(U_SUCCESS(errorCode)) { if(&dest!=&src) { dest=src; @@ -68,10 +97,27 @@ class NoopNormalizer2 : public Normalizer2 { } return dest; } + virtual void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { + if(U_SUCCESS(errorCode)) { + if (edits != nullptr) { + if ((options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + edits->addUnchanged(src.length()); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(src.data(), src.length()); + } + sink.Flush(); + } + } + virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { if(U_SUCCESS(errorCode)) { if(&first!=&second) { first.append(second); @@ -84,7 +130,7 @@ class NoopNormalizer2 : public Normalizer2 { virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { if(U_SUCCESS(errorCode)) { if(&first!=&second) { first.append(second); @@ -95,604 +141,169 @@ class NoopNormalizer2 : public Normalizer2 { return first; } virtual UBool - getDecomposition(UChar32, UnicodeString &) const { + getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE { return FALSE; } - // No need to override the default getRawDecomposition(). + // No need to U_OVERRIDE the default getRawDecomposition(). + virtual UBool + isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE { + return U_SUCCESS(errorCode); + } virtual UBool - isNormalized(const UnicodeString &, UErrorCode &) const { - return TRUE; + isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE { + return U_SUCCESS(errorCode); } virtual UNormalizationCheckResult - quickCheck(const UnicodeString &, UErrorCode &) const { + quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE { return UNORM_YES; } virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { + spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE { return s.length(); } - virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } - virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } - virtual UBool isInert(UChar32) const { return TRUE; } + virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; } + virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; } + virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; } }; NoopNormalizer2::~NoopNormalizer2() {} -// Intermediate class: -// Has Normalizer2Impl and does boilerplate argument checking and setup. -class Normalizer2WithImpl : public Normalizer2 { -public: - Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} - virtual ~Normalizer2WithImpl(); - - // normalize - virtual UnicodeString & - normalize(const UnicodeString &src, - UnicodeString &dest, - UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - dest.setToBogus(); - return dest; - } - const UChar *sArray=src.getBuffer(); - if(&dest==&src || sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - dest.setToBogus(); - return dest; - } - dest.remove(); - ReorderingBuffer buffer(impl, dest); - if(buffer.init(src.length(), errorCode)) { - normalize(sArray, sArray+src.length(), buffer, errorCode); - } - return dest; - } - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; - - // normalize and append - virtual UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, TRUE, errorCode); - } - virtual UnicodeString & - append(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, FALSE, errorCode); - } - UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UBool doNormalize, - UErrorCode &errorCode) const { - uprv_checkCanGetBuffer(first, errorCode); - if(U_FAILURE(errorCode)) { - return first; - } - const UChar *secondArray=second.getBuffer(); - if(&first==&second || secondArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return first; - } - int32_t firstLength=first.length(); - UnicodeString safeMiddle; - { - ReorderingBuffer buffer(impl, first); - if(buffer.init(firstLength+second.length(), errorCode)) { - normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, - safeMiddle, buffer, errorCode); - } - } // The ReorderingBuffer destructor finalizes the first string. - if(U_FAILURE(errorCode)) { - // Restore the modified suffix of the first string. - first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); - } - return first; - } - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; - virtual UBool - getDecomposition(UChar32 c, UnicodeString &decomposition) const { - UChar buffer[4]; - int32_t length; - const UChar *d=impl.getDecomposition(c, buffer, length); - if(d==NULL) { - return FALSE; - } - if(d==buffer) { - decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) - } else { - decomposition.setTo(FALSE, d, length); // read-only alias - } - return TRUE; - } - virtual UBool - getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { - UChar buffer[30]; - int32_t length; - const UChar *d=impl.getRawDecomposition(c, buffer, length); - if(d==NULL) { - return FALSE; - } - if(d==buffer) { - decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) - } else { - decomposition.setTo(FALSE, d, length); // read-only alias - } - return TRUE; - } - virtual UChar32 - composePair(UChar32 a, UChar32 b) const { - return impl.composePair(a, b); - } - - virtual uint8_t - getCombiningClass(UChar32 c) const { - return impl.getCC(impl.getNorm16(c)); - } - - // quick checks - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - const UChar *sLimit=sArray+s.length(); - return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); - } - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { - return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; - } - virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return 0; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; - - virtual UNormalizationCheckResult getQuickCheck(UChar32) const { - return UNORM_YES; - } - - const Normalizer2Impl &impl; -}; - Normalizer2WithImpl::~Normalizer2WithImpl() {} -class DecomposeNormalizer2 : public Normalizer2WithImpl { -public: - DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} - virtual ~DecomposeNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.decompose(src, limit, &buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { - return impl.decompose(src, limit, NULL, errorCode); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { - return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; - } - virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } - virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } -}; - DecomposeNormalizer2::~DecomposeNormalizer2() {} -class ComposeNormalizer2 : public Normalizer2WithImpl { -public: - ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : - Normalizer2WithImpl(ni), onlyContiguous(fcc) {} - virtual ~ComposeNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); - } - - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - UnicodeString temp; - ReorderingBuffer buffer(impl, temp); - if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization - return FALSE; - } - return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); - } - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return UNORM_MAYBE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return UNORM_MAYBE; - } - UNormalizationCheckResult qcResult=UNORM_YES; - impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); - return qcResult; - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { - return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { - return impl.getCompQuickCheck(impl.getNorm16(c)); - } - virtual UBool hasBoundaryBefore(UChar32 c) const { - return impl.hasCompBoundaryBefore(c); - } - virtual UBool hasBoundaryAfter(UChar32 c) const { - return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); - } - virtual UBool isInert(UChar32 c) const { - return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); - } - - const UBool onlyContiguous; -}; - ComposeNormalizer2::~ComposeNormalizer2() {} -class FCDNormalizer2 : public Normalizer2WithImpl { -public: - FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} - virtual ~FCDNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.makeFCD(src, limit, &buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { - return impl.makeFCD(src, limit, NULL, errorCode); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } - virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } -}; - FCDNormalizer2::~FCDNormalizer2() {} // instance cache ---------------------------------------------------------- *** -struct Norm2AllModes : public UMemory { - static Norm2AllModes *createInstance(const char *packageName, - const char *name, - UErrorCode &errorCode); - Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} - - Normalizer2Impl impl; - ComposeNormalizer2 comp; - DecomposeNormalizer2 decomp; - FCDNormalizer2 fcd; - ComposeNormalizer2 fcc; -}; - -Norm2AllModes * -Norm2AllModes::createInstance(const char *packageName, - const char *name, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - LocalPointer allModes(new Norm2AllModes); - if(allModes.isNull()) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - allModes->impl.load(packageName, name, errorCode); - return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; -} - U_CDECL_BEGIN static UBool U_CALLCONV uprv_normalizer2_cleanup(); U_CDECL_END - -static Norm2AllModes *nfcSingleton; -static Norm2AllModes *nfkcSingleton; -static Norm2AllModes *nfkc_cfSingleton; static Normalizer2 *noopSingleton; -static UHashtable *cache=NULL; - -static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; -static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; -static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; -// UInitOnce singleton initialization function -static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { - if (uprv_strcmp(what, "nfc") == 0) { - nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); - } else if (uprv_strcmp(what, "nfkc") == 0) { - nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); - } else if (uprv_strcmp(what, "nfkc_cf") == 0) { - nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); - } else if (uprv_strcmp(what, "noop") == 0) { - noopSingleton = new NoopNormalizer2; - } else { - U_ASSERT(FALSE); // Unknown singleton +static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + noopSingleton=new NoopNormalizer2; + if(noopSingleton==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; } ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); } -U_CDECL_BEGIN - -static void U_CALLCONV deleteNorm2AllModes(void *allModes) { - delete (Norm2AllModes *)allModes; -} - -static UBool U_CALLCONV uprv_normalizer2_cleanup() { - delete nfcSingleton; - nfcSingleton = NULL; - delete nfkcSingleton; - nfkcSingleton = NULL; - delete nfkc_cfSingleton; - nfkc_cfSingleton = NULL; - delete noopSingleton; - noopSingleton = NULL; - uhash_close(cache); - cache=NULL; - nfcInitOnce.reset(); - nfkcInitOnce.reset(); - nfkc_cfInitOnce.reset(); - noopInitOnce.reset(); - return TRUE; -} - -U_CDECL_END - -const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { - umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); - return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL; -} - -const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { - umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); - return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { - umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); - return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { - umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); - return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL; -} - -const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { - umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); - return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL; +const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); + return noopSingleton; } -const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { - umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); - return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL; +const Normalizer2Impl * +Normalizer2Factory::getImpl(const Normalizer2 *norm2) { + return &((Normalizer2WithImpl *)norm2)->impl; } -const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { - umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); - return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL; +Norm2AllModes::~Norm2AllModes() { + delete impl; } -const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { - umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode); - return noopSingleton; +Norm2AllModes * +Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + delete impl; + return NULL; + } + Norm2AllModes *allModes=new Norm2AllModes(impl); + if(allModes==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + delete impl; + return NULL; + } + return allModes; } -const Normalizer2 * -Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { +#if NORM2_HARDCODE_NFC_DATA +Norm2AllModes * +Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } - switch(mode) { - case UNORM_NFD: - return getNFDInstance(errorCode); - case UNORM_NFKD: - return getNFKDInstance(errorCode); - case UNORM_NFC: - return getNFCInstance(errorCode); - case UNORM_NFKC: - return getNFKCInstance(errorCode); - case UNORM_FCD: - return getFCDInstance(errorCode); - default: // UNORM_NONE - return getNoopInstance(errorCode); + Normalizer2Impl *impl=new Normalizer2Impl; + if(impl==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; } + impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie, + norm2_nfc_data_extraData, norm2_nfc_data_smallFCD); + return createInstance(impl, errorCode); } -const Normalizer2Impl * -Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { - umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); - return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL; -} +static Norm2AllModes *nfcSingleton; -const Normalizer2Impl * -Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { - umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); - return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL; -} +static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; -const Normalizer2Impl * -Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { - umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); - return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL; +static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) { + nfcSingleton=Norm2AllModes::createNFCInstance(errorCode); + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); } -const Normalizer2Impl * -Normalizer2Factory::getImpl(const Normalizer2 *norm2) { - return &((Normalizer2WithImpl *)norm2)->impl; +const Norm2AllModes * +Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode); + return nfcSingleton; } const Normalizer2 * Normalizer2::getNFCInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFCInstance(errorCode); + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; } const Normalizer2 * Normalizer2::getNFDInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFDInstance(errorCode); + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; } -const Normalizer2 * -Normalizer2::getNFKCInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFKCInstance(errorCode); +const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcd : NULL; } -const Normalizer2 * -Normalizer2::getNFKDInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFKDInstance(errorCode); +const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcc : NULL; } -const Normalizer2 * -Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { - return Normalizer2Factory::getNFKC_CFInstance(errorCode); +const Normalizer2Impl * +Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? allModes->impl : NULL; } +#endif // NORM2_HARDCODE_NFC_DATA -const Normalizer2 * -Normalizer2::getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - if(name==NULL || *name==0) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - Norm2AllModes *allModes=NULL; - if(packageName==NULL) { - if(0==uprv_strcmp(name, "nfc")) { - umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); - allModes=nfcSingleton; - } else if(0==uprv_strcmp(name, "nfkc")) { - umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); - allModes=nfkcSingleton; - } else if(0==uprv_strcmp(name, "nfkc_cf")) { - umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); - allModes=nfkc_cfSingleton; - } - } - if(allModes==NULL && U_SUCCESS(errorCode)) { - { - Mutex lock; - if(cache!=NULL) { - allModes=(Norm2AllModes *)uhash_get(cache, name); - } - } - if(allModes==NULL) { - LocalPointer localAllModes( - Norm2AllModes::createInstance(packageName, name, errorCode)); - if(U_SUCCESS(errorCode)) { - Mutex lock; - if(cache==NULL) { - cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); - if(U_FAILURE(errorCode)) { - return NULL; - } - uhash_setKeyDeleter(cache, uprv_free); - uhash_setValueDeleter(cache, deleteNorm2AllModes); - } - void *temp=uhash_get(cache, name); - if(temp==NULL) { - int32_t keyLength=uprv_strlen(name)+1; - char *nameCopy=(char *)uprv_malloc(keyLength); - if(nameCopy==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(nameCopy, name, keyLength); - uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); - } else { - // race condition - allModes=(Norm2AllModes *)temp; - } - } - } - } - if(allModes!=NULL && U_SUCCESS(errorCode)) { - switch(mode) { - case UNORM2_COMPOSE: - return &allModes->comp; - case UNORM2_DECOMPOSE: - return &allModes->decomp; - case UNORM2_FCD: - return &allModes->fcd; - case UNORM2_COMPOSE_CONTIGUOUS: - return &allModes->fcc; - default: - break; // do nothing - } - } - return NULL; +U_CDECL_BEGIN + +static UBool U_CALLCONV uprv_normalizer2_cleanup() { + delete noopSingleton; + noopSingleton = NULL; + noopInitOnce.reset(); +#if NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + return TRUE; } +U_CDECL_END + U_NAMESPACE_END // C API ------------------------------------------------------------------- *** @@ -709,29 +320,6 @@ unorm2_getNFDInstance(UErrorCode *pErrorCode) { return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); } -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); -} - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFKDInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); -} - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); -} - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); -} - U_CAPI void U_EXPORT2 unorm2_close(UNormalizer2 *norm2) { delete (Normalizer2 *)norm2; @@ -962,7 +550,7 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { U_CAPI uint8_t U_EXPORT2 u_getCombiningClass(UChar32 c) { UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); + const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); if(U_SUCCESS(errorCode)) { return nfd->getCombiningClass(c); } else { @@ -970,20 +558,6 @@ u_getCombiningClass(UChar32 c) { } } -U_CFUNC UNormalizationCheckResult -unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { - if(mode<=UNORM_NONE || UNORM_FCD<=mode) { - return UNORM_YES; - } - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); - if(U_SUCCESS(errorCode)) { - return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); - } else { - return UNORM_MAYBE; - } -} - U_CFUNC uint16_t unorm_getFCD16(UChar32 c) { UErrorCode errorCode=U_ZERO_ERROR;