X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..4d9eefca008a7bc544144ef830fa144ce89deaa0:/icuSources/common/normalizer2.cpp?ds=inline diff --git a/icuSources/common/normalizer2.cpp b/icuSources/common/normalizer2.cpp index fd0048c5..a2c27dd1 100644 --- a/icuSources/common/normalizer2.cpp +++ b/icuSources/common/normalizer2.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2009-2010, International Business Machines +* Copyright (C) 2009-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -33,8 +33,27 @@ U_NAMESPACE_BEGIN // Public API dispatch via Normalizer2 subclasses -------------------------- *** +Normalizer2::~Normalizer2() {} + +UBool +Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { + return FALSE; +} + +UChar32 +Normalizer2::composePair(UChar32, UChar32) const { + return U_SENTINEL; +} + +uint8_t +Normalizer2::getCombiningClass(UChar32 /*c*/) const { + return 0; +} + // Normalizer2 implementation for the old UNORM_NONE. class NoopNormalizer2 : public Normalizer2 { + virtual ~NoopNormalizer2(); + virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, @@ -78,6 +97,7 @@ class NoopNormalizer2 : public Normalizer2 { getDecomposition(UChar32, UnicodeString &) const { return FALSE; } + // No need to override the default getRawDecomposition(). virtual UBool isNormalized(const UnicodeString &, UErrorCode &) const { return TRUE; @@ -95,11 +115,14 @@ class NoopNormalizer2 : public Normalizer2 { virtual UBool isInert(UChar32) const { return TRUE; } }; +NoopNormalizer2::~NoopNormalizer2() {} + // Intermediate class: // Has Normalizer2Impl and does boilerplate argument checking and setup. class Normalizer2WithImpl : public Normalizer2 { public: Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} + virtual ~Normalizer2WithImpl(); // normalize virtual UnicodeString & @@ -154,15 +177,24 @@ public: errorCode=U_ILLEGAL_ARGUMENT_ERROR; return first; } - ReorderingBuffer buffer(impl, first); - if(buffer.init(first.length()+second.length(), errorCode)) { - normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, - buffer, errorCode); + int32_t firstLength=first.length(); + UnicodeString safeMiddle; + { + ReorderingBuffer buffer(impl, first); + if(buffer.init(firstLength+second.length(), errorCode)) { + normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, + safeMiddle, buffer, errorCode); + } + } // The ReorderingBuffer destructor finalizes the first string. + if(U_FAILURE(errorCode)) { + // Restore the modified suffix of the first string. + first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); } return first; } virtual void normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const { @@ -179,6 +211,30 @@ public: } return TRUE; } + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { + UChar buffer[30]; + int32_t length; + const UChar *d=impl.getRawDecomposition(c, buffer, length); + if(d==NULL) { + return FALSE; + } + if(d==buffer) { + decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) + } else { + decomposition.setTo(FALSE, d, length); // read-only alias + } + return TRUE; + } + virtual UChar32 + composePair(UChar32 a, UChar32 b) const { + return impl.composePair(a, b); + } + + virtual uint8_t + getCombiningClass(UChar32 c) const { + return impl.getCC(impl.getNorm16(c)); + } // quick checks virtual UBool @@ -220,9 +276,12 @@ public: const Normalizer2Impl &impl; }; +Normalizer2WithImpl::~Normalizer2WithImpl() {} + class DecomposeNormalizer2 : public Normalizer2WithImpl { public: DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~DecomposeNormalizer2(); private: virtual void @@ -233,8 +292,9 @@ private: using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. virtual void normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode); + impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); } virtual const UChar * spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { @@ -249,10 +309,13 @@ private: virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } }; +DecomposeNormalizer2::~DecomposeNormalizer2() {} + class ComposeNormalizer2 : public Normalizer2WithImpl { public: ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : Normalizer2WithImpl(ni), onlyContiguous(fcc) {} + virtual ~ComposeNormalizer2(); private: virtual void @@ -263,8 +326,9 @@ private: using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. virtual void normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode); + impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); } virtual UBool @@ -319,9 +383,12 @@ private: const UBool onlyContiguous; }; +ComposeNormalizer2::~ComposeNormalizer2() {} + class FCDNormalizer2 : public Normalizer2WithImpl { public: FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~FCDNormalizer2(); private: virtual void @@ -332,8 +399,9 @@ private: using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. virtual void normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode); + impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); } virtual const UChar * spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { @@ -345,6 +413,8 @@ private: virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } }; +FCDNormalizer2::~FCDNormalizer2() {} + // instance cache ---------------------------------------------------------- *** struct Norm2AllModes : public UMemory { @@ -451,12 +521,7 @@ const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - if(allModes!=NULL) { - allModes->impl.getFCDTrie(errorCode); - return &allModes->fcd; - } else { - return NULL; - } + return allModes!=NULL ? &allModes->fcd : NULL; } const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { @@ -533,15 +598,29 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) { return &((Normalizer2WithImpl *)norm2)->impl; } -const UTrie2 * -Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) { - Norm2AllModes *allModes= - Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); - if(allModes!=NULL) { - return allModes->impl.getFCDTrie(errorCode); - } else { - return NULL; - } +const Normalizer2 * +Normalizer2::getNFCInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFCInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFDInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFDInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFKCInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFKCInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFKDInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFKDInstance(errorCode); +} + +const Normalizer2 * +Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { + return Normalizer2Factory::getNFKC_CFInstance(errorCode); } const Normalizer2 * @@ -554,6 +633,7 @@ Normalizer2::getInstance(const char *packageName, } if(name==NULL || *name==0) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; } Norm2AllModes *allModes=NULL; if(packageName==NULL) { @@ -609,7 +689,6 @@ Normalizer2::getInstance(const char *packageName, case UNORM2_DECOMPOSE: return &allModes->decomp; case UNORM2_FCD: - allModes->impl.getFCDTrie(errorCode); return &allModes->fcd; case UNORM2_COMPOSE_CONTIGUOUS: return &allModes->fcc; @@ -620,15 +699,38 @@ Normalizer2::getInstance(const char *packageName, return NULL; } -UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) - U_NAMESPACE_END // C API ------------------------------------------------------------------- *** U_NAMESPACE_USE -U_DRAFT const UNormalizer2 * U_EXPORT2 +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFCInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFDInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKDInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getInstance(const char *packageName, const char *name, UNormalization2Mode mode, @@ -636,12 +738,12 @@ unorm2_getInstance(const char *packageName, return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 unorm2_close(UNormalizer2 *norm2) { delete (Normalizer2 *)norm2; } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, @@ -693,16 +795,31 @@ normalizeSecondAndAppend(const UNormalizer2 *norm2, return 0; } UnicodeString firstString(first, firstLength, firstCapacity); + firstLength=firstString.length(); // In case it was -1. // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. if(secondLength!=0) { const Normalizer2 *n2=(const Normalizer2 *)norm2; const Normalizer2WithImpl *n2wi=dynamic_cast(n2); if(n2wi!=NULL) { // Avoid duplicate argument checking and support NUL-terminated src. - ReorderingBuffer buffer(n2wi->impl, firstString); - if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 - n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, - doNormalize, buffer, *pErrorCode); + UnicodeString safeMiddle; + { + ReorderingBuffer buffer(n2wi->impl, firstString); + if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 + n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, + doNormalize, safeMiddle, buffer, *pErrorCode); + } + } // The ReorderingBuffer destructor finalizes firstString. + if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { + // Restore the modified suffix of the first string. + // This does not restore first[] array contents between firstLength and firstCapacity. + // (That might be uninitialized memory, as far as we know.) + if(first!=NULL) { /* don't dereference NULL */ + safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); + if(firstLength(norm2)->getRawDecomposition(c, destString)) { + return destString.extract(decomposition, capacity, *pErrorCode); + } else { + return -1; + } +} + +U_CAPI UChar32 U_EXPORT2 +unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { + return reinterpret_cast(norm2)->composePair(a, b); +} + +U_CAPI uint8_t U_EXPORT2 +unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { + return reinterpret_cast(norm2)->getCombiningClass(c); +} + +U_CAPI UBool U_EXPORT2 unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode) { @@ -772,7 +918,7 @@ unorm2_isNormalized(const UNormalizer2 *norm2, return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); } -U_DRAFT UNormalizationCheckResult U_EXPORT2 +U_CAPI UNormalizationCheckResult U_EXPORT2 unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode) { @@ -787,7 +933,7 @@ unorm2_quickCheck(const UNormalizer2 *norm2, return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode) { @@ -802,24 +948,35 @@ unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { return ((const Normalizer2 *)norm2)->isInert(c); } // Some properties APIs ---------------------------------------------------- *** -U_CFUNC UNormalizationCheckResult U_EXPORT2 +U_CAPI uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); + if(U_SUCCESS(errorCode)) { + return nfd->getCombiningClass(c); + } else { + return 0; + } +} + +U_CFUNC UNormalizationCheckResult unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { if(mode<=UNORM_NONE || UNORM_FCD<=mode) { return UNORM_YES; @@ -833,14 +990,14 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { } } -U_CAPI const uint16_t * U_EXPORT2 -unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { - const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); - if(U_SUCCESS(*pErrorCode)) { - fcdHighStart=trie->highStart; - return trie->index; +U_CFUNC uint16_t +unorm_getFCD16(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + return impl->getFCD16(c); } else { - return NULL; + return 0; } }