git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/normalizer2.cpp
ICU-511.31.tar.gz
[apple/icu.git] / icuSources / common / normalizer2.cpp
index fd0048c56863da352dd84bba0be2a20deba8378b..a2c27dd1426bcb2ee8beda7b67ab11375039d7e3 100644 (file)
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2009-2010, International Business Machines
+*   Copyright (C) 2009-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -33,8 +33,27 @@ U_NAMESPACE_BEGIN
 
 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
 
+Normalizer2::~Normalizer2() {}
+
+UBool
+Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
+    return FALSE;
+}
+
+UChar32
+Normalizer2::composePair(UChar32, UChar32) const {
+    return U_SENTINEL;
+}
+
+uint8_t
+Normalizer2::getCombiningClass(UChar32 /*c*/) const {
+    return 0;
+}
+
 // Normalizer2 implementation for the old UNORM_NONE.
 class NoopNormalizer2 : public Normalizer2 {
+    virtual ~NoopNormalizer2();
+
     virtual UnicodeString &
     normalize(const UnicodeString &src,
               UnicodeString &dest,
@@ -78,6 +97,7 @@ class NoopNormalizer2 : public Normalizer2 {
     getDecomposition(UChar32, UnicodeString &) const {
         return FALSE;
     }
+    // No need to override the default getRawDecomposition().
     virtual UBool
     isNormalized(const UnicodeString &, UErrorCode &) const {
         return TRUE;
@@ -95,11 +115,14 @@ class NoopNormalizer2 : public Normalizer2 {
     virtual UBool isInert(UChar32) const { return TRUE; }
 };
 
+NoopNormalizer2::~NoopNormalizer2() {}
+
 // Intermediate class:
 // Has Normalizer2Impl and does boilerplate argument checking and setup.
 class Normalizer2WithImpl : public Normalizer2 {
 public:
     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
+    virtual ~Normalizer2WithImpl();
 
     // normalize
     virtual UnicodeString &
@@ -154,15 +177,24 @@ public:
             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
             return first;
         }
-        ReorderingBuffer buffer(impl, first);
-        if(buffer.init(first.length()+second.length(), errorCode)) {
-            normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
-                               buffer, errorCode);
+        int32_t firstLength=first.length();
+        UnicodeString safeMiddle;
+        {
+            ReorderingBuffer buffer(impl, first);
+            if(buffer.init(firstLength+second.length(), errorCode)) {
+                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
+                                   safeMiddle, buffer, errorCode);
+            }
+        }  // The ReorderingBuffer destructor finalizes the first string.
+        if(U_FAILURE(errorCode)) {
+            // Restore the modified suffix of the first string.
+            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
         }
         return first;
     }
     virtual void
     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
     virtual UBool
     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
@@ -179,6 +211,30 @@ public:
         }
         return TRUE;
     }
+    virtual UBool
+    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
+        UChar buffer[30];
+        int32_t length;
+        const UChar *d=impl.getRawDecomposition(c, buffer, length);
+        if(d==NULL) {
+            return FALSE;
+        }
+        if(d==buffer) {
+            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
+        } else {
+            decomposition.setTo(FALSE, d, length);  // read-only alias
+        }
+        return TRUE;
+    }
+    virtual UChar32
+    composePair(UChar32 a, UChar32 b) const {
+        return impl.composePair(a, b);
+    }
+
+    virtual uint8_t
+    getCombiningClass(UChar32 c) const {
+        return impl.getCC(impl.getNorm16(c));
+    }
 
     // quick checks
     virtual UBool
@@ -220,9 +276,12 @@ public:
     const Normalizer2Impl &impl;
 };
 
+Normalizer2WithImpl::~Normalizer2WithImpl() {}
+
 class DecomposeNormalizer2 : public Normalizer2WithImpl {
 public:
     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+    virtual ~DecomposeNormalizer2();
 
 private:
     virtual void
@@ -233,8 +292,9 @@ private:
     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
     virtual void
     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
-        impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
+        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
     }
     virtual const UChar *
     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
@@ -249,10 +309,13 @@ private:
     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
 };
 
+DecomposeNormalizer2::~DecomposeNormalizer2() {}
+
 class ComposeNormalizer2 : public Normalizer2WithImpl {
 public:
     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
+    virtual ~ComposeNormalizer2();
 
 private:
     virtual void
@@ -263,8 +326,9 @@ private:
     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
     virtual void
     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
-        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
+        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
     }
 
     virtual UBool
@@ -319,9 +383,12 @@ private:
     const UBool onlyContiguous;
 };
 
+ComposeNormalizer2::~ComposeNormalizer2() {}
+
 class FCDNormalizer2 : public Normalizer2WithImpl {
 public:
     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+    virtual ~FCDNormalizer2();
 
 private:
     virtual void
@@ -332,8 +399,9 @@ private:
     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
     virtual void
     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
-        impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
+        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
     }
     virtual const UChar *
     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
@@ -345,6 +413,8 @@ private:
     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
 };
 
+FCDNormalizer2::~FCDNormalizer2() {}
+
 // instance cache ---------------------------------------------------------- ***
 
 struct Norm2AllModes : public UMemory {
@@ -451,12 +521,7 @@ const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
 
 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
     Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
-    if(allModes!=NULL) {
-        allModes->impl.getFCDTrie(errorCode);
-        return &allModes->fcd;
-    } else {
-        return NULL;
-    }
+    return allModes!=NULL ? &allModes->fcd : NULL;
 }
 
 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
@@ -533,15 +598,29 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
     return &((Normalizer2WithImpl *)norm2)->impl;
 }
 
-const UTrie2 *
-Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
-    Norm2AllModes *allModes=
-        Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
-    if(allModes!=NULL) {
-        return allModes->impl.getFCDTrie(errorCode);
-    } else {
-        return NULL;
-    }
+const Normalizer2 *
+Normalizer2::getNFCInstance(UErrorCode &errorCode) {
+    return Normalizer2Factory::getNFCInstance(errorCode);
+}
+
+const Normalizer2 *
+Normalizer2::getNFDInstance(UErrorCode &errorCode) {
+    return Normalizer2Factory::getNFDInstance(errorCode);
+}
+
+const Normalizer2 *
+Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
+    return Normalizer2Factory::getNFKCInstance(errorCode);
+}
+
+const Normalizer2 *
+Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
+    return Normalizer2Factory::getNFKDInstance(errorCode);
+}
+
+const Normalizer2 *
+Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
+    return Normalizer2Factory::getNFKC_CFInstance(errorCode);
 }
 
 const Normalizer2 *
@@ -554,6 +633,7 @@ Normalizer2::getInstance(const char *packageName,
     }
     if(name==NULL || *name==0) {
         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
     }
     Norm2AllModes *allModes=NULL;
     if(packageName==NULL) {
@@ -609,7 +689,6 @@ Normalizer2::getInstance(const char *packageName,
         case UNORM2_DECOMPOSE:
             return &allModes->decomp;
         case UNORM2_FCD:
-            allModes->impl.getFCDTrie(errorCode);
             return &allModes->fcd;
         case UNORM2_COMPOSE_CONTIGUOUS:
             return &allModes->fcc;
@@ -620,15 +699,38 @@ Normalizer2::getInstance(const char *packageName,
     return NULL;
 }
 
-UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
-
 U_NAMESPACE_END
 
 // C API ------------------------------------------------------------------- ***
 
 U_NAMESPACE_USE
 
-U_DRAFT const UNormalizer2 * U_EXPORT2
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFCInstance(UErrorCode *pErrorCode) {
+    return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFDInstance(UErrorCode *pErrorCode) {
+    return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
+    return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
+    return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
+    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
 unorm2_getInstance(const char *packageName,
                    const char *name,
                    UNormalization2Mode mode,
@@ -636,12 +738,12 @@ unorm2_getInstance(const char *packageName,
     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
 }
 
-U_DRAFT void U_EXPORT2
+U_CAPI void U_EXPORT2
 unorm2_close(UNormalizer2 *norm2) {
     delete (Normalizer2 *)norm2;
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 unorm2_normalize(const UNormalizer2 *norm2,
                  const UChar *src, int32_t length,
                  UChar *dest, int32_t capacity,
@@ -693,16 +795,31 @@ normalizeSecondAndAppend(const UNormalizer2 *norm2,
         return 0;
     }
     UnicodeString firstString(first, firstLength, firstCapacity);
+    firstLength=firstString.length();  // In case it was -1.
     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
     if(secondLength!=0) {
         const Normalizer2 *n2=(const Normalizer2 *)norm2;
         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
         if(n2wi!=NULL) {
             // Avoid duplicate argument checking and support NUL-terminated src.
-            ReorderingBuffer buffer(n2wi->impl, firstString);
-            if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
-                n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
-                                        doNormalize, buffer, *pErrorCode);
+            UnicodeString safeMiddle;
+            {
+                ReorderingBuffer buffer(n2wi->impl, firstString);
+                if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
+                    n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
+                                             doNormalize, safeMiddle, buffer, *pErrorCode);
+                }
+            }  // The ReorderingBuffer destructor finalizes firstString.
+            if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
+                // Restore the modified suffix of the first string.
+                // This does not restore first[] array contents between firstLength and firstCapacity.
+                // (That might be uninitialized memory, as far as we know.)
+                if(first!=NULL) { /* don't dereference NULL */
+                  safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
+                  if(firstLength<firstCapacity) {
+                    first[firstLength]=0;  // NUL-terminate in case it was originally.
+                  }
+                }
             }
         } else {
             UnicodeString secondString(secondLength<0, second, secondLength);
@@ -716,7 +833,7 @@ normalizeSecondAndAppend(const UNormalizer2 *norm2,
     return firstString.extract(first, firstCapacity, *pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
                                 const UChar *second, int32_t secondLength,
@@ -727,7 +844,7 @@ unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
                                     TRUE, pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 unorm2_append(const UNormalizer2 *norm2,
               UChar *first, int32_t firstLength, int32_t firstCapacity,
               const UChar *second, int32_t secondLength,
@@ -738,7 +855,7 @@ unorm2_append(const UNormalizer2 *norm2,
                                     FALSE, pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 unorm2_getDecomposition(const UNormalizer2 *norm2,
                         UChar32 c, UChar *decomposition, int32_t capacity,
                         UErrorCode *pErrorCode) {
@@ -757,7 +874,36 @@ unorm2_getDecomposition(const UNormalizer2 *norm2,
     }
 }
 
-U_DRAFT UBool U_EXPORT2
+U_CAPI int32_t U_EXPORT2
+unorm2_getRawDecomposition(const UNormalizer2 *norm2,
+                           UChar32 c, UChar *decomposition, int32_t capacity,
+                           UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(decomposition==NULL ? capacity!=0 : capacity<0) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    UnicodeString destString(decomposition, 0, capacity);
+    if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
+        return destString.extract(decomposition, capacity, *pErrorCode);
+    } else {
+        return -1;
+    }
+}
+
+U_CAPI UChar32 U_EXPORT2
+unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
+    return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
+}
+
+U_CAPI uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
+    return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
+}
+
+U_CAPI UBool U_EXPORT2
 unorm2_isNormalized(const UNormalizer2 *norm2,
                     const UChar *s, int32_t length,
                     UErrorCode *pErrorCode) {
@@ -772,7 +918,7 @@ unorm2_isNormalized(const UNormalizer2 *norm2,
     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
 }
 
-U_DRAFT UNormalizationCheckResult U_EXPORT2
+U_CAPI UNormalizationCheckResult U_EXPORT2
 unorm2_quickCheck(const UNormalizer2 *norm2,
                   const UChar *s, int32_t length,
                   UErrorCode *pErrorCode) {
@@ -787,7 +933,7 @@ unorm2_quickCheck(const UNormalizer2 *norm2,
     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
                          const UChar *s, int32_t length,
                          UErrorCode *pErrorCode) {
@@ -802,24 +948,35 @@ unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
 }
 
-U_DRAFT UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
 }
 
-U_DRAFT UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
 }
 
-U_DRAFT UBool U_EXPORT2
+U_CAPI UBool U_EXPORT2
 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
     return ((const Normalizer2 *)norm2)->isInert(c);
 }
 
 // Some properties APIs ---------------------------------------------------- ***
 
-U_CFUNC UNormalizationCheckResult U_EXPORT2
+U_CAPI uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c) {
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return nfd->getCombiningClass(c);
+    } else {
+        return 0;
+    }
+}
+
+U_CFUNC UNormalizationCheckResult
 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
         return UNORM_YES;
@@ -833,14 +990,14 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
     }
 }
 
-U_CAPI const uint16_t * U_EXPORT2
-unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
-    const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
-    if(U_SUCCESS(*pErrorCode)) {
-        fcdHighStart=trie->highStart;
-        return trie->index;
+U_CFUNC uint16_t
+unorm_getFCD16(UChar32 c) {
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return impl->getFCD16(c);
     } else {
-        return NULL;
+        return 0;
     }
 }