X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/249c4c5ea9376c24572daf9c2effa7484a282f14..3d1f044b704633e2e541231cd17ae9ecf9ad5c7a:/icuSources/common/normalizer2impl.cpp?ds=sidebyside

diff --git a/icuSources/common/normalizer2impl.cpp b/icuSources/common/normalizer2impl.cpp
index 15b4a528..b2dd7ad4 100644
--- a/icuSources/common/normalizer2impl.cpp
+++ b/icuSources/common/normalizer2impl.cpp
@@ -16,6 +16,8 @@
 *   created by: Markus W. Scherer
 */
 
+// #define UCPTRIE_DEBUG
+
 #include "unicode/utypes.h"
 
 #if !UCONFIG_NO_NORMALIZATION
@@ -24,7 +26,9 @@
 #include "unicode/edits.h"
 #include "unicode/normalizer2.h"
 #include "unicode/stringoptions.h"
+#include "unicode/ucptrie.h"
 #include "unicode/udata.h"
+#include "unicode/umutablecptrie.h"
 #include "unicode/ustring.h"
 #include "unicode/utf16.h"
 #include "unicode/utf8.h"
@@ -34,8 +38,8 @@
 #include "normalizer2impl.h"
 #include "putilimp.h"
 #include "uassert.h"
+#include "ucptrie_impl.h"
 #include "uset_imp.h"
-#include "utrie2.h"
 #include "uvector.h"
 
 U_NAMESPACE_BEGIN
@@ -62,7 +66,7 @@ inline uint8_t leadByteForCP(UChar32 c) {
  * Returns the code point from one single well-formed UTF-8 byte sequence
  * between cpStart and cpLimit.
  *
- * UTrie2 UTF-8 macros do not assemble whole code points (for efficiency).
+ * Trie UTF-8 macros do not assemble whole code points (for efficiency).
  * When we do need the code point, we call this function.
  * We should not need it for normalization-inert data (norm16==0).
  * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
@@ -82,8 +86,7 @@ UChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) {
     case 4:
         return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f);
     default:
-        U_ASSERT(FALSE);  // Should not occur.
-        return U_SENTINEL;
+        UPRV_UNREACHABLE;  // Should not occur.
     }
 }
 
@@ -122,7 +125,7 @@ int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
             }
         } else if (src[1] == 0x87) {
             uint8_t t = src[2];
-            if ((int8_t)t <= (int8_t)0x82) {
+            if ((int8_t)t <= (int8_t)0x82u) {
                 return t - (0xa7 - 0x40);
             }
         }
@@ -253,7 +256,7 @@ UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &e
     return TRUE;
 }
 
-UBool ReorderingBuffer::append(const UChar *s, int32_t length,
+UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD,
                                uint8_t leadCC, uint8_t trailCC,
                                UErrorCode &errorCode) {
     if(length==0) {
@@ -280,8 +283,11 @@ UBool ReorderingBuffer::append(const UChar *s, int32_t length,
         while(i<length) {
             U16_NEXT(s, i, length, c);
             if(i<length) {
-                // s must be in NFD, otherwise we need to use getCC().
-                leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
+                if (isNFD) {
+                    leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
+                } else {
+                    leadCC = impl.getCC(impl.getNorm16(c));
+                }
             } else {
                 leadCC=trailCC;
             }
@@ -411,7 +417,8 @@ struct CanonIterData : public UMemory {
     CanonIterData(UErrorCode &errorCode);
     ~CanonIterData();
     void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
-    UTrie2 *trie;
+    UMutableCPTrie *mutableTrie;
+    UCPTrie *trie;
     UVector canonStartSets;  // contains UnicodeSet *
 };
 
@@ -420,22 +427,22 @@ Normalizer2Impl::~Normalizer2Impl() {
 }
 
 void
-Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
+Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie,
                       const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
-    minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
-    minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
-    minLcccCP=inIndexes[IX_MIN_LCCC_CP];
-
-    minYesNo=inIndexes[IX_MIN_YES_NO];
-    minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
-    minNoNo=inIndexes[IX_MIN_NO_NO];
-    minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
-    minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
-    minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
-    limitNoNo=inIndexes[IX_LIMIT_NO_NO];
-    minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
-    U_ASSERT((minMaybeYes&7)==0);  // 8-aligned for noNoDelta bit fields
-    centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;
+    minDecompNoCP = static_cast<UChar>(inIndexes[IX_MIN_DECOMP_NO_CP]);
+    minCompNoMaybeCP = static_cast<UChar>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]);
+    minLcccCP = static_cast<UChar>(inIndexes[IX_MIN_LCCC_CP]);
+
+    minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]);
+    minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]);
+    minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]);
+    minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]);
+    minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
+    minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
+    limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
+    minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
+    U_ASSERT((minMaybeYes & 7) == 0);  // 8-aligned for noNoDelta bit fields
+    centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;
 
     normTrie=inTrie;
 
@@ -445,75 +452,8 @@ Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
     smallFCD=inSmallFCD;
 }
 
-class LcccContext {
-public:
-    LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}
-
-    void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {
-        if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
-                norm16 != Normalizer2Impl::JAMO_VT) {
-            set.add(start, end);
-        } else if (impl.minNoNoCompNoMaybeCC <= norm16 && norm16 < impl.limitNoNo) {
-            uint16_t fcd16=impl.getFCD16(start);
-            if(fcd16>0xff) { set.add(start, end); }
-        }
-    }
-
-private:
-    const Normalizer2Impl &impl;
-    UnicodeSet &set;
-};
-
-namespace {
-
-struct PropertyStartsContext {
-    PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)
-            : impl(ni), sa(adder) {}
-
-    const Normalizer2Impl &impl;
-    const USetAdder *sa;
-};
-
-}  // namespace
-
 U_CDECL_BEGIN
 
-static UBool U_CALLCONV
-enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
-    ((LcccContext *)context)->handleRange(start, end, (uint16_t)value);
-    return TRUE;
-}
-
-static UBool U_CALLCONV
-enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
-    /* add the start code point to the USet */
-    const PropertyStartsContext *ctx=(const PropertyStartsContext *)context;
-    const USetAdder *sa=ctx->sa;
-    sa->add(sa->set, start);
-    if (start != end && ctx->impl.isAlgorithmicNoNo((uint16_t)value) &&
-            (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
-        // Range of code points with same-norm16-value algorithmic decompositions.
-        // They might have different non-zero FCD16 values.
-        uint16_t prevFCD16=ctx->impl.getFCD16(start);
-        while(++start<=end) {
-            uint16_t fcd16=ctx->impl.getFCD16(start);
-            if(fcd16!=prevFCD16) {
-                sa->add(sa->set, start);
-                prevFCD16=fcd16;
-            }
-        }
-    }
-    return TRUE;
-}
-
-static UBool U_CALLCONV
-enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
-    /* add the start code point to the USet */
-    const USetAdder *sa=(const USetAdder *)context;
-    sa->add(sa->set, start);
-    return TRUE;
-}
-
 static uint32_t U_CALLCONV
 segmentStarterMapper(const void * /*context*/, uint32_t value) {
     return value&CANON_NOT_SEGMENT_STARTER;
@@ -523,15 +463,44 @@ U_CDECL_END
 
 void
 Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
-    LcccContext context(*this, set);
-    utrie2_enum(normTrie, NULL, enumLcccRange, &context);
+    UChar32 start = 0, end;
+    uint32_t norm16;
+    while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
+                                   nullptr, nullptr, &norm16)) >= 0) {
+        if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
+                norm16 != Normalizer2Impl::JAMO_VT) {
+            set.add(start, end);
+        } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
+            uint16_t fcd16 = getFCD16(start);
+            if (fcd16 > 0xff) { set.add(start, end); }
+        }
+        start = end + 1;
+    }
 }
 
 void
 Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
-    /* add the start code point of each same-value range of each trie */
-    PropertyStartsContext context(*this, sa);
-    utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);
+    // Add the start code point of each same-value range of the trie.
+    UChar32 start = 0, end;
+    uint32_t value;
+    while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
+                                   nullptr, nullptr, &value)) >= 0) {
+        sa->add(sa->set, start);
+        if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
+                (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
+            // Range of code points with same-norm16-value algorithmic decompositions.
+            // They might have different non-zero FCD16 values.
+            uint16_t prevFCD16 = getFCD16(start);
+            while (++start <= end) {
+                uint16_t fcd16 = getFCD16(start);
+                if (fcd16 != prevFCD16) {
+                    sa->add(sa->set, start);
+                    prevFCD16 = fcd16;
+                }
+            }
+        }
+        start = end + 1;
+    }
 
     /* add Hangul LV syllables and LV+1 because of skippables */
     for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
@@ -543,10 +512,15 @@ Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode
 
 void
 Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
-    /* add the start code point of each same-value range of the canonical iterator data trie */
-    if(ensureCanonIterData(errorCode)) {
-        // currently only used for the SEGMENT_STARTER property
-        utrie2_enum(fCanonIterData->trie, segmentStarterMapper, enumPropertyStartsRange, sa);
+    // Add the start code point of each same-value range of the canonical iterator data trie.
+    if (!ensureCanonIterData(errorCode)) { return; }
+    // Currently only used for the SEGMENT_STARTER property.
+    UChar32 start = 0, end;
+    uint32_t value;
+    while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
+                                   segmentStarterMapper, nullptr, &value)) >= 0) {
+        sa->add(sa->set, start);
+        start = end + 1;
     }
 }
 
@@ -633,27 +607,23 @@ Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
         // count code units below the minimum or with irrelevant data for the quick check
         for(prevSrc=src; src!=limit;) {
             if( (c=*src)<minNoCP ||
-                isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
+                isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
             ) {
                 ++src;
-            } else if(!U16_IS_SURROGATE(c)) {
+            } else if(!U16_IS_LEAD(c)) {
                 break;
             } else {
                 UChar c2;
-                if(U16_IS_SURROGATE_LEAD(c)) {
-                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
-                        c=U16_GET_SUPPLEMENTARY(c, c2);
-                    }
-                } else /* trail surrogate */ {
-                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
-                        --src;
-                        c=U16_GET_SUPPLEMENTARY(c2, c);
+                if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
+                    c=U16_GET_SUPPLEMENTARY(c, c2);
+                    norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
+                    if(isMostDecompYesAndZeroCC(norm16)) {
+                        src+=2;
+                    } else {
+                        break;
                     }
-                }
-                if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
-                    src+=U16_LENGTH(c);
                 } else {
-                    break;
+                    ++src;  // unpaired lead surrogate: inert
                 }
             }
         }
@@ -713,7 +683,7 @@ Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
         const UChar *prevSrc = src;
         UChar32 c;
         uint16_t norm16;
-        UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
+        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
         if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
             return prevSrc;
         }
@@ -737,7 +707,7 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
         }
         // Maps to an isCompYesAndZeroCC.
         c=mapAlgorithmic(c, norm16);
-        norm16=getNorm16(c);
+        norm16=getRawNorm16(c);
     }
     if (norm16 < minYesNo) {
         // c does not decompose
@@ -758,7 +728,7 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
     } else {
         leadCC=0;
     }
-    return buffer.append((const UChar *)mapping+1, length, leadCC, trailCC, errorCode);
+    return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
 }
 
 const uint8_t *
@@ -771,7 +741,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
     while (src < limit) {
         const uint8_t *prevSrc = src;
         uint16_t norm16;
-        UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
+        UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
         // Get the decomposition and the lead and trail cc's.
         UChar32 c = U_SENTINEL;
         if (norm16 >= limitNoNo) {
@@ -789,7 +759,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
             }
             c = codePointFromValidUTF8(prevSrc, src);
             c = mapAlgorithmic(c, norm16);
-            norm16 = getNorm16(c);
+            norm16 = getRawNorm16(c);
         } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
             return prevSrc;
         }
@@ -828,7 +798,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
             } else {
                 leadCC = 0;
             }
-            if (!buffer.append((const char16_t *)mapping+1, length, leadCC, trailCC, errorCode)) {
+            if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
                 return nullptr;
             }
         }
@@ -854,7 +824,7 @@ Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) c
         length=0;
         U16_APPEND_UNSAFE(buffer, length, c);
         // The mapping might decompose further.
-        norm16 = getNorm16(c);
+        norm16 = getRawNorm16(c);
     }
     if (norm16 < minYesNo) {
         return decomp;
@@ -926,19 +896,30 @@ void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
         return;
     }
     // Just merge the strings at the boundary.
-    ForwardUTrie2StringIterator iter(normTrie, src, limit);
-    uint8_t firstCC, prevCC, cc;
-    firstCC=prevCC=cc=getCC(iter.next16());
-    while(cc!=0) {
-        prevCC=cc;
-        cc=getCC(iter.next16());
-    };
+    bool isFirst = true;
+    uint8_t firstCC = 0, prevCC = 0, cc;
+    const UChar *p = src;
+    while (p != limit) {
+        const UChar *codePointStart = p;
+        UChar32 c;
+        uint16_t norm16;
+        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
+        if ((cc = getCC(norm16)) == 0) {
+            p = codePointStart;
+            break;
+        }
+        if (isFirst) {
+            firstCC = cc;
+            isFirst = false;
+        }
+        prevCC = cc;
+    }
     if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
-        limit=u_strchr(iter.codePointStart, 0);
+        limit=u_strchr(p, 0);
     }
 
-    if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) {
-        buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
+    if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) {
+        buffer.appendZeroCC(p, limit, errorCode);
     }
 }
 
@@ -1085,7 +1066,7 @@ void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const
         }
         UChar32 composite=compositeAndFwd>>1;
         if((compositeAndFwd&1)!=0) {
-            addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
+            addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
         }
         set.add(composite);
     } while((firstUnit&COMP_1_LAST_TUPLE)==0);
@@ -1124,7 +1105,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
     prevCC=0;
 
     for(;;) {
-        UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
+        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
         cc=getCCFromYesOrMaybe(norm16);
         if( // this character combines backward and
             isMaybe(norm16) &&
@@ -1229,7 +1210,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
                 // Is the composite a starter that combines forward?
                 if(compositeAndFwd&1) {
                     compositionsList=
-                        getCompositionsListForComposite(getNorm16(composite));
+                        getCompositionsListForComposite(getRawNorm16(composite));
                 } else {
                     compositionsList=NULL;
                 }
@@ -1268,7 +1249,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
 
 UChar32
 Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
-    uint16_t norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16=0
+    uint16_t norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16
     const uint16_t *list;
     if(isInert(norm16)) {
         return U_SENTINEL;
@@ -1359,29 +1340,23 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
                 return TRUE;
             }
             if( (c=*src)<minNoMaybeCP ||
-                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
+                isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
             ) {
                 ++src;
             } else {
                 prevSrc = src++;
-                if(!U16_IS_SURROGATE(c)) {
+                if(!U16_IS_LEAD(c)) {
                     break;
                 } else {
                     UChar c2;
-                    if(U16_IS_SURROGATE_LEAD(c)) {
-                        if(src!=limit && U16_IS_TRAIL(c2=*src)) {
-                            ++src;
-                            c=U16_GET_SUPPLEMENTARY(c, c2);
-                        }
-                    } else /* trail surrogate */ {
-                        if(prevBoundary<prevSrc && U16_IS_LEAD(c2=*(prevSrc-1))) {
-                            --prevSrc;
-                            c=U16_GET_SUPPLEMENTARY(c2, c);
+                    if(src!=limit && U16_IS_TRAIL(c2=*src)) {
+                        ++src;
+                        c=U16_GET_SUPPLEMENTARY(c, c2);
+                        norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
+                        if(!isCompYesAndZeroCC(norm16)) {
+                            break;
                         }
                     }
-                    if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
-                        break;
-                    }
                 }
             }
         }
@@ -1529,7 +1504,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
                     }
                     uint8_t prevCC = cc;
                     nextSrc = src;
-                    UTRIE2_U16_NEXT16(normTrie, nextSrc, limit, c, n16);
+                    UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16);
                     if (n16 >= MIN_YES_YES_WITH_CC) {
                         cc = getCCFromNormalYesOrMaybe(n16);
                         if (prevCC > cc) {
@@ -1559,7 +1534,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
         // decompose and recompose.
         if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
             const UChar *p = prevSrc;
-            UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, norm16);
+            UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16);
             if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
                 prevSrc = p;
             }
@@ -1626,29 +1601,23 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
                 return src;
             }
             if( (c=*src)<minNoMaybeCP ||
-                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
+                isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
             ) {
                 ++src;
             } else {
                 prevSrc = src++;
-                if(!U16_IS_SURROGATE(c)) {
+                if(!U16_IS_LEAD(c)) {
                     break;
                 } else {
                     UChar c2;
-                    if(U16_IS_SURROGATE_LEAD(c)) {
-                        if(src!=limit && U16_IS_TRAIL(c2=*src)) {
-                            ++src;
-                            c=U16_GET_SUPPLEMENTARY(c, c2);
-                        }
-                    } else /* trail surrogate */ {
-                        if(prevBoundary<prevSrc && U16_IS_LEAD(c2=*(prevSrc-1))) {
-                            --prevSrc;
-                            c=U16_GET_SUPPLEMENTARY(c2, c);
+                    if(src!=limit && U16_IS_TRAIL(c2=*src)) {
+                        ++src;
+                        c=U16_GET_SUPPLEMENTARY(c, c2);
+                        norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
+                        if(!isCompYesAndZeroCC(norm16)) {
+                            break;
                         }
                     }
-                    if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
-                        break;
-                    }
                 }
             }
         }
@@ -1665,7 +1634,7 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
             } else {
                 const UChar *p = prevSrc;
                 uint16_t n16;
-                UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, n16);
+                UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16);
                 if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
                     prevBoundary = prevSrc;
                 } else {
@@ -1699,7 +1668,7 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
                     }
                     uint8_t prevCC = cc;
                     nextSrc = src;
-                    UTRIE2_U16_NEXT16(normTrie, nextSrc, limit, c, norm16);
+                    UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);
                     if (isMaybeOrNonZeroCC(norm16)) {
                         cc = getCCFromYesOrMaybe(norm16);
                         if (!(prevCC <= cc || cc == 0)) {
@@ -1786,7 +1755,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                 ++src;
             } else {
                 prevSrc = src;
-                UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
+                UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
                 if (!isCompYesAndZeroCC(norm16)) {
                     break;
                 }
@@ -1945,7 +1914,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
                     }
                     uint8_t prevCC = cc;
                     nextSrc = src;
-                    UTRIE2_U8_NEXT16(normTrie, nextSrc, limit, n16);
+                    UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16);
                     if (n16 >= MIN_YES_YES_WITH_CC) {
                         cc = getCCFromNormalYesOrMaybe(n16);
                         if (prevCC > cc) {
@@ -1975,7 +1944,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
         // decompose and recompose.
         if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
             const uint8_t *p = prevSrc;
-            UTRIE2_U8_PREV16(normTrie, prevBoundary, p, norm16);
+            UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16);
             if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
                 prevSrc = p;
             }
@@ -2023,7 +1992,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limi
     }
     UChar32 c;
     uint16_t norm16;
-    UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
+    UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
     return norm16HasCompBoundaryBefore(norm16);
 }
 
@@ -2032,7 +2001,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *
         return TRUE;
     }
     uint16_t norm16;
-    UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
+    UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
     return norm16HasCompBoundaryBefore(norm16);
 }
 
@@ -2043,7 +2012,7 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p,
     }
     UChar32 c;
     uint16_t norm16;
-    UTRIE2_U16_PREV16(normTrie, start, p, c, norm16);
+    UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
     return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
 }
 
@@ -2053,36 +2022,42 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t
         return TRUE;
     }
     uint16_t norm16;
-    UTRIE2_U8_PREV16(normTrie, start, p, norm16);
+    UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16);
     return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
 }
 
 const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p,
                                                        UBool onlyContiguous) const {
-    BackwardUTrie2StringIterator iter(normTrie, start, p);
-    for(;;) {
-        uint16_t norm16=iter.previous16();
+    while (p != start) {
+        const UChar *codePointLimit = p;
+        UChar32 c;
+        uint16_t norm16;
+        UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
         if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
-            return iter.codePointLimit;
+            return codePointLimit;
         }
-        if (hasCompBoundaryBefore(iter.codePoint, norm16)) {
-            return iter.codePointStart;
+        if (hasCompBoundaryBefore(c, norm16)) {
+            return p;
         }
     }
+    return p;
 }
 
 const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit,
                                                    UBool onlyContiguous) const {
-    ForwardUTrie2StringIterator iter(normTrie, p, limit);
-    for(;;) {
-        uint16_t norm16=iter.next16();
-        if (hasCompBoundaryBefore(iter.codePoint, norm16)) {
-            return iter.codePointStart;
+    while (p != limit) {
+        const UChar *codePointStart = p;
+        UChar32 c;
+        uint16_t norm16;
+        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
+        if (hasCompBoundaryBefore(c, norm16)) {
+            return codePointStart;
         }
         if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
-            return iter.codePointLimit;
+            return p;
         }
     }
+    return p;
 }
 
 uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const {
@@ -2130,7 +2105,7 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
             }
             // Maps to an isCompYesAndZeroCC.
             c=mapAlgorithmic(c, norm16);
-            norm16=getNorm16(c);
+            norm16=getRawNorm16(c);
         }
     }
     if(norm16<=minYesNo || isHangulLVT(norm16)) {
@@ -2195,17 +2170,10 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
                 prevFCD16=0;
                 ++src;
             } else {
-                if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_LEAD(c)) {
                     UChar c2;
-                    if(U16_IS_SURROGATE_LEAD(c)) {
-                        if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
-                            c=U16_GET_SUPPLEMENTARY(c, c2);
-                        }
-                    } else /* trail surrogate */ {
-                        if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
-                            --src;
-                            c=U16_GET_SUPPLEMENTARY(c2, c);
-                        }
+                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
+                        c=U16_GET_SUPPLEMENTARY(c, c2);
                     }
                 }
                 if((fcd16=getFCD16FromNormData(c))<=0xff) {
@@ -2336,7 +2304,7 @@ const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const
         const UChar *codePointLimit = p;
         UChar32 c;
         uint16_t norm16;
-        UTRIE2_U16_PREV16(normTrie, start, p, c, norm16);
+        UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
         if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16)) {
             return codePointLimit;
         }
@@ -2352,7 +2320,7 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
         const UChar *codePointStart=p;
         UChar32 c;
         uint16_t norm16;
-        UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
+        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
         if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16)) {
             return codePointStart;
         }
@@ -2366,19 +2334,20 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
 // CanonicalIterator data -------------------------------------------------- ***
 
 CanonIterData::CanonIterData(UErrorCode &errorCode) :
-        trie(utrie2_open(0, 0, &errorCode)),
+        mutableTrie(umutablecptrie_open(0, 0, &errorCode)), trie(nullptr),
         canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
 
 CanonIterData::~CanonIterData() {
-    utrie2_close(trie);
+    umutablecptrie_close(mutableTrie);
+    ucptrie_close(trie);
 }
 
 void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
-    uint32_t canonValue=utrie2_get32(trie, decompLead);
+    uint32_t canonValue = umutablecptrie_get(mutableTrie, decompLead);
     if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
         // origin is the first character whose decomposition starts with
         // the character for which we are setting the value.
-        utrie2_set32(trie, decompLead, canonValue|origin, &errorCode);
+        umutablecptrie_set(mutableTrie, decompLead, canonValue|origin, &errorCode);
     } else {
         // origin is not the first character, or it is U+0000.
         UnicodeSet *set;
@@ -2390,7 +2359,7 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
             }
             UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
             canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
-            utrie2_set32(trie, decompLead, canonValue, &errorCode);
+            umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
             canonStartSets.addElement(set, errorCode);
             if(firstOrigin!=0) {
                 set->add(firstOrigin);
@@ -2406,7 +2375,6 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
 class InitCanonIterData {
 public:
     static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode);
-    static void handleRange(Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode);
 };
 
 U_CDECL_BEGIN
@@ -2417,18 +2385,6 @@ initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
     InitCanonIterData::doInit(impl, errorCode);
 }
 
-// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
-//     context: the Normalizer2Impl
-static UBool U_CALLCONV
-enumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
-    UErrorCode errorCode = U_ZERO_ERROR;
-    if (value != Normalizer2Impl::INERT) {
-        Normalizer2Impl *impl = (Normalizer2Impl *)context;
-        InitCanonIterData::handleRange(impl, start, end, (uint16_t)value, errorCode);
-    }
-    return U_SUCCESS(errorCode);
-}
-
 U_CDECL_END
 
 void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
@@ -2438,8 +2394,24 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
         errorCode=U_MEMORY_ALLOCATION_ERROR;
     }
     if (U_SUCCESS(errorCode)) {
-        utrie2_enum(impl->normTrie, NULL, enumCIDRangeHandler, impl);
-        utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode);
+        UChar32 start = 0, end;
+        uint32_t value;
+        while ((end = ucptrie_getRange(impl->normTrie, start,
+                                       UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
+                                       nullptr, nullptr, &value)) >= 0) {
+            // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
+            if (value != Normalizer2Impl::INERT) {
+                impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
+            }
+            start = end + 1;
+        }
+#ifdef UCPTRIE_DEBUG
+        umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");
+#endif
+        impl->fCanonIterData->trie = umutablecptrie_buildImmutable(
+            impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);
+        umutablecptrie_close(impl->fCanonIterData->mutableTrie);
+        impl->fCanonIterData->mutableTrie = nullptr;
     }
     if (U_FAILURE(errorCode)) {
         delete impl->fCanonIterData;
@@ -2447,11 +2419,6 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
     }
 }
 
-void InitCanonIterData::handleRange(
-        Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode) {
-    impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
-}
-
 void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
                                                   CanonIterData &newData,
                                                   UErrorCode &errorCode) const {
@@ -2465,7 +2432,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
         return;
     }
     for(UChar32 c=start; c<=end; ++c) {
-        uint32_t oldValue=utrie2_get32(newData.trie, c);
+        uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c);
         uint32_t newValue=oldValue;
         if(isMaybeOrNonZeroCC(norm16)) {
             // not a segment starter if it occurs in a decomposition or has cc!=0
@@ -2483,7 +2450,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
             if (isDecompNoAlgorithmic(norm16_2)) {
                 // Maps to an isCompYesAndZeroCC.
                 c2 = mapAlgorithmic(c2, norm16_2);
-                norm16_2 = getNorm16(c2);
+                norm16_2 = getRawNorm16(c2);
                 // No compatibility mappings for the CanonicalIterator.
                 U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
             }
@@ -2510,10 +2477,10 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
                     if(norm16_2>=minNoNo) {
                         while(i<length) {
                             U16_NEXT_UNSAFE(mapping, i, c2);
-                            uint32_t c2Value=utrie2_get32(newData.trie, c2);
+                            uint32_t c2Value = umutablecptrie_get(newData.mutableTrie, c2);
                             if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
-                                utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER,
-                                             &errorCode);
+                                umutablecptrie_set(newData.mutableTrie, c2,
+                                                   c2Value|CANON_NOT_SEGMENT_STARTER, &errorCode);
                             }
                         }
                     }
@@ -2524,7 +2491,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
             }
         }
         if(newValue!=oldValue) {
-            utrie2_set32(newData.trie, c, newValue, &errorCode);
+            umutablecptrie_set(newData.mutableTrie, c, newValue, &errorCode);
         }
     }
 }
@@ -2537,7 +2504,7 @@ UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
 }
 
 int32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
-    return (int32_t)utrie2_get32(fCanonIterData->trie, c);
+    return (int32_t)ucptrie_get(fCanonIterData->trie, c);  // reads the immutable trie; presumably callers ran ensureCanonIterData() first -- verify
 }
 
 const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
@@ -2561,7 +2528,7 @@ UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
         set.add(value);
     }
     if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
-        uint16_t norm16=getNorm16(c);
+        uint16_t norm16=getRawNorm16(c);
         if(norm16==JAMO_L) {
             UChar32 syllable=
                 (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
@@ -2608,7 +2575,7 @@ unorm2_swap(const UDataSwapper *ds,
         pInfo->dataFormat[1]==0x72 &&
         pInfo->dataFormat[2]==0x6d &&
         pInfo->dataFormat[3]==0x32 &&
-        (1<=formatVersion0 && formatVersion0<=3)
+        (1<=formatVersion0 && formatVersion0<=4)
     )) {
         udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
                          pInfo->dataFormat[0], pInfo->dataFormat[1],
@@ -2669,9 +2636,9 @@ unorm2_swap(const UDataSwapper *ds,
         ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
         offset=nextOffset;
 
-        /* swap the UTrie2 */
+        /* swap the trie */
         nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
-        utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
+        utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
         offset=nextOffset;
 
         /* swap the uint16_t extraData[] */