]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/rbbitblb.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / rbbitblb.cpp
index 44c8e9fdd986f2f9532ccc92f173bfd1e1b75474..a20b51777cda7adf209fe08fedc4d45aa5bd745c 100644 (file)
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 **********************************************************************
-*   Copyright (c) 2002-2008, International Business Machines
+*   Copyright (c) 2002-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
 #include "rbbidata.h"
 #include "cstring.h"
 #include "uassert.h"
+#include "uvectr32.h"
 #include "cmemory.h"
 
 U_NAMESPACE_BEGIN
 
-RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) :
- fTree(*rootNode) {
-    fRB                 = rb;
-    fStatus             = fRB->fStatus;
-    UErrorCode status   = U_ZERO_ERROR;
-    fDStates            = new UVector(status);
-    if (U_FAILURE(*fStatus)) {
-        return;
-    }
+RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
+        fRB(rb),
+        fTree(*rootNode),
+        fStatus(&status),
+        fDStates(nullptr),
+        fSafeTable(nullptr) {
     if (U_FAILURE(status)) {
-        *fStatus = status;
         return;
     }
-    if (fDStates == NULL) {
-        *fStatus = U_MEMORY_ALLOCATION_ERROR;;
+    // fDStates is UVector<RBBIStateDescriptor *>
+    fDStates = new UVector(status);
+    if (U_SUCCESS(status) && fDStates == nullptr ) {
+        status = U_MEMORY_ALLOCATION_ERROR;
     }
 }
 
@@ -49,17 +50,18 @@ RBBITableBuilder::~RBBITableBuilder() {
     for (i=0; i<fDStates->size(); i++) {
         delete (RBBIStateDescriptor *)fDStates->elementAt(i);
     }
-    delete   fDStates;
+    delete fDStates;
+    delete fSafeTable;
 }
 
 
 //-----------------------------------------------------------------------------
 //
-//   RBBITableBuilder::build  -  This is the main function for building the DFA state transtion
-//                               table from the RBBI rules parse tree.
+//   RBBITableBuilder::buildForwardTable  -  This is the main function for building
+//                               the DFA state transition table from the RBBI rules parse tree.
 //
 //-----------------------------------------------------------------------------
-void  RBBITableBuilder::build() {
+void  RBBITableBuilder::buildForwardTable() {
 
     if (U_FAILURE(*fStatus)) {
         return;
@@ -78,8 +80,8 @@ void  RBBITableBuilder::build() {
     fTree = fTree->flattenVariables();
 #ifdef RBBI_DEBUG
     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
-        RBBIDebugPuts("Parse tree after flattening variable references.");
-        fTree->printTree(TRUE);
+        RBBIDebugPuts("\nParse tree after flattening variable references.");
+        RBBINode::printTree(fTree, TRUE);
     }
 #endif
 
@@ -136,8 +138,8 @@ void  RBBITableBuilder::build() {
     fTree->flattenSets();
 #ifdef RBBI_DEBUG
     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
-        RBBIDebugPuts("Parse tree after flattening Unicode Set references.");
-        fTree->printTree(TRUE);
+        RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
+        RBBINode::printTree(fTree, TRUE);
     }
 #endif
 
@@ -186,8 +188,6 @@ void  RBBITableBuilder::build() {
     //    for all tables.  Merge the ones from this table into the global set.
     //
     mergeRuleStatusVals();
-
-    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();};
 }
 
 
@@ -375,6 +375,25 @@ void RBBITableBuilder::calcFollowPos(RBBINode *n) {
 
 }
 
+//-----------------------------------------------------------------------------
+//
+//    addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged
+//                        as roots of a rule to a destination vector.
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
+    if (node == NULL || U_FAILURE(*fStatus)) {
+        return;
+    }
+    if (node->fRuleRoot) {
+        dest->addElement(node, *fStatus);
+        // Note: rules cannot nest. If we found a rule start node,
+        //       no child node can also be a start node.
+        return;
+    }
+    addRuleRootNodes(dest, node->fLeftChild);
+    addRuleRootNodes(dest, node->fRightChild);
+}
 
 //-----------------------------------------------------------------------------
 //
@@ -401,19 +420,24 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) {
         return;
     }
 
-    // Get all nodes that can be the start a match, which is FirstPosition()
-    // of the portion of the tree corresponding to user-written rules.
-    // See the tree description in bofFixup().
-    RBBINode *userRuleRoot = tree;
-    if (fRB->fSetBuilder->sawBOF()) {
-        userRuleRoot = tree->fLeftChild->fRightChild;
-    }
-    U_ASSERT(userRuleRoot != NULL);
-    UVector *matchStartNodes = userRuleRoot->fFirstPosSet;
+    // Collect all leaf nodes that can start matches for rules
+    // with inbound chaining enabled, which is the union of the 
+    // firstPosition sets from each of the rule root nodes.
+    
+    UVector ruleRootNodes(*fStatus);
+    addRuleRootNodes(&ruleRootNodes, tree);
 
+    UVector matchStartNodes(*fStatus);
+    for (int j=0; j<ruleRootNodes.size(); ++j) {
+        RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
+        if (node->fChainIn) {
+            setAdd(&matchStartNodes, node->fFirstPosSet);
+        }
+    }
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
 
-    // Iteratate over all leaf nodes,
-    //
     int32_t  endNodeIx;
     int32_t  startNodeIx;
 
@@ -455,8 +479,8 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) {
         // Now iterate over the nodes that can start a match, looking for ones
         //   with the same char class as our ending node.
         RBBINode *startNode;
-        for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) {
-            startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx);
+        for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) {
+            startNode = (RBBINode *)matchStartNodes.elementAt(startNodeIx);
             if (startNode->fType != RBBINode::leafChar) {
                 continue;
             }
@@ -735,7 +759,7 @@ void     RBBITableBuilder::flagAcceptingStates() {
                 // if sd->fAccepting already had a value other than 0 or -1, leave it be.
 
                 // If the end marker node is from a look-ahead rule, set
-                //   the fLookAhead field or this state also.
+                //   the fLookAhead field for this state also.
                 if (endMarker->fLookAheadEnd) {
                     // TODO:  don't change value if already set?
                     // TODO:  allow for more than one active look-ahead rule in engine.
@@ -955,74 +979,56 @@ void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
     int32_t destOriginalSize = dest->size();
     int32_t sourceSize       = source->size();
     int32_t di           = 0;
-    void *(destS[16]), *(sourceS[16]);  // Handle small cases without malloc
-    void **destH = 0, **sourceH = 0;
-    void **destBuff, **sourceBuff;
+    MaybeStackArray<void *, 16> destArray, sourceArray;  // Handle small cases without malloc
+    void **destPtr, **sourcePtr;
     void **destLim, **sourceLim;
 
-    if (destOriginalSize > (int32_t)(sizeof(destS)/sizeof(destS[0]))) {
-        destH = (void **)uprv_malloc(sizeof(void *) * destOriginalSize);
-        destBuff = destH;
-    }
-    else {
-        destBuff = destS;
-    }
-    if (destBuff == 0) {
-        return;
+    if (destOriginalSize > destArray.getCapacity()) {
+        if (destArray.resize(destOriginalSize) == NULL) {
+            return;
+        }
     }
-    destLim = destBuff + destOriginalSize;
+    destPtr = destArray.getAlias();
+    destLim = destPtr + destOriginalSize;  // destArray.getArrayLimit()?
 
-    if (sourceSize > (int32_t)(sizeof(sourceS)/sizeof(sourceS[0]))) {
-        sourceH = (void **)uprv_malloc(sizeof(void *) * sourceSize);
-        sourceBuff = sourceH;
-    }
-    else {
-        sourceBuff = sourceS;
-    }
-    if (sourceBuff == 0) {
-        if (destH) {
-            uprv_free(destH);
+    if (sourceSize > sourceArray.getCapacity()) {
+        if (sourceArray.resize(sourceSize) == NULL) {
+            return;
         }
-        return;
     }
-    sourceLim = sourceBuff + sourceSize;
+    sourcePtr = sourceArray.getAlias();
+    sourceLim = sourcePtr + sourceSize;  // sourceArray.getArrayLimit()?
 
     // Avoid multiple "get element" calls by getting the contents into arrays
-    (void) dest->toArray(destBuff);
-    (void) source->toArray(sourceBuff);
+    (void) dest->toArray(destPtr);
+    (void) source->toArray(sourcePtr);
 
     dest->setSize(sourceSize+destOriginalSize, *fStatus);
 
-    while (sourceBuff < sourceLim && destBuff < destLim) {
-        if (*destBuff == *sourceBuff) {
-            dest->setElementAt(*sourceBuff++, di++);
-            destBuff++;
+    while (sourcePtr < sourceLim && destPtr < destLim) {
+        if (*destPtr == *sourcePtr) {
+            dest->setElementAt(*sourcePtr++, di++);
+            destPtr++;
         }
         // This check is required for machines with segmented memory, like i5/OS.
         // Direct pointer comparison is not recommended.
-        else if (uprv_memcmp(destBuff, sourceBuff, sizeof(void *)) < 0) {
-            dest->setElementAt(*destBuff++, di++);
+        else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) {
+            dest->setElementAt(*destPtr++, di++);
         }
-        else { /* *sourceBuff < *destBuff */
-            dest->setElementAt(*sourceBuff++, di++);
+        else { /* *sourcePtr < *destPtr */
+            dest->setElementAt(*sourcePtr++, di++);
         }
     }
 
     // At most one of these two cleanup loops will execute
-    while (destBuff < destLim) {
-        dest->setElementAt(*destBuff++, di++);
+    while (destPtr < destLim) {
+        dest->setElementAt(*destPtr++, di++);
     }
-    while (sourceBuff < sourceLim) {
-        dest->setElementAt(*sourceBuff++, di++);
+    while (sourcePtr < sourceLim) {
+        dest->setElementAt(*sourcePtr++, di++);
     }
 
     dest->setSize(di, *fStatus);
-    if (destH) {
-        uprv_free(destH);
-    }
-    if (sourceH) {
-        uprv_free(sourceH);
-    }
 }
 
 
@@ -1050,7 +1056,9 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
     if (n==NULL) {
         return;
     }
-    n->printNode();
+    printf("\n");
+    RBBINode::printNodeHeader();
+    RBBINode::printNode(n);
     RBBIDebugPrintf("         Nullable:  %s\n", n->fNullable?"TRUE":"FALSE");
 
     RBBIDebugPrintf("         firstpos:  ");
@@ -1067,6 +1075,188 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
 }
 #endif
 
+//
+//    findDuplCharClassFrom()
+//
+bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
+    int32_t numStates = fDStates->size();
+    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+
+    for (; categories->first < numCols-1; categories->first++) {
+        for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
+            // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
+            uint16_t table_base = 0;
+            uint16_t table_dupl = 1;
+            for (int32_t state=0; state<numStates; state++) {
+                RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+                table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
+                table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
+                if (table_base != table_dupl) {
+                    break;
+                }
+            }
+            if (table_base == table_dupl) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+//
+//    removeColumn()
+//
+void RBBITableBuilder::removeColumn(int32_t column) {
+    int32_t numStates = fDStates->size();
+    for (int32_t state=0; state<numStates; state++) {
+        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+        U_ASSERT(column < sd->fDtran->size());
+        sd->fDtran->removeElementAt(column);
+    }
+}
+
+/*
+ * findDuplicateState
+ */
+bool RBBITableBuilder::findDuplicateState(IntPair *states) {
+    int32_t numStates = fDStates->size();
+    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+
+    for (; states->first<numStates-1; states->first++) {
+        RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);
+        for (states->second=states->first+1; states->second<numStates; states->second++) {
+            RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);
+            if (firstSD->fAccepting != duplSD->fAccepting ||
+                firstSD->fLookAhead != duplSD->fLookAhead ||
+                firstSD->fTagsIdx   != duplSD->fTagsIdx) {
+                continue;
+            }
+            bool rowsMatch = true;
+            for (int32_t col=0; col < numCols; ++col) {
+                int32_t firstVal = firstSD->fDtran->elementAti(col);
+                int32_t duplVal = duplSD->fDtran->elementAti(col);
+                if (!((firstVal == duplVal) ||
+                        ((firstVal == states->first || firstVal == states->second) &&
+                        (duplVal  == states->first || duplVal  == states->second)))) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
+    int32_t numStates = fSafeTable->size();
+
+    for (; states->first<numStates-1; states->first++) {
+        UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
+        for (states->second=states->first+1; states->second<numStates; states->second++) {
+            UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
+            bool rowsMatch = true;
+            int32_t numCols = firstRow->length();
+            for (int32_t col=0; col < numCols; ++col) {
+                int32_t firstVal = firstRow->charAt(col);
+                int32_t duplVal = duplRow->charAt(col);
+                if (!((firstVal == duplVal) ||
+                        ((firstVal == states->first || firstVal == states->second) &&
+                        (duplVal  == states->first || duplVal  == states->second)))) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+void RBBITableBuilder::removeState(IntPair duplStates) {
+    const int32_t keepState = duplStates.first;
+    const int32_t duplState = duplStates.second;
+    U_ASSERT(keepState < duplState);
+    U_ASSERT(duplState < fDStates->size());
+
+    RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+    fDStates->removeElementAt(duplState);
+    delete duplSD;
+
+    int32_t numStates = fDStates->size();
+    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+    for (int32_t state=0; state<numStates; ++state) {
+        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+        for (int32_t col=0; col<numCols; col++) {
+            int32_t existingVal = sd->fDtran->elementAti(col);
+            int32_t newVal = existingVal;
+            if (existingVal == duplState) {
+                newVal = keepState;
+            } else if (existingVal > duplState) {
+                newVal = existingVal - 1;
+            }
+            sd->fDtran->setElementAt(newVal, col);
+        }
+        if (sd->fAccepting == duplState) {
+            sd->fAccepting = keepState;
+        } else if (sd->fAccepting > duplState) {
+            sd->fAccepting--;
+        }
+        if (sd->fLookAhead == duplState) {
+            sd->fLookAhead = keepState;
+        } else if (sd->fLookAhead > duplState) {
+            sd->fLookAhead--;
+        }
+    }
+}
+
+void RBBITableBuilder::removeSafeState(IntPair duplStates) {
+    const int32_t keepState = duplStates.first;
+    const int32_t duplState = duplStates.second;
+    U_ASSERT(keepState < duplState);
+    U_ASSERT(duplState < fSafeTable->size());
+
+    fSafeTable->removeElementAt(duplState);   // Note that fSafeTable has a deleter function
+                                              // and will auto-delete the removed element.
+    int32_t numStates = fSafeTable->size();
+    for (int32_t state=0; state<numStates; ++state) {
+        UnicodeString *sd = (UnicodeString *)fSafeTable->elementAt(state);
+        int32_t numCols = sd->length();
+        for (int32_t col=0; col<numCols; col++) {
+            int32_t existingVal = sd->charAt(col);
+            int32_t newVal = existingVal;
+            if (existingVal == duplState) {
+                newVal = keepState;
+            } else if (existingVal > duplState) {
+                newVal = existingVal - 1;
+            }
+            sd->setCharAt(col, static_cast<char16_t>(newVal));
+        }
+    }
+}
+
+
+/*
+ * RemoveDuplicateStates
+ */
+int32_t RBBITableBuilder::removeDuplicateStates() {
+    IntPair dupls = {3, 0};
+    int32_t numStatesRemoved = 0;
+
+    while (findDuplicateState(&dupls)) {
+        // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
+        removeState(dupls);
+        ++numStatesRemoved;
+    }
+    return numStatesRemoved;
+}
 
 
 //-----------------------------------------------------------------------------
@@ -1085,21 +1275,17 @@ int32_t  RBBITableBuilder::getTableSize() const {
         return 0;
     }
 
-    size    = sizeof(RBBIStateTable) - 4;    // The header, with no rows to the table.
+    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
 
     numRows = fDStates->size();
     numCols = fRB->fSetBuilder->getNumCharCategories();
 
-    //  Note  The declaration of RBBIStateTableRow is for a table of two columns.
-    //        Therefore we subtract two from numCols when determining
-    //        how much storage to add to a row for the total columns.
-    rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);
+    rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
     size   += numRows * rowSize;
     return size;
 }
 
 
-
 //-----------------------------------------------------------------------------
 //
 //   exportTable()    export the state transition table in the format required
@@ -1116,14 +1302,14 @@ void RBBITableBuilder::exportTable(void *where) {
         return;
     }
 
-    if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff ||
+    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
+    if (catCount > 0x7fff ||
         fDStates->size() > 0x7fff) {
         *fStatus = U_BRK_INTERNAL_ERROR;
         return;
     }
 
-    table->fRowLen    = sizeof(RBBIStateTableRow) +
-                            sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2);
+    table->fRowLen    = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
     table->fNumStates = fDStates->size();
     table->fFlags     = 0;
     if (fRB->fLookAheadHardBreak) {
@@ -1142,13 +1328,192 @@ void RBBITableBuilder::exportTable(void *where) {
         row->fAccepting = (int16_t)sd->fAccepting;
         row->fLookAhead = (int16_t)sd->fLookAhead;
         row->fTagIdx    = (int16_t)sd->fTagsIdx;
-        for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) {
+        for (col=0; col<catCount; col++) {
             row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
         }
     }
 }
 
 
+/**
+ *   Synthesize a safe state table from the main state table.
+ */
+void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
+    // The safe table creation has three steps:
+
+    // 1. Identifiy pairs of character classes that are "safe." Safe means that boundaries
+    // following the pair do not depend on context or state before the pair. To test
+    // whether a pair is safe, run it through the main forward state table, starting
+    // from each state. If the the final state is the same, no matter what the starting state,
+    // the pair is safe.
+    //
+    // 2. Build a state table that recognizes the safe pairs. It's similar to their
+    // forward table, with a column for each input character [class], and a row for
+    // each state. Row 1 is the start state, and row 0 is the stop state. Initially
+    // create an additional state for each input character category; being in
+    // one of these states means that the character has been seen, and is potentially
+    // the first of a pair. In each of these rows, the entry for the second character
+    // of a safe pair is set to the stop state (0), indicating that a match was found.
+    // All other table entries are set to the state corresponding the current input
+    // character, allowing that charcter to be the of a start following pair.
+    //
+    // Because the safe rules are to be run in reverse, moving backwards in the text,
+    // the first and second pair categories are swapped when building the table.
+    //
+    // 3. Compress the table. There are typically many rows (states) that are
+    // equivalent - that have zeroes (match completed) in the same columns -
+    // and can be folded together.
+
+    // Each safe pair is stored as two UChars in the safePair string.
+    UnicodeString safePairs;
+
+    int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
+    int32_t numStates = fDStates->size();
+
+    for (int32_t c1=0; c1<numCharClasses; ++c1) {
+        for (int32_t c2=0; c2 < numCharClasses; ++c2) {
+            int32_t wantedEndState = -1;
+            int32_t endState = 0;
+            for (int32_t startState = 1; startState < numStates; ++startState) {
+                RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
+                int32_t s2 = startStateD->fDtran->elementAti(c1);
+                RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
+                endState = s2StateD->fDtran->elementAti(c2);
+                if (wantedEndState < 0) {
+                    wantedEndState = endState;
+                } else {
+                    if (wantedEndState != endState) {
+                        break;
+                    }
+                }
+            }
+            if (wantedEndState == endState) {
+                safePairs.append((char16_t)c1);
+                safePairs.append((char16_t)c2);
+                // printf("(%d, %d) ", c1, c2);
+            }
+        }
+        // printf("\n");
+    }
+
+    // Populate the initial safe table.
+    // The table as a whole is UVector<UnicodeString>
+    // Each row is represented by a UnicodeString, being used as a Vector<int16>.
+    // Row 0 is the stop state.
+    // Row 1 is the start sate.
+    // Row 2 and beyond are other states, initially one per char class, but
+    //   after initial construction, many of the states will be combined, compacting the table.
+    // The String holds the nextState data only. The four leading fields of a row, fAccepting,
+    // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
+
+    U_ASSERT(fSafeTable == nullptr);
+    fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status);
+    for (int32_t row=0; row<numCharClasses + 2; ++row) {
+        fSafeTable->addElement(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
+    }
+
+    // From the start state, each input char class transitions to the state for that input.
+    UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
+    for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
+        // Note: +2 for the start & stop state.
+        startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
+    }
+
+    // Initially make every other state table row look like the start state row,
+    for (int32_t row=2; row<numCharClasses+2; ++row) {
+        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
+        rowState = startState;   // UnicodeString assignment, copies contents.
+    }
+
+    // Run through the safe pairs, set the next state to zero when pair has been seen.
+    // Zero being the stop state, meaning we found a safe point.
+    for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
+        int32_t c1 = safePairs.charAt(pairIdx);
+        int32_t c2 = safePairs.charAt(pairIdx + 1);
+
+        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
+        rowState.setCharAt(c1, 0);
+    }
+
+    // Remove duplicate or redundant rows from the table.
+    IntPair states = {1, 0};
+    while (findDuplicateSafeState(&states)) {
+        // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
+        removeSafeState(states);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+//
+//   getSafeTableSize()    Calculate the size of the runtime form of this
+//                         safe state table.
+//
+//-----------------------------------------------------------------------------
+int32_t  RBBITableBuilder::getSafeTableSize() const {
+    int32_t    size = 0;
+    int32_t    numRows;
+    int32_t    numCols;
+    int32_t    rowSize;
+
+    if (fSafeTable == nullptr) {
+        return 0;
+    }
+
+    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
+
+    numRows = fSafeTable->size();
+    numCols = fRB->fSetBuilder->getNumCharCategories();
+
+    rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
+    size   += numRows * rowSize;
+    return size;
+}
+
+
+//-----------------------------------------------------------------------------
+//
+//   exportSafeTable()   export the state transition table in the format required
+//                       by the runtime engine.  getTableSize() bytes of memory
+//                       must be available at the output address "where".
+//
+//-----------------------------------------------------------------------------
+void RBBITableBuilder::exportSafeTable(void *where) {
+    RBBIStateTable    *table = (RBBIStateTable *)where;
+    uint32_t           state;
+    int                col;
+
+    if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
+        return;
+    }
+
+    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
+    if (catCount > 0x7fff ||
+            fSafeTable->size() > 0x7fff) {
+        *fStatus = U_BRK_INTERNAL_ERROR;
+        return;
+    }
+
+    table->fRowLen    = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
+    table->fNumStates = fSafeTable->size();
+    table->fFlags     = 0;
+    table->fReserved  = 0;
+
+    for (state=0; state<table->fNumStates; state++) {
+        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state);
+        RBBIStateTableRow   *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
+        row->fAccepting = 0;
+        row->fLookAhead = 0;
+        row->fTagIdx    = 0;
+        row->fReserved  = 0;
+        for (col=0; col<catCount; col++) {
+            row->fNextState[col] = rowString->charAt(col);
+        }
+    }
+}
+
+
+
 
 //-----------------------------------------------------------------------------
 //
@@ -1159,8 +1524,8 @@ void RBBITableBuilder::exportTable(void *where) {
 void RBBITableBuilder::printSet(UVector *s) {
     int32_t  i;
     for (i=0; i<s->size(); i++) {
-        void *v = s->elementAt(i);
-        RBBIDebugPrintf("%10p", v);
+        const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i));
+        RBBIDebugPrintf("%5d", v==NULL? -1 : v->fSerialNum);
     }
     RBBIDebugPrintf("\n");
 }
@@ -1203,6 +1568,47 @@ void RBBITableBuilder::printStates() {
 #endif
 
 
+//-----------------------------------------------------------------------------
+//
+//   printSafeTable    Debug Function.  Dump the fully constructed safe table.
+//
+//-----------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBITableBuilder::printReverseTable() {
+    int     c;    // input "character"
+    int     n;    // state number
+
+    RBBIDebugPrintf("    Safe Reverse Table \n");
+    if (fSafeTable == nullptr) {
+        RBBIDebugPrintf("   --- nullptr ---\n");
+        return;
+    }
+    RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n");
+    RBBIDebugPrintf("      | Acc  LA    Tag");
+    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
+        RBBIDebugPrintf(" %2d", c);
+    }
+    RBBIDebugPrintf("\n");
+    RBBIDebugPrintf("      |---------------");
+    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
+        RBBIDebugPrintf("---");
+    }
+    RBBIDebugPrintf("\n");
+
+    for (n=0; n<fSafeTable->size(); n++) {
+        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);
+        RBBIDebugPrintf("  %3d | " , n);
+        RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags
+        for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
+            RBBIDebugPrintf(" %2d", rowString->charAt(c));
+        }
+        RBBIDebugPrintf("\n");
+    }
+    RBBIDebugPrintf("\n\n");
+}
+#endif
+
+
 
 //-----------------------------------------------------------------------------
 //
@@ -1249,7 +1655,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
     fPositions = NULL;
     fDtran     = NULL;
 
-    fDtran     = new UVector(lastInputSymbol+1, *fStatus);
+    fDtran     = new UVector32(lastInputSymbol+1, *fStatus);
     if (U_FAILURE(*fStatus)) {
         return;
     }
@@ -1257,7 +1663,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
         *fStatus = U_MEMORY_ALLOCATION_ERROR;
         return;
     }
-    fDtran->setSize(lastInputSymbol+1, *fStatus);    // fDtran needs to be pre-sized.
+    fDtran->setSize(lastInputSymbol+1);    // fDtran needs to be pre-sized.
                                            //   It is indexed by input symbols, and will
                                            //   hold  the next state number for each
                                            //   symbol.