X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..1546d4af2ed1219a41ef4170bf188f2ab91442e6:/icuSources/common/rbbitblb.h diff --git a/icuSources/common/rbbitblb.h b/icuSources/common/rbbitblb.h index 47f7de27..eea243e4 100644 --- a/icuSources/common/rbbitblb.h +++ b/icuSources/common/rbbitblb.h @@ -1,10 +1,12 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html // // rbbitblb.h // /* ********************************************************************** -* Copyright (c) 2002, International Business Machines +* Copyright (c) 2002-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -15,6 +17,7 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/rbbi.h" +#include "rbbirb.h" #include "rbbinode.h" @@ -22,6 +25,7 @@ U_NAMESPACE_BEGIN class RBBIRuleScanner; class RBBIRuleBuilder; +class UVector32; // // class RBBITableBuilder is part of the RBBI rule compiler. @@ -34,25 +38,99 @@ class RBBIRuleBuilder; class RBBITableBuilder : public UMemory { public: - RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode); + RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status); ~RBBITableBuilder(); - void build(); - int32_t getTableSize(); // Return the runtime size in bytes of - // the built state table - void exportTable(void *where); // fill in the runtime state table. - // Sufficient memory must exist at - // the specified location. + void buildForwardTable(); + + /** Return the runtime size in bytes of the built state table. */ + int32_t getTableSize() const; + + /** Fill in the runtime state table. Sufficient memory must exist at the specified location. + */ + void exportTable(void *where); + + /** + * Find duplicate (redundant) character classes. Begin looking with categories.first. + * Duplicate, if found are returned in the categories parameter. + * This is an iterator-like function, used to identify character classes + * (state table columns) that can be eliminated. + * @param categories in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if duplicate char classes were found, false otherwise. + */ + bool findDuplCharClassFrom(IntPair *categories); + + /** Remove a column from the state table. Used when two character categories + * have been found equivalent, and merged together, to eliminate the uneeded table column. + */ + void removeColumn(int32_t column); + + /** Check for, and remove dupicate states (table rows). */ + void removeDuplicateStates(); + + /** Build the safe reverse table from the already-constructed forward table. */ + void buildSafeReverseTable(UErrorCode &status); + + /** Return the runtime size in bytes of the built safe reverse state table. */ + int32_t getSafeTableSize() const; + + /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location. + */ + void exportSafeTable(void *where); + private: void calcNullable(RBBINode *n); void calcFirstPos(RBBINode *n); void calcLastPos(RBBINode *n); void calcFollowPos(RBBINode *n); + void calcChainedFollowPos(RBBINode *n); + void bofFixup(); void buildStateTable(); void flagAcceptingStates(); void flagLookAheadStates(); void flagTaggedStates(); + void mergeRuleStatusVals(); + + /** + * Merge redundant state table columns, eliminating character classes with identical behavior. + * Done after the state tables are generated, just before converting to their run-time format. + */ + int32_t mergeColumns(); + + void addRuleRootNodes(UVector *dest, RBBINode *node); + + /** + * Find duplicate (redundant) states, beginning at the specified pair, + * within this state table. This is an iterator-like function, used to + * identify states (state table rows) that can be eliminated. + * @param states in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if duplicate states were found, false otherwise. + */ + bool findDuplicateState(IntPair *states); + + /** Remove a duplicate state. + * @param duplStates The duplicate states. The first is kept, the second is removed. + * All references to the second in the state table are retargeted + * to the first. + */ + void removeState(IntPair duplStates); + + /** Find the next duplicate state in the safe reverse table. An iterator function. + * @param states in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if a duplicate pair of states was found. + */ + bool findDuplicateSafeState(IntPair *states); + + /** Remove a duplicate state from the safe table. + * @param duplStates The duplicate states. The first is kept, the second is removed. + * All references to the second in the state table are retargeted + * to the first. + */ + void removeSafeState(IntPair duplStates); // Set functions for UVector. // TODO: make a USet subclass of UVector @@ -60,10 +138,22 @@ private: void setAdd(UVector *dest, UVector *source); UBool setEquals(UVector *a, UVector *b); + void sortedAdd(UVector **dest, int32_t val); + +public: +#ifdef RBBI_DEBUG void printSet(UVector *s); - void printPosSets(RBBINode *n = NULL); + void printPosSets(RBBINode *n /* = NULL*/); void printStates(); - + void printRuleStatusTable(); + void printReverseTable(); +#else + #define printSet(s) + #define printPosSets(n) + #define printStates() + #define printRuleStatusTable() + #define printReverseTable() +#endif private: RBBIRuleBuilder *fRB; @@ -71,10 +161,15 @@ private: // table for. UErrorCode *fStatus; + /** State Descriptors, UVector */ UVector *fDStates; // D states (Aho's terminology) // Index is state number // Contents are RBBIStateDescriptor pointers. + /** Synthesized safe table, UVector of UnicodeString, one string per table row. */ + UVector *fSafeTable; + + RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class }; @@ -87,12 +182,13 @@ public: UBool fMarked; int32_t fAccepting; int32_t fLookAhead; - int32_t fTagVal; + UVector *fTagVals; + int32_t fTagsIdx; UVector *fPositions; // Set of parse tree positions associated // with this state. Unordered (it's a set). // UVector contents are RBBINode * - UVector *fDtran; // Transitions out of this state. + UVector32 *fDtran; // Transitions out of this state. // indexed by input character // contents is int index of dest state // in RBBITableBuilder.fDStates