]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/rbbirb.h
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbirb.h
index 7b38187fd530cd57033092d8dfe19ee4fa3587c6..037c1dc2ce8ff798ead8a3e4fe44b958f7a8b69c 100644 (file)
@@ -1,10 +1,13 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 //
 //  rbbirb.h
 //
-//  Copyright (C) 2002, International Business Machines Corporation and others.
+//  Copyright (C) 2002-2008, International Business Machines Corporation and others.
 //  All Rights Reserved.
 //
-//  This file contains declarations for several from the Rule Based Break Iterator rule builder.
+//  This file contains declarations for several classes from the
+//    Rule Based Break Iterator rule builder.
 //
 
 
 #define RBBIRB_H
 
 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include <utility>
+
 #include "unicode/uobject.h"
 #include "unicode/rbbi.h"
 #include "unicode/uniset.h"
 #include "unicode/parseerr.h"
 #include "uhash.h"
 #include "uvector.h"
-#include "symtable.h"     // For UnicodeSet parsing, is the interface that
-                          //    looks up references to $variables within a set.
-
+#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
+                             //    looks up references to $variables within a set.
 
 
 U_NAMESPACE_BEGIN
@@ -78,7 +85,14 @@ public:
     virtual RBBINode *lookupNode(const UnicodeString &key) const;
     virtual void      addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err);
 
-    virtual void      print() const;
+#ifdef RBBI_DEBUG
+    virtual void      rbbiSymtablePrint() const;
+#else
+    // A do-nothing macro standing in for the member function in non-debug builds.
+    //  Its expansion can't be empty or the call sites won't compile.
+    int32_t fFakeField;
+    #define rbbiSymtablePrint() fFakeField=0; 
+#endif
 
 private:
     RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
@@ -99,7 +113,7 @@ public:
     //   public ICU API for creating RBBIs uses this function to do the actual work.
     //
     static BreakIterator * createRuleBasedBreakIterator( const UnicodeString    &rules,
-                                    UParseError      &parseError,
+                                    UParseError      *parseError,
                                     UErrorCode       &status);
 
 public:
@@ -108,25 +122,57 @@ public:
     //  are NOT intended to be accessed by anything outside of the
     //  rule builder implementation.
     RBBIRuleBuilder(const UnicodeString  &rules,
-                    UParseError          &parseErr,
+                    UParseError          *parseErr,
                     UErrorCode           &status
-        );
+    );
 
     virtual    ~RBBIRuleBuilder();
+
+    /**
+     *  Build the state tables and char class Trie from the source rules.
+     */
+    RBBIDataHeader  *build(UErrorCode &status);
+
+
+    /**
+     * Fold together redundant character classes (table columns) and
+     * redundant states (table rows). Done after initial table generation,
+     * before serializing the result.
+     */
+    void optimizeTables();
+
     char                          *fDebugEnv;        // controls debug trace output
     UErrorCode                    *fStatus;          // Error reporting.  Keeping status
     UParseError                   *fParseError;      //   here avoids passing it everywhere.
     const UnicodeString           &fRules;           // The rule string that we are compiling
+    UnicodeString                 fStrippedRules;    // The rule string, with comments stripped.
 
     RBBIRuleScanner               *fScanner;         // The scanner.
     RBBINode                      *fForwardTree;     // The parse trees, generated by the scanner,
     RBBINode                      *fReverseTree;     //   then manipulated by subsequent steps.
+    RBBINode                      *fSafeFwdTree;
+    RBBINode                      *fSafeRevTree;
+
+    RBBINode                      **fDefaultTree;    // For rules not qualified with a !
+                                                     //   the tree to which they belong.
+
+    UBool                         fChainRules;       // True for chained Unicode TR style rules.
+                                                     // False for traditional regexp rules.
+
+    UBool                         fLBCMNoChain;      // True:  suppress chaining of rules on
+                                                     //   chars with LineBreak property == CM.
+
+    UBool                         fLookAheadHardBreak;  // True:  Look ahead matches cause an
+                                                     // immediate break, no continuing for the
+                                                     // longest match.
 
     RBBISetBuilder                *fSetBuilder;      // Set and Character Category builder.
     UVector                       *fUSetNodes;       // Vector of all uset nodes.
 
-    RBBITableBuilder              *fForwardTables;   // State transition tables
-    RBBITableBuilder              *fReverseTables;
+    RBBITableBuilder              *fForwardTable;    // State transition table, build time form.
+
+    UVector                       *fRuleStatusVals;  // The values that can be returned
+                                                     //   from getRuleStatus().
 
     RBBIDataHeader                *flattenData();    // Create the flattened (runtime format)
                                                      // data tables..
@@ -158,6 +204,11 @@ struct RBBISetTableEl {
     RBBINode      *val;
 };
 
+/**
+ *   A pair of ints, used to bundle pairs of states or pairs of character classes.
+ */
+typedef std::pair<int32_t, int32_t> IntPair;
+
 
 //----------------------------------------------------------------------------
 //
@@ -170,11 +221,16 @@ struct RBBISetTableEl {
 #ifdef RBBI_DEBUG
 #include <stdio.h>
 #define RBBIDebugPrintf printf
+#define RBBIDebugPuts puts
 #else
-inline void RBBIDebugPrintf(...) {}
+#undef RBBIDebugPrintf 
+#define RBBIDebugPuts(arg)
 #endif
 
 U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
 #endif