+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
//
// rbbirb.h
//
-// Copyright (C) 2002, International Business Machines Corporation and others.
+// Copyright (C) 2002-2008, International Business Machines Corporation and others.
// All Rights Reserved.
//
-// This file contains declarations for several from the Rule Based Break Iterator rule builder.
+// This file contains declarations for several classes from the
+// Rule Based Break Iterator rule builder.
//
#define RBBIRB_H
#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include <utility>
+
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
-#include "symtable.h" // For UnicodeSet parsing, is the interface that
- // looks up references to $variables within a set.
-
+#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
+ // looks up references to $variables within a set.
U_NAMESPACE_BEGIN
virtual RBBINode *lookupNode(const UnicodeString &key) const;
virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
- virtual void print() const;
+#ifdef RBBI_DEBUG
+ virtual void rbbiSymtablePrint() const;
+#else // !RBBI_DEBUG
+ // Non-debug builds declare no print member. The macro below rewrites each call to
+ // rbbiSymtablePrint() into a dummy assignment to fFakeField so call sites still compile.
+ int32_t fFakeField; // dummy assignment target for the macro below
+ #define rbbiSymtablePrint() fFakeField=0;
+#endif // RBBI_DEBUG
private:
RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
// public ICU API for creating RBBIs uses this function to do the actual work.
//
static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
- UParseError &parseError,
+ UParseError *parseError,
UErrorCode &status);
public:
// are NOT intended to be accessed by anything outside of the
// rule builder implementation.
RBBIRuleBuilder(const UnicodeString &rules,
- UParseError &parseErr,
+ UParseError *parseErr,
UErrorCode &status
- );
+ );
virtual ~RBBIRuleBuilder();
+
+ /**
+ * Build the state tables and char class Trie from the source rules.
+ */
+ RBBIDataHeader *build(UErrorCode &status);
+
+
+ /**
+ * Fold together redundant character classes (table columns) and
+ * redundant states (table rows). Done after initial table generation,
+ * before serializing the result.
+ */
+ void optimizeTables();
+
char *fDebugEnv; // controls debug trace output
UErrorCode *fStatus; // Error reporting. Keeping status
UParseError *fParseError; // here avoids passing it everywhere.
const UnicodeString &fRules; // The rule string that we are compiling
+ UnicodeString fStrippedRules; // The rule string, with comments stripped.
RBBIRuleScanner *fScanner; // The scanner.
RBBINode *fForwardTree; // The parse trees, generated by the scanner,
RBBINode *fReverseTree; // then manipulated by subsequent steps.
+ RBBINode *fSafeFwdTree;
+ RBBINode *fSafeRevTree;
+
+ RBBINode **fDefaultTree; // For rules not qualified with a !,
+ // the tree to which they belong.
+
+ UBool fChainRules; // True for chained Unicode TR style rules.
+ // False for traditional regexp rules.
+
+ UBool fLBCMNoChain; // True: suppress chaining of rules on
+ // chars with LineBreak property == CM.
+
+ UBool fLookAheadHardBreak; // True: Look ahead matches cause an
+ // immediate break, no continuing for the
+ // longest match.
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
UVector *fUSetNodes; // Vector of all uset nodes.
- RBBITableBuilder *fForwardTables; // State transition tables
- RBBITableBuilder *fReverseTables;
+ RBBITableBuilder *fForwardTable; // State transition table, build time form.
+
+ UVector *fRuleStatusVals; // The values that can be returned
+ // from getRuleStatus().
RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
// data tables.
RBBINode *val;
};
+/**
+ * A pair of ints, used to bundle pairs of states or pairs of character classes.
+ * Both members are int32_t; as a std::pair they are read as .first and .second.
+ */
+typedef std::pair<int32_t, int32_t> IntPair;
+
//----------------------------------------------------------------------------
//
#ifdef RBBI_DEBUG
// Debug builds: the trace helpers are plain aliases for printf and puts.
#include <stdio.h>
#define RBBIDebugPrintf printf
+#define RBBIDebugPuts puts
#else
// Non-debug builds: RBBIDebugPuts(arg) expands to nothing. RBBIDebugPrintf is only
// undefined here, with no replacement supplied in this header.
// NOTE(review): confirm every use of RBBIDebugPrintf is itself under RBBI_DEBUG.
-inline void RBBIDebugPrintf(...) {}
+#undef RBBIDebugPrintf
+#define RBBIDebugPuts(arg)
#endif
U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
#endif