]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | // |
2 | // rbbiscan.h | |
3 | // | |
2ca993e8 | 4 | // Copyright (C) 2002-2016, International Business Machines Corporation and others. |
b75a7d8f A |
5 | // All Rights Reserved. |
6 | // | |
7 | // This file contains declarations for class RBBIRuleScanner | |
8 | // | |
9 | ||
10 | ||
11 | #ifndef RBBISCAN_H | |
12 | #define RBBISCAN_H | |
13 | ||
14 | #include "unicode/utypes.h" | |
15 | #include "unicode/uobject.h" | |
16 | #include "unicode/rbbi.h" | |
17 | #include "unicode/uniset.h" | |
18 | #include "unicode/parseerr.h" | |
19 | #include "uhash.h" | |
20 | #include "uvector.h" | |
374ca955 | 21 | #include "unicode/symtable.h"// For UnicodeSet parsing; this is the interface that
b75a7d8f A |
22 | // looks up references to $variables within a set. |
23 | #include "rbbinode.h" | |
2ca993e8 | 24 | #include "rbbirpt.h" |
b75a7d8f A |
25 | |
26 | U_NAMESPACE_BEGIN | |
27 | ||
28 | class RBBIRuleBuilder; | |
29 | class RBBISymbolTable; | |
30 | ||
31 | ||
32 | //-------------------------------------------------------------------------------- | |
33 | // | |
34 | // class RBBIRuleScanner does the lowest level, character-at-a-time | |
35 | // scanning of break iterator rules. | |
36 | // | |
37 | // The output of the scanner is parse trees for | |
38 | // the rule expressions and a list of all Unicode Sets | |
39 | // encountered. | |
40 | // | |
41 | //-------------------------------------------------------------------------------- | |
b75a7d8f A |
42 | |
43 | class RBBIRuleScanner : public UMemory { | |
44 | public: | |
45 | ||
46f4442e A |
46 | enum { |
47 | kStackSize = 100 // The size of the state stack for | |
48 | }; // rules parsing. Corresponds roughly | |
49 | // to the depth of parentheses nesting | |
50 | // that is allowed in the rules. | |
51 | ||
b75a7d8f A |
52 | struct RBBIRuleChar { |
53 | UChar32 fChar; | |
54 | UBool fEscaped; | |
2ca993e8 | 55 | RBBIRuleChar() : fChar(0), fEscaped(FALSE) {}; |
b75a7d8f A |
56 | }; |
57 | ||
58 | RBBIRuleScanner(RBBIRuleBuilder *rb); | |
59 | ||
60 | ||
61 | virtual ~RBBIRuleScanner(); | |
62 | ||
63 | void nextChar(RBBIRuleChar &c); // Get the next char from the input stream. | |
64 | // The char is delivered through c; the function itself returns void. | |
65 | ||
66 | UBool push(const RBBIRuleChar &c); // Push (unget) one character. | |
67 | // Only a single character may be pushed. | |
68 | ||
69 | void parse(); // Parse the rules, generating two parse | |
70 | // trees, one each for the forward and | |
71 | // reverse rules, | |
72 | // and a list of UnicodeSets encountered. | |
73 | ||
74 | /** | |
75 | * Return a rules string without unnecessary | |
76 | * characters. | |
77 | */ | |
78 | static UnicodeString stripRules(const UnicodeString &rules); | |
79 | private: | |
80 | ||
46f4442e | 81 | UBool doParseActions(int32_t a);
b75a7d8f A |
82 | void error(UErrorCode e); // error reporting convenience function. |
83 | void fixOpStack(RBBINode::OpPrecedence p); | |
84 | // NOTE(review): orphaned comment fragment ("a character."); it documents nothing visible here. | |
85 | void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL); | |
86 | ||
87 | UChar32 nextCharLL(); | |
73c04bcf | 88 | #ifdef RBBI_DEBUG
b75a7d8f | 89 | void printNodeStack(const char *title);
73c04bcf | 90 | #endif
b75a7d8f A |
91 | RBBINode *pushNewNode(RBBINode::NodeType t); |
92 | void scanSet(); | |
93 | ||
94 | ||
95 | RBBIRuleBuilder *fRB; // The rule builder that we are part of. | |
96 | ||
97 | int32_t fScanIndex; // Index of current character being processed | |
98 | // in the rule input string. | |
99 | int32_t fNextIndex; // Index of the next character, which | |
100 | // is the first character not yet scanned. | |
101 | UBool fQuoteMode; // Scan is in a 'quoted region' | |
73c04bcf A |
102 | int32_t fLineNum; // Line number in input file. |
103 | int32_t fCharNum; // Char position within the line. | |
b75a7d8f A |
104 | UChar32 fLastChar; // Previous char, needed to count CR-LF |
105 | // as a single line, not two. | |
106 | ||
107 | RBBIRuleChar fC; // Current char for parse state machine | |
108 | // processing. | |
109 | UnicodeString fVarName; // $variableName, valid when we've just | |
110 | // scanned one. | |
111 | ||
112 | RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule | |
113 | // parsing. index by p[state][char-class] | |
114 | ||
115 | uint16_t fStack[kStackSize]; // State stack, holds state pushes | |
73c04bcf | 116 | int32_t fStackPtr; // and pops as specified in the state
b75a7d8f A |
117 | // transition rules. |
118 | ||
119 | RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created | |
120 | // during the parse of a rule | |
73c04bcf | 121 | int32_t fNodeStackPtr;
b75a7d8f A |
122 | 
123 | ||
124 | UBool fReverseRule; // True if the rule currently being scanned | |
125 | // is a reverse direction rule (if it | |
126 | // starts with a '!') | |
127 | ||
128 | UBool fLookAheadRule; // True if the rule includes a '/' | |
129 | // somewhere within it. | |
130 | ||
2ca993e8 A |
131 | UBool fNoChainInRule; // True if the current rule starts with a '^'. |
132 | ||
b75a7d8f A |
133 | RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of |
134 | // $variable symbols. | |
135 | ||
136 | UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to | |
137 | // the sets created while parsing rules. | |
138 | // The key is the string used for creating | |
139 | // the set. | |
140 | ||
46f4442e | 141 | UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
b75a7d8f A |
142 | // the scanning of RBBI rules. The |
143 | // indices for these are assigned by the | |
144 | // perl script that builds the state tables. | |
145 | // See rbbirpt.h. | |
146 | ||
147 | int32_t fRuleNum; // Counts each rule as it is scanned. | |
148 | ||
374ca955 A |
149 | int32_t fOptionStart; // Input index of start of a !!option |
150 | // keyword, while being scanned. | |
151 | ||
b75a7d8f A |
152 | UnicodeSet *gRuleSet_rule_char; |
153 | UnicodeSet *gRuleSet_white_space; | |
154 | UnicodeSet *gRuleSet_name_char; | |
155 | UnicodeSet *gRuleSet_name_start_char; | |
156 | ||
157 | RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class | |
158 | RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class | |
159 | }; | |
160 | ||
161 | U_NAMESPACE_END | |
162 | ||
163 | #endif |