]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbiscan.h
4 // Copyright (C) 2002-2008, International Business Machines Corporation and others.
5 // All Rights Reserved.
7 // This file contains declarations for class RBBIRuleScanner
14 #include "unicode/utypes.h"
15 #include "unicode/uobject.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/uniset.h"
18 #include "unicode/parseerr.h"
21 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
22 // looks up references to $variables within a set.
24 //#include "rbbitblb.h"
30 class RBBIRuleBuilder
;
31 class RBBISymbolTable
;
34 //--------------------------------------------------------------------------------
36 // class RBBIRuleScanner does the lowest level, character-at-a-time
37 // scanning of break iterator rules.
39 // The output of the scanner is parse trees for
40 // the rule expressions and a list of all Unicode Sets encountered.
43 //--------------------------------------------------------------------------------
45 class RBBIRuleScanner
: public UMemory
{
49 kStackSize
= 100 // The size of the state stack for
50 }; // rules parsing. Corresponds roughly
51 // to the depth of parentheses nesting
52 // that is allowed in the rules.
59 RBBIRuleScanner(RBBIRuleBuilder
*rb
);
62 virtual ~RBBIRuleScanner();
64 void nextChar(RBBIRuleChar
&c
); // Get the next char from the input stream.
65 // NOTE(review): this comment used to say "Return false if at end", but the
// declared return type is void; end of input is presumably reported through c
// -- confirm against the implementation.
67 UBool
push(const RBBIRuleChar
&c
); // Push (unget) one character.
68 // Only a single character may be pushed.
70 void parse(); // Parse the rules, generating two parse
71 // trees, one each for the forward and reverse
73 // rules, and a list of UnicodeSets encountered.
76 * Return a rules string without unnecessary
79 static UnicodeString
stripRules(const UnicodeString
&rules
);
82 UBool
doParseActions(int32_t a
);
83 void error(UErrorCode e
); // error reporting convenience function.
84 void fixOpStack(RBBINode::OpPrecedence p
);
86 void findSetFor(const UnicodeString
&s
, RBBINode
*node
, UnicodeSet
*setToAdopt
= NULL
);
90 void printNodeStack(const char *title
);
92 RBBINode
*pushNewNode(RBBINode::NodeType t
);
96 RBBIRuleBuilder
*fRB
; // The rule builder that we are part of.
98 int32_t fScanIndex
; // Index of current character being processed
99 // in the rule input string.
100 int32_t fNextIndex
; // Index of the next character, which
101 // is the first character not yet scanned.
102 UBool fQuoteMode
; // Scan is in a 'quoted region'
103 int32_t fLineNum
; // Line number in input file.
104 int32_t fCharNum
; // Char position within the line.
105 UChar32 fLastChar
; // Previous char, needed to count CR-LF
106 // as a single line, not two.
108 RBBIRuleChar fC
; // Current char for parse state machine
110 UnicodeString fVarName
; // $variableName, valid when we've just
113 RBBIRuleTableEl
**fStateTable
; // State Transition Table for RBBI Rule
114 // parsing. index by p[state][char-class]
116 uint16_t fStack
[kStackSize
]; // State stack, holds state pushes
117 int32_t fStackPtr
; // and pops as specified in the state
120 RBBINode
*fNodeStack
[kStackSize
]; // Node stack, holds nodes created
121 // during the parse of a rule
122 int32_t fNodeStackPtr
;
125 UBool fReverseRule
; // True if the rule currently being scanned
126 // is a reverse direction rule (if it
127 // starts with a '!')
129 UBool fLookAheadRule
; // True if the rule includes a '/'
130 // somewhere within it.
132 RBBISymbolTable
*fSymbolTable
; // symbol table, holds definitions of
133 // $variable symbols.
135 UHashtable
*fSetTable
; // UnicodeSet hash table, holds indexes to
136 // the sets created while parsing rules.
137 // The key is the string used for creating the set.
140 UnicodeSet fRuleSets
[10]; // Unicode Sets that are needed during
141 // the scanning of RBBI rules. The
142 // indices for these are assigned by the
143 // perl script that builds the state tables.
146 int32_t fRuleNum
; // Counts each rule as it is scanned.
148 int32_t fOptionStart
; // Input index of start of a !!option
149 // keyword, while being scanned.
151 UnicodeSet
*gRuleSet_rule_char
;
152 UnicodeSet
*gRuleSet_white_space
;
153 UnicodeSet
*gRuleSet_name_char
;
154 UnicodeSet
*gRuleSet_name_start_char
;
156 RBBIRuleScanner(const RBBIRuleScanner
&other
); // forbid copying of this class
157 RBBIRuleScanner
&operator=(const RBBIRuleScanner
&other
); // forbid copying of this class