1 //---------------------------------------------------------------------------------
3 // Generated Header File. Do not edit by hand.
4 // This file contains the state table for the ICU Regular Expression Pattern Parser
5 // It is generated by the Perl script "regexcst.pl" from
6 // the rule parser state definitions file "regexcst.txt".
8 // Copyright (C) 2002-2003 International Business Machines Corporation
9 // and others. All rights reserved.
11 //---------------------------------------------------------------------------------
17 // Character classes for regex pattern scanning.
// Index values identifying the character classes used while scanning a regex
// pattern. They fall in the 128-255 range that the RegexTableEl field comments
// assign to character class indices; values 0-127 in the same table column
// denote an individual ASCII character instead.
static const uint8_t kRuleSet_digit_char  = 128;
static const uint8_t kRuleSet_white_space = 129;
static const uint8_t kRuleSet_rule_char   = 130;
24 enum Regex_PatternParseAction
{
49 doOpenNonCaptureParen
,
90 //-------------------------------------------------------------------------------
92 // RegexTableEl represents the structure of a row in the transition table
93 // for the pattern parser state machine.
94 //-------------------------------------------------------------------------------
96 Regex_PatternParseAction fAction
;
97 uint8_t fCharClass
; // 0-127: an individual ASCII character
98 // 128-255: character class index
99 uint8_t fNextState
; // 0-250: normal next-state numbers
100 // 255: pop next-state from stack.
105 static const struct RegexTableEl gRuleParseStateTable
[] = {
106 {doNOP
, 0, 0, 0, TRUE
}
107 , {doPatStart
, 255, 2,0, FALSE
} // 1 start
108 , {doLiteralChar
, 254, 14,0, TRUE
} // 2 term
109 , {doLiteralChar
, 130, 14,0, TRUE
} // 3
110 , {doScanUnicodeSet
, 91 /* [ */, 14,0, TRUE
} // 4
111 , {doNOP
, 40 /* ( */, 27,0, TRUE
} // 5
112 , {doDotAny
, 46 /* . */, 14,0, TRUE
} // 6
113 , {doCaret
, 94 /* ^ */, 2,0, TRUE
} // 7
114 , {doDollar
, 36 /* $ */, 2,0, TRUE
} // 8
115 , {doNOP
, 92 /* \ */, 81,0, TRUE
} // 9
116 , {doOrOperator
, 124 /* | */, 2,0, TRUE
} // 10
117 , {doCloseParen
, 41 /* ) */, 255,0, TRUE
} // 11
118 , {doPatFinish
, 253, 2,0, FALSE
} // 12
119 , {doRuleError
, 255, 101,0, FALSE
} // 13
120 , {doNOP
, 42 /* * */, 59,0, TRUE
} // 14 expr-quant
121 , {doNOP
, 43 /* + */, 62,0, TRUE
} // 15
122 , {doNOP
, 63 /* ? */, 65,0, TRUE
} // 16
123 , {doIntervalInit
, 123 /* { */, 68,0, TRUE
} // 17
124 , {doNOP
, 40 /* ( */, 23,0, TRUE
} // 18
125 , {doNOP
, 255, 20,0, FALSE
} // 19
126 , {doOrOperator
, 124 /* | */, 2,0, TRUE
} // 20 expr-cont
127 , {doCloseParen
, 41 /* ) */, 255,0, TRUE
} // 21
128 , {doNOP
, 255, 2,0, FALSE
} // 22
129 , {doSuppressComments
, 63 /* ? */, 25,0, TRUE
} // 23 open-paren-quant
130 , {doNOP
, 255, 27,0, FALSE
} // 24
131 , {doNOP
, 35 /* # */, 47, 14, TRUE
} // 25 open-paren-quant2
132 , {doNOP
, 255, 29,0, FALSE
} // 26
133 , {doSuppressComments
, 63 /* ? */, 29,0, TRUE
} // 27 open-paren
134 , {doOpenCaptureParen
, 255, 2, 14, FALSE
} // 28
135 , {doOpenNonCaptureParen
, 58 /* : */, 2, 14, TRUE
} // 29 open-paren-extended
136 , {doOpenAtomicParen
, 62 /* > */, 2, 14, TRUE
} // 30
137 , {doOpenLookAhead
, 61 /* = */, 2, 20, TRUE
} // 31
138 , {doOpenLookAheadNeg
, 33 /* ! */, 2, 20, TRUE
} // 32
139 , {doNOP
, 60 /* < */, 44,0, TRUE
} // 33
140 , {doNOP
, 35 /* # */, 47, 2, TRUE
} // 34
141 , {doBeginMatchMode
, 105 /* i */, 50,0, FALSE
} // 35
142 , {doBeginMatchMode
, 109 /* m */, 50,0, FALSE
} // 36
143 , {doBeginMatchMode
, 115 /* s */, 50,0, FALSE
} // 37
144 , {doBeginMatchMode
, 119 /* w */, 50,0, FALSE
} // 38
145 , {doBeginMatchMode
, 120 /* x */, 50,0, FALSE
} // 39
146 , {doBeginMatchMode
, 45 /* - */, 50,0, FALSE
} // 40
147 , {doConditionalExpr
, 40 /* ( */, 101,0, TRUE
} // 41
148 , {doPerlInline
, 123 /* { */, 101,0, TRUE
} // 42
149 , {doBadOpenParenType
, 255, 101,0, FALSE
} // 43
150 , {doOpenLookBehind
, 61 /* = */, 2, 20, TRUE
} // 44 open-paren-lookbehind
151 , {doOpenLookBehindNeg
, 33 /* ! */, 2, 20, TRUE
} // 45
152 , {doBadOpenParenType
, 255, 101,0, FALSE
} // 46
153 , {doNOP
, 41 /* ) */, 255,0, TRUE
} // 47 paren-comment
154 , {doMismatchedParenErr
, 253, 101,0, FALSE
} // 48
155 , {doNOP
, 255, 47,0, TRUE
} // 49
156 , {doMatchMode
, 105 /* i */, 50,0, TRUE
} // 50 paren-flag
157 , {doMatchMode
, 109 /* m */, 50,0, TRUE
} // 51
158 , {doMatchMode
, 115 /* s */, 50,0, TRUE
} // 52
159 , {doMatchMode
, 119 /* w */, 50,0, TRUE
} // 53
160 , {doMatchMode
, 120 /* x */, 50,0, TRUE
} // 54
161 , {doMatchMode
, 45 /* - */, 50,0, TRUE
} // 55
162 , {doSetMatchMode
, 41 /* ) */, 2,0, TRUE
} // 56
163 , {doMatchModeParen
, 58 /* : */, 2, 14, TRUE
} // 57
164 , {doBadModeFlag
, 255, 101,0, FALSE
} // 58
165 , {doNGStar
, 63 /* ? */, 20,0, TRUE
} // 59 quant-star
166 , {doPossessiveStar
, 43 /* + */, 20,0, TRUE
} // 60
167 , {doStar
, 255, 20,0, FALSE
} // 61
168 , {doNGPlus
, 63 /* ? */, 20,0, TRUE
} // 62 quant-plus
169 , {doPossessivePlus
, 43 /* + */, 20,0, TRUE
} // 63
170 , {doPlus
, 255, 20,0, FALSE
} // 64
171 , {doNGOpt
, 63 /* ? */, 20,0, TRUE
} // 65 quant-opt
172 , {doPossessiveOpt
, 43 /* + */, 20,0, TRUE
} // 66
173 , {doOpt
, 255, 20,0, FALSE
} // 67
174 , {doNOP
, 129, 68,0, TRUE
} // 68 interval-open
175 , {doNOP
, 128, 71,0, FALSE
} // 69
176 , {doIntervalError
, 255, 101,0, FALSE
} // 70
177 , {doIntevalLowerDigit
, 128, 71,0, TRUE
} // 71 interval-lower
178 , {doNOP
, 44 /* , */, 75,0, TRUE
} // 72
179 , {doIntervalSame
, 125 /* } */, 78,0, TRUE
} // 73
180 , {doIntervalError
, 255, 101,0, FALSE
} // 74
181 , {doIntervalUpperDigit
, 128, 75,0, TRUE
} // 75 interval-upper
182 , {doNOP
, 125 /* } */, 78,0, TRUE
} // 76
183 , {doIntervalError
, 255, 101,0, FALSE
} // 77
184 , {doNGInterval
, 63 /* ? */, 20,0, TRUE
} // 78 interval-type
185 , {doPossessiveInterval
, 43 /* + */, 20,0, TRUE
} // 79
186 , {doInterval
, 255, 20,0, FALSE
} // 80
187 , {doBackslashA
, 65 /* A */, 2,0, TRUE
} // 81 backslash
188 , {doBackslashB
, 66 /* B */, 2,0, TRUE
} // 82
189 , {doBackslashb
, 98 /* b */, 2,0, TRUE
} // 83
190 , {doBackslashd
, 100 /* d */, 14,0, TRUE
} // 84
191 , {doBackslashD
, 68 /* D */, 14,0, TRUE
} // 85
192 , {doBackslashG
, 71 /* G */, 2,0, TRUE
} // 86
193 , {doProperty
, 78 /* N */, 14,0, FALSE
} // 87
194 , {doProperty
, 112 /* p */, 14,0, FALSE
} // 88
195 , {doProperty
, 80 /* P */, 14,0, FALSE
} // 89
196 , {doEnterQuoteMode
, 81 /* Q */, 2,0, TRUE
} // 90
197 , {doBackslashS
, 83 /* S */, 14,0, TRUE
} // 91
198 , {doBackslashs
, 115 /* s */, 14,0, TRUE
} // 92
199 , {doBackslashW
, 87 /* W */, 14,0, TRUE
} // 93
200 , {doBackslashw
, 119 /* w */, 14,0, TRUE
} // 94
201 , {doBackslashX
, 88 /* X */, 14,0, TRUE
} // 95
202 , {doBackslashZ
, 90 /* Z */, 2,0, TRUE
} // 96
203 , {doBackslashz
, 122 /* z */, 2,0, TRUE
} // 97
204 , {doBackRef
, 128, 14,0, TRUE
} // 98
205 , {doEscapeError
, 253, 101,0, FALSE
} // 99
206 , {doLiteralChar
, 255, 14,0, TRUE
} // 100
207 , {doExit
, 255, 101,0, TRUE
} // 101 errorDeath
209 static const char * const RegexStateNames
[] = { 0,
238 "open-paren-extended",
253 "open-paren-lookbehind",