1 //---------------------------------------------------------------------------------
3 // Generated Header File. Do not edit by hand.
4 // This file contains the state table for the ICU Regular Expression Pattern Parser
5 // It is generated by the Perl script "regexcst.pl" from
6 // the rule parser state definitions file "regexcst.txt".
8 // Copyright (C) 2002-2007 International Business Machines Corporation
9 // and others. All rights reserved.
11 //---------------------------------------------------------------------------------
17 // Character classes for regex pattern scanning.
// Indices of the named character classes referenced from the fCharClass column
// of the parser state table. Both values lie in the 128-255 "character class
// index" range of RegexTableEl's fCharClass, so they cannot be confused with
// the 0-127 values that stand for an individual ASCII character.
19 static const uint8_t kRuleSet_digit_char
= 128;
20 static const uint8_t kRuleSet_rule_char
= 129;
23 enum Regex_PatternParseAction
{
53 doOpenNonCaptureParen
,
92 doSetBeginDifference1
,
102 doSetBeginIntersection1
,
110 doEscapedLiteralChar
,
116 //-------------------------------------------------------------------------------
118 // RegexTableEl represents the structure of a row in the transition table
119 // for the pattern parser state machine.
120 //-------------------------------------------------------------------------------
121 struct RegexTableEl
{
122 Regex_PatternParseAction fAction
;
123 uint8_t fCharClass
; // 0-127: an individual ASCII character
124 // 128-255: character class index
125 uint8_t fNextState
; // 0-250: normal next-state numbers
126 // 255: pop next-state from stack.
//-------------------------------------------------------------------------------
// gRuleParseStateTable - the transition table for the pattern parser state
// machine, one RegexTableEl initializer per row. This is generated data
// (see the file header): change the rule definitions and regenerate rather
// than editing rows here.
//
// Each row supplies five values:
//   1. fAction     a Regex_PatternParseAction value.
//   2. fCharClass  0-127 is an individual ASCII character (repeated in the
//                  adjacent /* */ comment); 128-255 is a character class
//                  index. 128 is kRuleSet_digit_char and 129 is
//                  kRuleSet_rule_char.
//                  NOTE(review): 253, 254 and 255 are also used; their meaning
//                  is not defined in the visible part of this file. Presumably
//                  they are special classes (255 is always the last row of a
//                  state, so it looks like a catch-all) - confirm in the parser.
//   3. fNextState  the row number to continue at, or 255 to pop the next
//                  state from the stack.
//   4. NOTE(review): the field receiving this value is not visible here. It is
//      non-zero only on a handful of rows and always holds a valid row number,
//      so it is presumably a state to push for a later "pop" - confirm against
//      the full RegexTableEl declaration.
//   5. NOTE(review): TRUE/FALSE flag whose field is likewise not visible here;
//      presumably whether to advance to the next pattern character - confirm.
//
// The trailing comment on every row is its row index. The first row of each
// state additionally carries the state's name (for example "term" at row 2
// and "errorDeath" at row 183).
//-------------------------------------------------------------------------------
131 static const struct RegexTableEl gRuleParseStateTable
[] = {
132 {doNOP
, 0, 0, 0, TRUE
}
133 , {doPatStart
, 255, 2,0, FALSE
} // 1 start
134 , {doLiteralChar
, 254, 14,0, TRUE
} // 2 term
135 , {doLiteralChar
, 129, 14,0, TRUE
} // 3
136 , {doSetBegin
, 91 /* [ */, 104, 182, TRUE
} // 4
137 , {doNOP
, 40 /* ( */, 27,0, TRUE
} // 5
138 , {doDotAny
, 46 /* . */, 14,0, TRUE
} // 6
139 , {doCaret
, 94 /* ^ */, 14,0, TRUE
} // 7
140 , {doDollar
, 36 /* $ */, 14,0, TRUE
} // 8
141 , {doNOP
, 92 /* \ */, 84,0, TRUE
} // 9
142 , {doOrOperator
, 124 /* | */, 2,0, TRUE
} // 10
143 , {doCloseParen
, 41 /* ) */, 255,0, TRUE
} // 11
144 , {doPatFinish
, 253, 2,0, FALSE
} // 12
145 , {doRuleError
, 255, 183,0, FALSE
} // 13
146 , {doNOP
, 42 /* * */, 63,0, TRUE
} // 14 expr-quant
147 , {doNOP
, 43 /* + */, 66,0, TRUE
} // 15
148 , {doNOP
, 63 /* ? */, 69,0, TRUE
} // 16
149 , {doIntervalInit
, 123 /* { */, 72,0, TRUE
} // 17
150 , {doNOP
, 40 /* ( */, 23,0, TRUE
} // 18
151 , {doNOP
, 255, 20,0, FALSE
} // 19
152 , {doOrOperator
, 124 /* | */, 2,0, TRUE
} // 20 expr-cont
153 , {doCloseParen
, 41 /* ) */, 255,0, TRUE
} // 21
154 , {doNOP
, 255, 2,0, FALSE
} // 22
155 , {doSuppressComments
, 63 /* ? */, 25,0, TRUE
} // 23 open-paren-quant
156 , {doNOP
, 255, 27,0, FALSE
} // 24
157 , {doNOP
, 35 /* # */, 49, 14, TRUE
} // 25 open-paren-quant2
158 , {doNOP
, 255, 29,0, FALSE
} // 26
159 , {doSuppressComments
, 63 /* ? */, 29,0, TRUE
} // 27 open-paren
160 , {doOpenCaptureParen
, 255, 2, 14, FALSE
} // 28
161 , {doOpenNonCaptureParen
, 58 /* : */, 2, 14, TRUE
} // 29 open-paren-extended
162 , {doOpenAtomicParen
, 62 /* > */, 2, 14, TRUE
} // 30
163 , {doOpenLookAhead
, 61 /* = */, 2, 20, TRUE
} // 31
164 , {doOpenLookAheadNeg
, 33 /* ! */, 2, 20, TRUE
} // 32
165 , {doNOP
, 60 /* < */, 46,0, TRUE
} // 33
166 , {doNOP
, 35 /* # */, 49, 2, TRUE
} // 34
167 , {doBeginMatchMode
, 105 /* i */, 52,0, FALSE
} // 35
168 , {doBeginMatchMode
, 100 /* d */, 52,0, FALSE
} // 36
169 , {doBeginMatchMode
, 109 /* m */, 52,0, FALSE
} // 37
170 , {doBeginMatchMode
, 115 /* s */, 52,0, FALSE
} // 38
171 , {doBeginMatchMode
, 117 /* u */, 52,0, FALSE
} // 39
172 , {doBeginMatchMode
, 119 /* w */, 52,0, FALSE
} // 40
173 , {doBeginMatchMode
, 120 /* x */, 52,0, FALSE
} // 41
174 , {doBeginMatchMode
, 45 /* - */, 52,0, FALSE
} // 42
175 , {doConditionalExpr
, 40 /* ( */, 183,0, TRUE
} // 43
176 , {doPerlInline
, 123 /* { */, 183,0, TRUE
} // 44
177 , {doBadOpenParenType
, 255, 183,0, FALSE
} // 45
178 , {doOpenLookBehind
, 61 /* = */, 2, 20, TRUE
} // 46 open-paren-lookbehind
179 , {doOpenLookBehindNeg
, 33 /* ! */, 2, 20, TRUE
} // 47
180 , {doBadOpenParenType
, 255, 183,0, FALSE
} // 48
181 , {doNOP
, 41 /* ) */, 255,0, TRUE
} // 49 paren-comment
182 , {doMismatchedParenErr
, 253, 183,0, FALSE
} // 50
183 , {doNOP
, 255, 49,0, TRUE
} // 51
184 , {doMatchMode
, 105 /* i */, 52,0, TRUE
} // 52 paren-flag
185 , {doMatchMode
, 100 /* d */, 52,0, TRUE
} // 53
186 , {doMatchMode
, 109 /* m */, 52,0, TRUE
} // 54
187 , {doMatchMode
, 115 /* s */, 52,0, TRUE
} // 55
188 , {doMatchMode
, 117 /* u */, 52,0, TRUE
} // 56
189 , {doMatchMode
, 119 /* w */, 52,0, TRUE
} // 57
190 , {doMatchMode
, 120 /* x */, 52,0, TRUE
} // 58
191 , {doMatchMode
, 45 /* - */, 52,0, TRUE
} // 59
192 , {doSetMatchMode
, 41 /* ) */, 2,0, TRUE
} // 60
193 , {doMatchModeParen
, 58 /* : */, 2, 14, TRUE
} // 61
194 , {doBadModeFlag
, 255, 183,0, FALSE
} // 62
195 , {doNGStar
, 63 /* ? */, 20,0, TRUE
} // 63 quant-star
196 , {doPossessiveStar
, 43 /* + */, 20,0, TRUE
} // 64
197 , {doStar
, 255, 20,0, FALSE
} // 65
198 , {doNGPlus
, 63 /* ? */, 20,0, TRUE
} // 66 quant-plus
199 , {doPossessivePlus
, 43 /* + */, 20,0, TRUE
} // 67
200 , {doPlus
, 255, 20,0, FALSE
} // 68
201 , {doNGOpt
, 63 /* ? */, 20,0, TRUE
} // 69 quant-opt
202 , {doPossessiveOpt
, 43 /* + */, 20,0, TRUE
} // 70
203 , {doOpt
, 255, 20,0, FALSE
} // 71
204 , {doNOP
, 128, 74,0, FALSE
} // 72 interval-open
205 , {doIntervalError
, 255, 183,0, FALSE
} // 73
206 , {doIntevalLowerDigit
, 128, 74,0, TRUE
} // 74 interval-lower
207 , {doNOP
, 44 /* , */, 78,0, TRUE
} // 75
208 , {doIntervalSame
, 125 /* } */, 81,0, TRUE
} // 76
209 , {doIntervalError
, 255, 183,0, FALSE
} // 77
210 , {doIntervalUpperDigit
, 128, 78,0, TRUE
} // 78 interval-upper
211 , {doNOP
, 125 /* } */, 81,0, TRUE
} // 79
212 , {doIntervalError
, 255, 183,0, FALSE
} // 80
213 , {doNGInterval
, 63 /* ? */, 20,0, TRUE
} // 81 interval-type
214 , {doPossessiveInterval
, 43 /* + */, 20,0, TRUE
} // 82
215 , {doInterval
, 255, 20,0, FALSE
} // 83
216 , {doBackslashA
, 65 /* A */, 2,0, TRUE
} // 84 backslash
217 , {doBackslashB
, 66 /* B */, 2,0, TRUE
} // 85
218 , {doBackslashb
, 98 /* b */, 2,0, TRUE
} // 86
219 , {doBackslashd
, 100 /* d */, 14,0, TRUE
} // 87
220 , {doBackslashD
, 68 /* D */, 14,0, TRUE
} // 88
221 , {doBackslashG
, 71 /* G */, 2,0, TRUE
} // 89
222 , {doNamedChar
, 78 /* N */, 14,0, FALSE
} // 90
223 , {doProperty
, 112 /* p */, 14,0, FALSE
} // 91
224 , {doProperty
, 80 /* P */, 14,0, FALSE
} // 92
225 , {doEnterQuoteMode
, 81 /* Q */, 2,0, TRUE
} // 93
226 , {doBackslashS
, 83 /* S */, 14,0, TRUE
} // 94
227 , {doBackslashs
, 115 /* s */, 14,0, TRUE
} // 95
228 , {doBackslashW
, 87 /* W */, 14,0, TRUE
} // 96
229 , {doBackslashw
, 119 /* w */, 14,0, TRUE
} // 97
230 , {doBackslashX
, 88 /* X */, 14,0, TRUE
} // 98
231 , {doBackslashZ
, 90 /* Z */, 2,0, TRUE
} // 99
232 , {doBackslashz
, 122 /* z */, 2,0, TRUE
} // 100
233 , {doBackRef
, 128, 14,0, TRUE
} // 101
234 , {doEscapeError
, 253, 183,0, FALSE
} // 102
235 , {doEscapedLiteralChar
, 255, 14,0, TRUE
} // 103
236 , {doSetNegate
, 94 /* ^ */, 107,0, TRUE
} // 104 set-open
237 , {doSetPosixProp
, 58 /* : */, 109,0, FALSE
} // 105
238 , {doNOP
, 255, 107,0, FALSE
} // 106
239 , {doSetLiteral
, 93 /* ] */, 122,0, TRUE
} // 107 set-open2
240 , {doNOP
, 255, 112,0, FALSE
} // 108
241 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 109 set-posix
242 , {doNOP
, 58 /* : */, 112,0, FALSE
} // 110
243 , {doRuleError
, 255, 183,0, FALSE
} // 111
244 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 112 set-start
245 , {doSetBeginUnion
, 91 /* [ */, 104, 129, TRUE
} // 113
246 , {doNOP
, 92 /* \ */, 172,0, TRUE
} // 114
247 , {doNOP
, 45 /* - */, 118,0, TRUE
} // 115
248 , {doNOP
, 38 /* & */, 120,0, TRUE
} // 116
249 , {doSetLiteral
, 255, 122,0, TRUE
} // 117
250 , {doRuleError
, 45 /* - */, 183,0, FALSE
} // 118 set-start-dash
251 , {doSetAddDash
, 255, 122,0, FALSE
} // 119
252 , {doRuleError
, 38 /* & */, 183,0, FALSE
} // 120 set-start-amp
253 , {doSetAddAmp
, 255, 122,0, FALSE
} // 121
254 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 122 set-after-lit
255 , {doSetBeginUnion
, 91 /* [ */, 104, 129, TRUE
} // 123
256 , {doNOP
, 45 /* - */, 159,0, TRUE
} // 124
257 , {doNOP
, 38 /* & */, 150,0, TRUE
} // 125
258 , {doNOP
, 92 /* \ */, 172,0, TRUE
} // 126
259 , {doSetNoCloseError
, 253, 183,0, FALSE
} // 127
260 , {doSetLiteral
, 255, 122,0, TRUE
} // 128
261 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 129 set-after-set
262 , {doSetBeginUnion
, 91 /* [ */, 104, 129, TRUE
} // 130
263 , {doNOP
, 45 /* - */, 152,0, TRUE
} // 131
264 , {doNOP
, 38 /* & */, 147,0, TRUE
} // 132
265 , {doNOP
, 92 /* \ */, 172,0, TRUE
} // 133
266 , {doSetNoCloseError
, 253, 183,0, FALSE
} // 134
267 , {doSetLiteral
, 255, 122,0, TRUE
} // 135
268 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 136 set-after-range
269 , {doSetBeginUnion
, 91 /* [ */, 104, 129, TRUE
} // 137
270 , {doNOP
, 45 /* - */, 155,0, TRUE
} // 138
271 , {doNOP
, 38 /* & */, 157,0, TRUE
} // 139
272 , {doNOP
, 92 /* \ */, 172,0, TRUE
} // 140
273 , {doSetNoCloseError
, 253, 183,0, FALSE
} // 141
274 , {doSetLiteral
, 255, 122,0, TRUE
} // 142
275 , {doSetBeginUnion
, 91 /* [ */, 104, 129, TRUE
} // 143 set-after-op
276 , {doSetOpError
, 93 /* ] */, 183,0, FALSE
} // 144
277 , {doNOP
, 92 /* \ */, 172,0, TRUE
} // 145
278 , {doSetLiteral
, 255, 122,0, TRUE
} // 146
279 , {doSetBeginIntersection1
, 91 /* [ */, 104, 129, TRUE
} // 147 set-set-amp
280 , {doSetIntersection2
, 38 /* & */, 143,0, TRUE
} // 148
281 , {doSetAddAmp
, 255, 122,0, FALSE
} // 149
282 , {doSetIntersection2
, 38 /* & */, 143,0, TRUE
} // 150 set-lit-amp
283 , {doSetAddAmp
, 255, 122,0, FALSE
} // 151
284 , {doSetBeginDifference1
, 91 /* [ */, 104, 129, TRUE
} // 152 set-set-dash
285 , {doSetDifference2
, 45 /* - */, 143,0, TRUE
} // 153
286 , {doSetAddDash
, 255, 122,0, FALSE
} // 154
287 , {doSetDifference2
, 45 /* - */, 143,0, TRUE
} // 155 set-range-dash
288 , {doSetAddDash
, 255, 122,0, FALSE
} // 156
289 , {doSetIntersection2
, 38 /* & */, 143,0, TRUE
} // 157 set-range-amp
290 , {doSetAddAmp
, 255, 122,0, FALSE
} // 158
291 , {doSetDifference2
, 45 /* - */, 143,0, TRUE
} // 159 set-lit-dash
292 , {doSetAddDash
, 91 /* [ */, 122,0, FALSE
} // 160
293 , {doSetAddDash
, 93 /* ] */, 122,0, FALSE
} // 161
294 , {doNOP
, 92 /* \ */, 164,0, TRUE
} // 162
295 , {doSetRange
, 255, 136,0, TRUE
} // 163
296 , {doSetOpError
, 115 /* s */, 183,0, FALSE
} // 164 set-lit-dash-escape
297 , {doSetOpError
, 83 /* S */, 183,0, FALSE
} // 165
298 , {doSetOpError
, 119 /* w */, 183,0, FALSE
} // 166
299 , {doSetOpError
, 87 /* W */, 183,0, FALSE
} // 167
300 , {doSetOpError
, 100 /* d */, 183,0, FALSE
} // 168
301 , {doSetOpError
, 68 /* D */, 183,0, FALSE
} // 169
302 , {doSetNamedRange
, 78 /* N */, 136,0, FALSE
} // 170
303 , {doSetRange
, 255, 136,0, TRUE
} // 171
304 , {doSetProp
, 112 /* p */, 129,0, FALSE
} // 172 set-escape
305 , {doSetProp
, 80 /* P */, 129,0, FALSE
} // 173
306 , {doSetNamedChar
, 78 /* N */, 122,0, FALSE
} // 174
307 , {doSetBackslash_s
, 115 /* s */, 136,0, TRUE
} // 175
308 , {doSetBackslash_S
, 83 /* S */, 136,0, TRUE
} // 176
309 , {doSetBackslash_w
, 119 /* w */, 136,0, TRUE
} // 177
310 , {doSetBackslash_W
, 87 /* W */, 136,0, TRUE
} // 178
311 , {doSetBackslash_d
, 100 /* d */, 136,0, TRUE
} // 179
312 , {doSetBackslash_D
, 68 /* D */, 136,0, TRUE
} // 180
313 , {doSetLiteralEscaped
, 255, 122,0, TRUE
} // 181
314 , {doSetFinish
, 255, 14,0, FALSE
} // 182 set-finish
315 , {doExit
, 255, 183,0, TRUE
} // 183 errorDeath
317 static const char * const RegexStateNames
[] = { 0,
346 "open-paren-extended",
363 "open-paren-lookbehind",
481 "set-lit-dash-escape",