1 //---------------------------------------------------------------------------------
3 // Generated Header File. Do not edit by hand.
4 // This file contains the state table for the ICU Regular Expression Pattern Parser
5 // It is generated by the Perl script "regexcst.pl" from
6 // the rule parser state definitions file "regexcst.txt".
8 // Copyright (C) 2002-2016 International Business Machines Corporation
9 // and others. All rights reserved.
11 //---------------------------------------------------------------------------------
15 #include "unicode/utypes.h"
// Character classes for regex pattern scanning.
// These codes appear in the fCharClass column of the parser state table.
// They sit in the 128-255 range, which that column reserves for character
// class indexes (0-127 there means one individual ASCII character).
static const uint8_t kRuleSet_ascii_letter = 128;
static const uint8_t kRuleSet_digit_char   = 129;
static const uint8_t kRuleSet_rule_char    = 130;
26 enum Regex_PatternParseAction
{
33 doContinueNamedCapture
,
59 doSetBeginDifference1
,
92 doSetBeginIntersection1
,
98 doCompleteNamedBackRef
,
104 doOpenNonCaptureParen
,
118 doEscapedLiteralChar
,
121 doMismatchedParenErr
,
130 doContinueNamedBackRef
,
134 //-------------------------------------------------------------------------------
136 // RegexTableEl represents the structure of a row in the transition table
137 // for the pattern parser state machine.
138 //-------------------------------------------------------------------------------
139 struct RegexTableEl
{
140 Regex_PatternParseAction fAction
;
141 uint8_t fCharClass
; // 0-127: an individual ASCII character
142 // 128-255: character class index
143 uint8_t fNextState
; // 0-250: normal next-state numbers
144 // 255: pop next-state from stack.
149 static const struct RegexTableEl gRuleParseStateTable
[] = {
150 {doNOP
, 0, 0, 0, TRUE
}
151 , {doPatStart
, 255, 2,0, FALSE
} // 1 start
152 , {doLiteralChar
, 254, 14,0, TRUE
} // 2 term
153 , {doLiteralChar
, 130, 14,0, TRUE
} // 3
154 , {doSetBegin
, 91 /* [ */, 123, 205, TRUE
} // 4
155 , {doNOP
, 40 /* ( */, 27,0, TRUE
} // 5
156 , {doDotAny
, 46 /* . */, 14,0, TRUE
} // 6
157 , {doCaret
, 94 /* ^ */, 14,0, TRUE
} // 7
158 , {doDollar
, 36 /* $ */, 14,0, TRUE
} // 8
159 , {doNOP
, 92 /* \ */, 89,0, TRUE
} // 9
160 , {doOrOperator
, 124 /* | */, 2,0, TRUE
} // 10
161 , {doCloseParen
, 41 /* ) */, 255,0, TRUE
} // 11
162 , {doPatFinish
, 253, 2,0, FALSE
} // 12
163 , {doRuleError
, 255, 206,0, FALSE
} // 13
164 , {doNOP
, 42 /* * */, 68,0, TRUE
} // 14 expr-quant
165 , {doNOP
, 43 /* + */, 71,0, TRUE
} // 15
166 , {doNOP
, 63 /* ? */, 74,0, TRUE
} // 16
167 , {doIntervalInit
, 123 /* { */, 77,0, TRUE
} // 17
168 , {doNOP
, 40 /* ( */, 23,0, TRUE
} // 18
169 , {doNOP
, 255, 20,0, FALSE
} // 19
170 , {doOrOperator
, 124 /* | */, 2,0, TRUE
} // 20 expr-cont
171 , {doCloseParen
, 41 /* ) */, 255,0, TRUE
} // 21
172 , {doNOP
, 255, 2,0, FALSE
} // 22
173 , {doSuppressComments
, 63 /* ? */, 25,0, TRUE
} // 23 open-paren-quant
174 , {doNOP
, 255, 27,0, FALSE
} // 24
175 , {doNOP
, 35 /* # */, 50, 14, TRUE
} // 25 open-paren-quant2
176 , {doNOP
, 255, 29,0, FALSE
} // 26
177 , {doSuppressComments
, 63 /* ? */, 29,0, TRUE
} // 27 open-paren
178 , {doOpenCaptureParen
, 255, 2, 14, FALSE
} // 28
179 , {doOpenNonCaptureParen
, 58 /* : */, 2, 14, TRUE
} // 29 open-paren-extended
180 , {doOpenAtomicParen
, 62 /* > */, 2, 14, TRUE
} // 30
181 , {doOpenLookAhead
, 61 /* = */, 2, 20, TRUE
} // 31
182 , {doOpenLookAheadNeg
, 33 /* ! */, 2, 20, TRUE
} // 32
183 , {doNOP
, 60 /* < */, 46,0, TRUE
} // 33
184 , {doNOP
, 35 /* # */, 50, 2, TRUE
} // 34
185 , {doBeginMatchMode
, 105 /* i */, 53,0, FALSE
} // 35
186 , {doBeginMatchMode
, 100 /* d */, 53,0, FALSE
} // 36
187 , {doBeginMatchMode
, 109 /* m */, 53,0, FALSE
} // 37
188 , {doBeginMatchMode
, 115 /* s */, 53,0, FALSE
} // 38
189 , {doBeginMatchMode
, 117 /* u */, 53,0, FALSE
} // 39
190 , {doBeginMatchMode
, 119 /* w */, 53,0, FALSE
} // 40
191 , {doBeginMatchMode
, 120 /* x */, 53,0, FALSE
} // 41
192 , {doBeginMatchMode
, 45 /* - */, 53,0, FALSE
} // 42
193 , {doConditionalExpr
, 40 /* ( */, 206,0, TRUE
} // 43
194 , {doPerlInline
, 123 /* { */, 206,0, TRUE
} // 44
195 , {doBadOpenParenType
, 255, 206,0, FALSE
} // 45
196 , {doOpenLookBehind
, 61 /* = */, 2, 20, TRUE
} // 46 open-paren-lookbehind
197 , {doOpenLookBehindNeg
, 33 /* ! */, 2, 20, TRUE
} // 47
198 , {doBeginNamedCapture
, 128, 64,0, FALSE
} // 48
199 , {doBadOpenParenType
, 255, 206,0, FALSE
} // 49
200 , {doNOP
, 41 /* ) */, 255,0, TRUE
} // 50 paren-comment
201 , {doMismatchedParenErr
, 253, 206,0, FALSE
} // 51
202 , {doNOP
, 255, 50,0, TRUE
} // 52
203 , {doMatchMode
, 105 /* i */, 53,0, TRUE
} // 53 paren-flag
204 , {doMatchMode
, 100 /* d */, 53,0, TRUE
} // 54
205 , {doMatchMode
, 109 /* m */, 53,0, TRUE
} // 55
206 , {doMatchMode
, 115 /* s */, 53,0, TRUE
} // 56
207 , {doMatchMode
, 117 /* u */, 53,0, TRUE
} // 57
208 , {doMatchMode
, 119 /* w */, 53,0, TRUE
} // 58
209 , {doMatchMode
, 120 /* x */, 53,0, TRUE
} // 59
210 , {doMatchMode
, 45 /* - */, 53,0, TRUE
} // 60
211 , {doSetMatchMode
, 41 /* ) */, 2,0, TRUE
} // 61
212 , {doMatchModeParen
, 58 /* : */, 2, 14, TRUE
} // 62
213 , {doBadModeFlag
, 255, 206,0, FALSE
} // 63
214 , {doContinueNamedCapture
, 128, 64,0, TRUE
} // 64 named-capture
215 , {doContinueNamedCapture
, 129, 64,0, TRUE
} // 65
216 , {doOpenCaptureParen
, 62 /* > */, 2, 14, TRUE
} // 66
217 , {doBadNamedCapture
, 255, 206,0, FALSE
} // 67
218 , {doNGStar
, 63 /* ? */, 20,0, TRUE
} // 68 quant-star
219 , {doPossessiveStar
, 43 /* + */, 20,0, TRUE
} // 69
220 , {doStar
, 255, 20,0, FALSE
} // 70
221 , {doNGPlus
, 63 /* ? */, 20,0, TRUE
} // 71 quant-plus
222 , {doPossessivePlus
, 43 /* + */, 20,0, TRUE
} // 72
223 , {doPlus
, 255, 20,0, FALSE
} // 73
224 , {doNGOpt
, 63 /* ? */, 20,0, TRUE
} // 74 quant-opt
225 , {doPossessiveOpt
, 43 /* + */, 20,0, TRUE
} // 75
226 , {doOpt
, 255, 20,0, FALSE
} // 76
227 , {doNOP
, 129, 79,0, FALSE
} // 77 interval-open
228 , {doIntervalError
, 255, 206,0, FALSE
} // 78
229 , {doIntevalLowerDigit
, 129, 79,0, TRUE
} // 79 interval-lower
230 , {doNOP
, 44 /* , */, 83,0, TRUE
} // 80
231 , {doIntervalSame
, 125 /* } */, 86,0, TRUE
} // 81
232 , {doIntervalError
, 255, 206,0, FALSE
} // 82
233 , {doIntervalUpperDigit
, 129, 83,0, TRUE
} // 83 interval-upper
234 , {doNOP
, 125 /* } */, 86,0, TRUE
} // 84
235 , {doIntervalError
, 255, 206,0, FALSE
} // 85
236 , {doNGInterval
, 63 /* ? */, 20,0, TRUE
} // 86 interval-type
237 , {doPossessiveInterval
, 43 /* + */, 20,0, TRUE
} // 87
238 , {doInterval
, 255, 20,0, FALSE
} // 88
239 , {doBackslashA
, 65 /* A */, 2,0, TRUE
} // 89 backslash
240 , {doBackslashB
, 66 /* B */, 2,0, TRUE
} // 90
241 , {doBackslashb
, 98 /* b */, 2,0, TRUE
} // 91
242 , {doBackslashd
, 100 /* d */, 14,0, TRUE
} // 92
243 , {doBackslashD
, 68 /* D */, 14,0, TRUE
} // 93
244 , {doBackslashG
, 71 /* G */, 2,0, TRUE
} // 94
245 , {doBackslashh
, 104 /* h */, 14,0, TRUE
} // 95
246 , {doBackslashH
, 72 /* H */, 14,0, TRUE
} // 96
247 , {doNOP
, 107 /* k */, 115,0, TRUE
} // 97
248 , {doNamedChar
, 78 /* N */, 14,0, FALSE
} // 98
249 , {doProperty
, 112 /* p */, 14,0, FALSE
} // 99
250 , {doProperty
, 80 /* P */, 14,0, FALSE
} // 100
251 , {doBackslashR
, 82 /* R */, 14,0, TRUE
} // 101
252 , {doEnterQuoteMode
, 81 /* Q */, 2,0, TRUE
} // 102
253 , {doBackslashS
, 83 /* S */, 14,0, TRUE
} // 103
254 , {doBackslashs
, 115 /* s */, 14,0, TRUE
} // 104
255 , {doBackslashv
, 118 /* v */, 14,0, TRUE
} // 105
256 , {doBackslashV
, 86 /* V */, 14,0, TRUE
} // 106
257 , {doBackslashW
, 87 /* W */, 14,0, TRUE
} // 107
258 , {doBackslashw
, 119 /* w */, 14,0, TRUE
} // 108
259 , {doBackslashX
, 88 /* X */, 14,0, TRUE
} // 109
260 , {doBackslashZ
, 90 /* Z */, 2,0, TRUE
} // 110
261 , {doBackslashz
, 122 /* z */, 2,0, TRUE
} // 111
262 , {doBackRef
, 129, 14,0, TRUE
} // 112
263 , {doEscapeError
, 253, 206,0, FALSE
} // 113
264 , {doEscapedLiteralChar
, 255, 14,0, TRUE
} // 114
265 , {doBeginNamedBackRef
, 60 /* < */, 117,0, TRUE
} // 115 named-backref
266 , {doBadNamedCapture
, 255, 206,0, FALSE
} // 116
267 , {doContinueNamedBackRef
, 128, 119,0, TRUE
} // 117 named-backref-2
268 , {doBadNamedCapture
, 255, 206,0, FALSE
} // 118
269 , {doContinueNamedBackRef
, 128, 119,0, TRUE
} // 119 named-backref-3
270 , {doContinueNamedBackRef
, 129, 119,0, TRUE
} // 120
271 , {doCompleteNamedBackRef
, 62 /* > */, 14,0, TRUE
} // 121
272 , {doBadNamedCapture
, 255, 206,0, FALSE
} // 122
273 , {doSetNegate
, 94 /* ^ */, 126,0, TRUE
} // 123 set-open
274 , {doSetPosixProp
, 58 /* : */, 128,0, FALSE
} // 124
275 , {doNOP
, 255, 126,0, FALSE
} // 125
276 , {doSetLiteral
, 93 /* ] */, 141,0, TRUE
} // 126 set-open2
277 , {doNOP
, 255, 131,0, FALSE
} // 127
278 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 128 set-posix
279 , {doNOP
, 58 /* : */, 131,0, FALSE
} // 129
280 , {doRuleError
, 255, 206,0, FALSE
} // 130
281 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 131 set-start
282 , {doSetBeginUnion
, 91 /* [ */, 123, 148, TRUE
} // 132
283 , {doNOP
, 92 /* \ */, 191,0, TRUE
} // 133
284 , {doNOP
, 45 /* - */, 137,0, TRUE
} // 134
285 , {doNOP
, 38 /* & */, 139,0, TRUE
} // 135
286 , {doSetLiteral
, 255, 141,0, TRUE
} // 136
287 , {doRuleError
, 45 /* - */, 206,0, FALSE
} // 137 set-start-dash
288 , {doSetAddDash
, 255, 141,0, FALSE
} // 138
289 , {doRuleError
, 38 /* & */, 206,0, FALSE
} // 139 set-start-amp
290 , {doSetAddAmp
, 255, 141,0, FALSE
} // 140
291 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 141 set-after-lit
292 , {doSetBeginUnion
, 91 /* [ */, 123, 148, TRUE
} // 142
293 , {doNOP
, 45 /* - */, 178,0, TRUE
} // 143
294 , {doNOP
, 38 /* & */, 169,0, TRUE
} // 144
295 , {doNOP
, 92 /* \ */, 191,0, TRUE
} // 145
296 , {doSetNoCloseError
, 253, 206,0, FALSE
} // 146
297 , {doSetLiteral
, 255, 141,0, TRUE
} // 147
298 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 148 set-after-set
299 , {doSetBeginUnion
, 91 /* [ */, 123, 148, TRUE
} // 149
300 , {doNOP
, 45 /* - */, 171,0, TRUE
} // 150
301 , {doNOP
, 38 /* & */, 166,0, TRUE
} // 151
302 , {doNOP
, 92 /* \ */, 191,0, TRUE
} // 152
303 , {doSetNoCloseError
, 253, 206,0, FALSE
} // 153
304 , {doSetLiteral
, 255, 141,0, TRUE
} // 154
305 , {doSetEnd
, 93 /* ] */, 255,0, TRUE
} // 155 set-after-range
306 , {doSetBeginUnion
, 91 /* [ */, 123, 148, TRUE
} // 156
307 , {doNOP
, 45 /* - */, 174,0, TRUE
} // 157
308 , {doNOP
, 38 /* & */, 176,0, TRUE
} // 158
309 , {doNOP
, 92 /* \ */, 191,0, TRUE
} // 159
310 , {doSetNoCloseError
, 253, 206,0, FALSE
} // 160
311 , {doSetLiteral
, 255, 141,0, TRUE
} // 161
312 , {doSetBeginUnion
, 91 /* [ */, 123, 148, TRUE
} // 162 set-after-op
313 , {doSetOpError
, 93 /* ] */, 206,0, FALSE
} // 163
314 , {doNOP
, 92 /* \ */, 191,0, TRUE
} // 164
315 , {doSetLiteral
, 255, 141,0, TRUE
} // 165
316 , {doSetBeginIntersection1
, 91 /* [ */, 123, 148, TRUE
} // 166 set-set-amp
317 , {doSetIntersection2
, 38 /* & */, 162,0, TRUE
} // 167
318 , {doSetAddAmp
, 255, 141,0, FALSE
} // 168
319 , {doSetIntersection2
, 38 /* & */, 162,0, TRUE
} // 169 set-lit-amp
320 , {doSetAddAmp
, 255, 141,0, FALSE
} // 170
321 , {doSetBeginDifference1
, 91 /* [ */, 123, 148, TRUE
} // 171 set-set-dash
322 , {doSetDifference2
, 45 /* - */, 162,0, TRUE
} // 172
323 , {doSetAddDash
, 255, 141,0, FALSE
} // 173
324 , {doSetDifference2
, 45 /* - */, 162,0, TRUE
} // 174 set-range-dash
325 , {doSetAddDash
, 255, 141,0, FALSE
} // 175
326 , {doSetIntersection2
, 38 /* & */, 162,0, TRUE
} // 176 set-range-amp
327 , {doSetAddAmp
, 255, 141,0, FALSE
} // 177
328 , {doSetDifference2
, 45 /* - */, 162,0, TRUE
} // 178 set-lit-dash
329 , {doSetAddDash
, 91 /* [ */, 141,0, FALSE
} // 179
330 , {doSetAddDash
, 93 /* ] */, 141,0, FALSE
} // 180
331 , {doNOP
, 92 /* \ */, 183,0, TRUE
} // 181
332 , {doSetRange
, 255, 155,0, TRUE
} // 182
333 , {doSetOpError
, 115 /* s */, 206,0, FALSE
} // 183 set-lit-dash-escape
334 , {doSetOpError
, 83 /* S */, 206,0, FALSE
} // 184
335 , {doSetOpError
, 119 /* w */, 206,0, FALSE
} // 185
336 , {doSetOpError
, 87 /* W */, 206,0, FALSE
} // 186
337 , {doSetOpError
, 100 /* d */, 206,0, FALSE
} // 187
338 , {doSetOpError
, 68 /* D */, 206,0, FALSE
} // 188
339 , {doSetNamedRange
, 78 /* N */, 155,0, FALSE
} // 189
340 , {doSetRange
, 255, 155,0, TRUE
} // 190
341 , {doSetProp
, 112 /* p */, 148,0, FALSE
} // 191 set-escape
342 , {doSetProp
, 80 /* P */, 148,0, FALSE
} // 192
343 , {doSetNamedChar
, 78 /* N */, 141,0, FALSE
} // 193
344 , {doSetBackslash_s
, 115 /* s */, 155,0, TRUE
} // 194
345 , {doSetBackslash_S
, 83 /* S */, 155,0, TRUE
} // 195
346 , {doSetBackslash_w
, 119 /* w */, 155,0, TRUE
} // 196
347 , {doSetBackslash_W
, 87 /* W */, 155,0, TRUE
} // 197
348 , {doSetBackslash_d
, 100 /* d */, 155,0, TRUE
} // 198
349 , {doSetBackslash_D
, 68 /* D */, 155,0, TRUE
} // 199
350 , {doSetBackslash_h
, 104 /* h */, 155,0, TRUE
} // 200
351 , {doSetBackslash_H
, 72 /* H */, 155,0, TRUE
} // 201
352 , {doSetBackslash_v
, 118 /* v */, 155,0, TRUE
} // 202
353 , {doSetBackslash_V
, 86 /* V */, 155,0, TRUE
} // 203
354 , {doSetLiteralEscaped
, 255, 141,0, TRUE
} // 204
355 , {doSetFinish
, 255, 14,0, FALSE
} // 205 set-finish
356 , {doExit
, 255, 206,0, TRUE
} // 206 errorDeath
358 static const char * const RegexStateNames
[] = { 0,
387 "open-paren-extended",
404 "open-paren-lookbehind",
541 "set-lit-dash-escape",