]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/regexcst.h
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / regexcst.h
1 //---------------------------------------------------------------------------------
2 //
3 // Generated Header File. Do not edit by hand.
4 // This file contains the state table for the ICU Regular Expression Pattern Parser.
5 // It is generated by the Perl script "regexcst.pl" from
6 // the rule parser state definitions file "regexcst.txt".
7 //
8 // Copyright (C) 2002-2016 International Business Machines Corporation
9 // and others. All rights reserved.
10 //
11 //---------------------------------------------------------------------------------
12 #ifndef REGEXCST_H // include guard specific to this header (regexcst.h)
13 #define REGEXCST_H
14
15 #include "unicode/utypes.h"
16
17 U_NAMESPACE_BEGIN
18 //
19 // Character classes for regex pattern scanning.
20 // These values appear in the fCharClass column of gRuleParseStateTable (128-255 = character class index).
21 static const uint8_t kRuleSet_ascii_letter = 128; // used by the named-capture and named-backref rows
22 static const uint8_t kRuleSet_digit_char = 129; // used by the interval, back-reference and name-continuation rows
23 static const uint8_t kRuleSet_rule_char = 130; // used by row 3 of the "term" state (doLiteralChar)
24
25
26 enum Regex_PatternParseAction { // action codes stored in RegexTableEl::fAction; generated list, order fixes the numeric values
27 doSetBackslash_V,
28 doSetBackslash_h,
29 doBeginNamedBackRef,
30 doSetMatchMode,
31 doEnterQuoteMode,
32 doOpenCaptureParen,
33 doContinueNamedCapture,
34 doSetBackslash_d,
35 doBeginMatchMode,
36 doBackslashX,
37 doSetPosixProp,
38 doIntervalError,
39 doSetLiteralEscaped,
40 doSetBackslash_s,
41 doNOP,
42 doBackslashv,
43 doOpenLookBehind,
44 doPatStart,
45 doPossessiveInterval,
46 doOpenAtomicParen,
47 doOpenLookAheadNeg,
48 doBackslashd,
49 doBackslashZ,
50 doIntervalUpperDigit,
51 doBadNamedCapture,
52 doSetDifference2,
53 doSetAddAmp,
54 doSetNamedChar,
55 doNamedChar,
56 doSetBackslash_H,
57 doBackslashb,
58 doBackslashz,
59 doSetBeginDifference1,
60 doOpenLookAhead,
61 doMatchModeParen,
62 doBackslashV,
63 doIntevalLowerDigit, // (sic) "Inteval": the spelling is part of the identifier and is used as-is by the state table (row 79)
64 doCaret,
65 doSetEnd,
66 doSetNegate,
67 doBackslashS,
68 doOrOperator,
69 doBackslashB,
70 doBackslashw,
71 doBackslashR,
72 doRuleError,
73 doDotAny,
74 doMatchMode,
75 doSetBackslash_W,
76 doNGPlus,
77 doSetBackslash_D,
78 doPossessiveOpt,
79 doSetNamedRange,
80 doConditionalExpr,
81 doBackslashs,
82 doPossessiveStar,
83 doPlus,
84 doBadOpenParenType,
85 doCloseParen,
86 doNGInterval,
87 doSetProp,
88 doBackRef,
89 doSetBeginUnion,
90 doEscapeError,
91 doOpt,
92 doSetBeginIntersection1,
93 doPossessivePlus,
94 doBackslashD,
95 doOpenLookBehindNeg,
96 doSetBegin,
97 doSetIntersection2,
98 doCompleteNamedBackRef,
99 doSetRange,
100 doDollar,
101 doBackslashH,
102 doExit,
103 doNGOpt,
104 doOpenNonCaptureParen,
105 doBackslashA,
106 doSetBackslash_v,
107 doBackslashh,
108 doBadModeFlag,
109 doSetNoCloseError,
110 doIntervalSame,
111 doSetAddDash,
112 doBackslashW,
113 doPerlInline,
114 doSetOpError,
115 doSetLiteral,
116 doPatFinish,
117 doBeginNamedCapture,
118 doEscapedLiteralChar,
119 doLiteralChar,
120 doSuppressComments,
121 doMismatchedParenErr,
122 doNGStar,
123 doSetFinish,
124 doInterval,
125 doBackslashG,
126 doStar,
127 doSetBackslash_w,
128 doSetBackslash_S,
129 doProperty,
130 doContinueNamedBackRef,
131 doIntervalInit,
132 rbbiLastAction}; // final enumerator, not used by any table row here; NOTE(review): the "rbbi" prefix looks inherited from the RBBI table generator - confirm before renaming
133
134 //-------------------------------------------------------------------------------
135 //
136 // RegexTableEl represents the structure of a row in the transition table
137 // for the pattern parser state machine (the rows of gRuleParseStateTable).
138 //-------------------------------------------------------------------------------
139 struct RegexTableEl {
140 Regex_PatternParseAction fAction; // action code for this row; presumably performed when the row matches - confirm against the parser
141 uint8_t fCharClass; // 0-127: an individual ASCII character
142 // 128-255: character class index
143 uint8_t fNextState; // 0-250: normal next-state numbers
144 // 255: pop next-state from stack.
145 uint8_t fPushState; // a state number, or 0; presumably the state pushed for a later "pop" (fNextState == 255) - confirm
146 UBool fNextChar; // TRUE/FALSE per row; presumably whether to advance to the next pattern character - confirm
147 };
148
149 static const struct RegexTableEl gRuleParseStateTable[] = { // columns: fAction, fCharClass, fNextState, fPushState, fNextChar; trailing comment = row index and, on a state's first row, its name
150 {doNOP, 0, 0, 0, TRUE} // row 0: placeholder, no state name is assigned to index 0
151 , {doPatStart, 255, 2,0, FALSE} // 1 start (NOTE(review): fCharClass 253/254/255 look like special codes interpreted by the parser rather than class indexes - confirm)
152 , {doLiteralChar, 254, 14,0, TRUE} // 2 term
153 , {doLiteralChar, 130, 14,0, TRUE} // 3
154 , {doSetBegin, 91 /* [ */, 123, 205, TRUE} // 4
155 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5
156 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6
157 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
158 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8
159 , {doNOP, 92 /* \ */, 89,0, TRUE} // 9
160 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
161 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
162 , {doPatFinish, 253, 2,0, FALSE} // 12
163 , {doRuleError, 255, 206,0, FALSE} // 13
164 , {doNOP, 42 /* * */, 68,0, TRUE} // 14 expr-quant
165 , {doNOP, 43 /* + */, 71,0, TRUE} // 15
166 , {doNOP, 63 /* ? */, 74,0, TRUE} // 16
167 , {doIntervalInit, 123 /* { */, 77,0, TRUE} // 17
168 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18
169 , {doNOP, 255, 20,0, FALSE} // 19
170 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
171 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21
172 , {doNOP, 255, 2,0, FALSE} // 22
173 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
174 , {doNOP, 255, 27,0, FALSE} // 24
175 , {doNOP, 35 /* # */, 50, 14, TRUE} // 25 open-paren-quant2
176 , {doNOP, 255, 29,0, FALSE} // 26
177 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
178 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
179 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended
180 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30
181 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
182 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
183 , {doNOP, 60 /* < */, 46,0, TRUE} // 33
184 , {doNOP, 35 /* # */, 50, 2, TRUE} // 34
185 , {doBeginMatchMode, 105 /* i */, 53,0, FALSE} // 35
186 , {doBeginMatchMode, 100 /* d */, 53,0, FALSE} // 36
187 , {doBeginMatchMode, 109 /* m */, 53,0, FALSE} // 37
188 , {doBeginMatchMode, 115 /* s */, 53,0, FALSE} // 38
189 , {doBeginMatchMode, 117 /* u */, 53,0, FALSE} // 39
190 , {doBeginMatchMode, 119 /* w */, 53,0, FALSE} // 40
191 , {doBeginMatchMode, 120 /* x */, 53,0, FALSE} // 41
192 , {doBeginMatchMode, 45 /* - */, 53,0, FALSE} // 42
193 , {doConditionalExpr, 40 /* ( */, 206,0, TRUE} // 43
194 , {doPerlInline, 123 /* { */, 206,0, TRUE} // 44
195 , {doBadOpenParenType, 255, 206,0, FALSE} // 45
196 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
197 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
198 , {doBeginNamedCapture, 128, 64,0, FALSE} // 48
199 , {doBadOpenParenType, 255, 206,0, FALSE} // 49
200 , {doNOP, 41 /* ) */, 255,0, TRUE} // 50 paren-comment
201 , {doMismatchedParenErr, 253, 206,0, FALSE} // 51
202 , {doNOP, 255, 50,0, TRUE} // 52
203 , {doMatchMode, 105 /* i */, 53,0, TRUE} // 53 paren-flag
204 , {doMatchMode, 100 /* d */, 53,0, TRUE} // 54
205 , {doMatchMode, 109 /* m */, 53,0, TRUE} // 55
206 , {doMatchMode, 115 /* s */, 53,0, TRUE} // 56
207 , {doMatchMode, 117 /* u */, 53,0, TRUE} // 57
208 , {doMatchMode, 119 /* w */, 53,0, TRUE} // 58
209 , {doMatchMode, 120 /* x */, 53,0, TRUE} // 59
210 , {doMatchMode, 45 /* - */, 53,0, TRUE} // 60
211 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 61
212 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 62
213 , {doBadModeFlag, 255, 206,0, FALSE} // 63
214 , {doContinueNamedCapture, 128, 64,0, TRUE} // 64 named-capture
215 , {doContinueNamedCapture, 129, 64,0, TRUE} // 65
216 , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE} // 66
217 , {doBadNamedCapture, 255, 206,0, FALSE} // 67
218 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 68 quant-star
219 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 69
220 , {doStar, 255, 20,0, FALSE} // 70
221 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 71 quant-plus
222 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 72
223 , {doPlus, 255, 20,0, FALSE} // 73
224 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 74 quant-opt
225 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 75
226 , {doOpt, 255, 20,0, FALSE} // 76
227 , {doNOP, 129, 79,0, FALSE} // 77 interval-open
228 , {doIntervalError, 255, 206,0, FALSE} // 78
229 , {doIntevalLowerDigit, 129, 79,0, TRUE} // 79 interval-lower
230 , {doNOP, 44 /* , */, 83,0, TRUE} // 80
231 , {doIntervalSame, 125 /* } */, 86,0, TRUE} // 81
232 , {doIntervalError, 255, 206,0, FALSE} // 82
233 , {doIntervalUpperDigit, 129, 83,0, TRUE} // 83 interval-upper
234 , {doNOP, 125 /* } */, 86,0, TRUE} // 84
235 , {doIntervalError, 255, 206,0, FALSE} // 85
236 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 86 interval-type
237 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 87
238 , {doInterval, 255, 20,0, FALSE} // 88
239 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 89 backslash
240 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 90
241 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 91
242 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 92
243 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 93
244 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 94
245 , {doBackslashh, 104 /* h */, 14,0, TRUE} // 95
246 , {doBackslashH, 72 /* H */, 14,0, TRUE} // 96
247 , {doNOP, 107 /* k */, 115,0, TRUE} // 97
248 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 98
249 , {doProperty, 112 /* p */, 14,0, FALSE} // 99
250 , {doProperty, 80 /* P */, 14,0, FALSE} // 100
251 , {doBackslashR, 82 /* R */, 14,0, TRUE} // 101
252 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 102
253 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 103
254 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 104
255 , {doBackslashv, 118 /* v */, 14,0, TRUE} // 105
256 , {doBackslashV, 86 /* V */, 14,0, TRUE} // 106
257 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 107
258 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 108
259 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 109
260 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 110
261 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 111
262 , {doBackRef, 129, 14,0, TRUE} // 112
263 , {doEscapeError, 253, 206,0, FALSE} // 113
264 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 114
265 , {doBeginNamedBackRef, 60 /* < */, 117,0, TRUE} // 115 named-backref
266 , {doBadNamedCapture, 255, 206,0, FALSE} // 116
267 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 117 named-backref-2
268 , {doBadNamedCapture, 255, 206,0, FALSE} // 118
269 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 119 named-backref-3
270 , {doContinueNamedBackRef, 129, 119,0, TRUE} // 120
271 , {doCompleteNamedBackRef, 62 /* > */, 14,0, TRUE} // 121
272 , {doBadNamedCapture, 255, 206,0, FALSE} // 122
273 , {doSetNegate, 94 /* ^ */, 126,0, TRUE} // 123 set-open
274 , {doSetPosixProp, 58 /* : */, 128,0, FALSE} // 124
275 , {doNOP, 255, 126,0, FALSE} // 125
276 , {doSetLiteral, 93 /* ] */, 141,0, TRUE} // 126 set-open2
277 , {doNOP, 255, 131,0, FALSE} // 127
278 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 128 set-posix
279 , {doNOP, 58 /* : */, 131,0, FALSE} // 129
280 , {doRuleError, 255, 206,0, FALSE} // 130
281 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 131 set-start
282 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 132
283 , {doNOP, 92 /* \ */, 191,0, TRUE} // 133
284 , {doNOP, 45 /* - */, 137,0, TRUE} // 134
285 , {doNOP, 38 /* & */, 139,0, TRUE} // 135
286 , {doSetLiteral, 255, 141,0, TRUE} // 136
287 , {doRuleError, 45 /* - */, 206,0, FALSE} // 137 set-start-dash
288 , {doSetAddDash, 255, 141,0, FALSE} // 138
289 , {doRuleError, 38 /* & */, 206,0, FALSE} // 139 set-start-amp
290 , {doSetAddAmp, 255, 141,0, FALSE} // 140
291 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 141 set-after-lit
292 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 142
293 , {doNOP, 45 /* - */, 178,0, TRUE} // 143
294 , {doNOP, 38 /* & */, 169,0, TRUE} // 144
295 , {doNOP, 92 /* \ */, 191,0, TRUE} // 145
296 , {doSetNoCloseError, 253, 206,0, FALSE} // 146
297 , {doSetLiteral, 255, 141,0, TRUE} // 147
298 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 148 set-after-set
299 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 149
300 , {doNOP, 45 /* - */, 171,0, TRUE} // 150
301 , {doNOP, 38 /* & */, 166,0, TRUE} // 151
302 , {doNOP, 92 /* \ */, 191,0, TRUE} // 152
303 , {doSetNoCloseError, 253, 206,0, FALSE} // 153
304 , {doSetLiteral, 255, 141,0, TRUE} // 154
305 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 155 set-after-range
306 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 156
307 , {doNOP, 45 /* - */, 174,0, TRUE} // 157
308 , {doNOP, 38 /* & */, 176,0, TRUE} // 158
309 , {doNOP, 92 /* \ */, 191,0, TRUE} // 159
310 , {doSetNoCloseError, 253, 206,0, FALSE} // 160
311 , {doSetLiteral, 255, 141,0, TRUE} // 161
312 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 162 set-after-op
313 , {doSetOpError, 93 /* ] */, 206,0, FALSE} // 163
314 , {doNOP, 92 /* \ */, 191,0, TRUE} // 164
315 , {doSetLiteral, 255, 141,0, TRUE} // 165
316 , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE} // 166 set-set-amp
317 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 167
318 , {doSetAddAmp, 255, 141,0, FALSE} // 168
319 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 169 set-lit-amp
320 , {doSetAddAmp, 255, 141,0, FALSE} // 170
321 , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE} // 171 set-set-dash
322 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 172
323 , {doSetAddDash, 255, 141,0, FALSE} // 173
324 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 174 set-range-dash
325 , {doSetAddDash, 255, 141,0, FALSE} // 175
326 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 176 set-range-amp
327 , {doSetAddAmp, 255, 141,0, FALSE} // 177
328 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 178 set-lit-dash
329 , {doSetAddDash, 91 /* [ */, 141,0, FALSE} // 179
330 , {doSetAddDash, 93 /* ] */, 141,0, FALSE} // 180
331 , {doNOP, 92 /* \ */, 183,0, TRUE} // 181
332 , {doSetRange, 255, 155,0, TRUE} // 182
333 , {doSetOpError, 115 /* s */, 206,0, FALSE} // 183 set-lit-dash-escape
334 , {doSetOpError, 83 /* S */, 206,0, FALSE} // 184
335 , {doSetOpError, 119 /* w */, 206,0, FALSE} // 185
336 , {doSetOpError, 87 /* W */, 206,0, FALSE} // 186
337 , {doSetOpError, 100 /* d */, 206,0, FALSE} // 187
338 , {doSetOpError, 68 /* D */, 206,0, FALSE} // 188
339 , {doSetNamedRange, 78 /* N */, 155,0, FALSE} // 189
340 , {doSetRange, 255, 155,0, TRUE} // 190
341 , {doSetProp, 112 /* p */, 148,0, FALSE} // 191 set-escape
342 , {doSetProp, 80 /* P */, 148,0, FALSE} // 192
343 , {doSetNamedChar, 78 /* N */, 141,0, FALSE} // 193
344 , {doSetBackslash_s, 115 /* s */, 155,0, TRUE} // 194
345 , {doSetBackslash_S, 83 /* S */, 155,0, TRUE} // 195
346 , {doSetBackslash_w, 119 /* w */, 155,0, TRUE} // 196
347 , {doSetBackslash_W, 87 /* W */, 155,0, TRUE} // 197
348 , {doSetBackslash_d, 100 /* d */, 155,0, TRUE} // 198
349 , {doSetBackslash_D, 68 /* D */, 155,0, TRUE} // 199
350 , {doSetBackslash_h, 104 /* h */, 155,0, TRUE} // 200
351 , {doSetBackslash_H, 72 /* H */, 155,0, TRUE} // 201
352 , {doSetBackslash_v, 118 /* v */, 155,0, TRUE} // 202
353 , {doSetBackslash_V, 86 /* V */, 155,0, TRUE} // 203
354 , {doSetLiteralEscaped, 255, 141,0, TRUE} // 204
355 , {doSetFinish, 255, 14,0, FALSE} // 205 set-finish
356 , {doExit, 255, 206,0, TRUE} // 206 errorDeath (the row's next state is itself)
357 };
358 static const char * const RegexStateNames[] = { 0, // parallel to gRuleParseStateTable: entry i is the name of the state whose first row is i, or 0 for index 0 and continuation rows
359 "start",
360 "term",
361 0,
362 0,
363 0,
364 0,
365 0,
366 0,
367 0,
368 0,
369 0,
370 0,
371 0,
372 "expr-quant",
373 0,
374 0,
375 0,
376 0,
377 0,
378 "expr-cont",
379 0,
380 0,
381 "open-paren-quant",
382 0,
383 "open-paren-quant2",
384 0,
385 "open-paren",
386 0,
387 "open-paren-extended",
388 0,
389 0,
390 0,
391 0,
392 0,
393 0,
394 0,
395 0,
396 0,
397 0,
398 0,
399 0,
400 0,
401 0,
402 0,
403 0,
404 "open-paren-lookbehind",
405 0,
406 0,
407 0,
408 "paren-comment",
409 0,
410 0,
411 "paren-flag",
412 0,
413 0,
414 0,
415 0,
416 0,
417 0,
418 0,
419 0,
420 0,
421 0,
422 "named-capture",
423 0,
424 0,
425 0,
426 "quant-star",
427 0,
428 0,
429 "quant-plus",
430 0,
431 0,
432 "quant-opt",
433 0,
434 0,
435 "interval-open",
436 0,
437 "interval-lower",
438 0,
439 0,
440 0,
441 "interval-upper",
442 0,
443 0,
444 "interval-type",
445 0,
446 0,
447 "backslash",
448 0,
449 0,
450 0,
451 0,
452 0,
453 0,
454 0,
455 0,
456 0,
457 0,
458 0,
459 0,
460 0,
461 0,
462 0,
463 0,
464 0,
465 0,
466 0,
467 0,
468 0,
469 0,
470 0,
471 0,
472 0,
473 "named-backref",
474 0,
475 "named-backref-2",
476 0,
477 "named-backref-3",
478 0,
479 0,
480 0,
481 "set-open",
482 0,
483 0,
484 "set-open2",
485 0,
486 "set-posix",
487 0,
488 0,
489 "set-start",
490 0,
491 0,
492 0,
493 0,
494 0,
495 "set-start-dash",
496 0,
497 "set-start-amp",
498 0,
499 "set-after-lit",
500 0,
501 0,
502 0,
503 0,
504 0,
505 0,
506 "set-after-set",
507 0,
508 0,
509 0,
510 0,
511 0,
512 0,
513 "set-after-range",
514 0,
515 0,
516 0,
517 0,
518 0,
519 0,
520 "set-after-op",
521 0,
522 0,
523 0,
524 "set-set-amp",
525 0,
526 0,
527 "set-lit-amp",
528 0,
529 "set-set-dash",
530 0,
531 0,
532 "set-range-dash",
533 0,
534 "set-range-amp",
535 0,
536 "set-lit-dash",
537 0,
538 0,
539 0,
540 0,
541 "set-lit-dash-escape",
542 0,
543 0,
544 0,
545 0,
546 0,
547 0,
548 0,
549 "set-escape",
550 0,
551 0,
552 0,
553 0,
554 0,
555 0,
556 0,
557 0,
558 0,
559 0,
560 0,
561 0,
562 0,
563 "set-finish",
564 "errorDeath",
565 0}; // one extra 0 entry after the name of the last state (206)
566
567 U_NAMESPACE_END
568 #endif