]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/regexcst.h
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / i18n / regexcst.h
1 //---------------------------------------------------------------------------------
2 //
3 // Generated Header File. Do not edit by hand.
4 // This file contains the state table for the ICU Regular Expression Pattern Parser.
5 // It is generated by the Perl script "regexcst.pl" from
6 // the rule parser state definitions file "regexcst.txt".
7 //
8 // Copyright (C) 2002-2015 International Business Machines Corporation
9 // and others. All rights reserved.
10 //
11 //---------------------------------------------------------------------------------
// NOTE(review): the include-guard macro is named RBBIRPT_H although this header
// is regexcst.h. If any other header uses the same macro, including both in one
// translation unit would silently drop whichever comes second -- confirm, and
// change it in the generator script rather than by hand.
12 #ifndef RBBIRPT_H
13 #define RBBIRPT_H
14
15 U_NAMESPACE_BEGIN
16 //
17 // Character classes for regex pattern scanning.
// These values appear in the fCharClass column of gRuleParseStateTable. Per the
// RegexTableEl field comments, fCharClass values of 128 and above are character
// class indices rather than individual ASCII characters.
// NOTE(review): the actual membership of each class is defined outside this
// header; the names suggest ASCII letters, digits and "rule" characters.
18 //
19 static const uint8_t kRuleSet_ascii_letter = 128;
20 static const uint8_t kRuleSet_digit_char = 129;
21 static const uint8_t kRuleSet_rule_char = 130;
22
23
// Action codes stored in the fAction field of each RegexTableEl row.
// The enumerators have no explicit values, so their numeric values are fixed
// purely by declaration order; do not reorder or insert entries by hand.
// NOTE(review): the code that interprets each action is not in this header.
24 enum Regex_PatternParseAction {
25 doSetBackslash_V,
26 doSetBackslash_h,
27 doBeginNamedBackRef,
28 doSetMatchMode,
29 doEnterQuoteMode,
30 doOpenCaptureParen,
31 doContinueNamedCapture,
32 doSetBackslash_d,
33 doBeginMatchMode,
34 doBackslashX,
35 doSetPosixProp,
36 doIntervalError,
37 doSetLiteralEscaped,
38 doSetBackslash_s,
39 doNOP,
40 doBackslashv,
41 doOpenLookBehind,
42 doPatStart,
43 doPossessiveInterval,
44 doOpenAtomicParen,
45 doOpenLookAheadNeg,
46 doBackslashd,
47 doBackslashZ,
48 doIntervalUpperDigit,
49 doBadNamedCapture,
50 doSetDifference2,
51 doSetAddAmp,
52 doSetNamedChar,
53 doNamedChar,
54 doSetBackslash_H,
55 doBackslashb,
56 doBackslashz,
57 doSetBeginDifference1,
58 doOpenLookAhead,
59 doMatchModeParen,
60 doBackslashV,
61 doIntevalLowerDigit, // sic: "Inteval" spelling is the identifier used by the state table (row 79); keep as-is
62 doCaret,
63 doSetEnd,
64 doSetNegate,
65 doBackslashS,
66 doOrOperator,
67 doBackslashB,
68 doBackslashw,
69 doBackslashR,
70 doRuleError,
71 doDotAny,
72 doMatchMode,
73 doSetBackslash_W,
74 doNGPlus,
75 doSetBackslash_D,
76 doPossessiveOpt,
77 doSetNamedRange,
78 doConditionalExpr,
79 doBackslashs,
80 doPossessiveStar,
81 doPlus,
82 doBadOpenParenType,
83 doCloseParen,
84 doNGInterval,
85 doSetProp,
86 doBackRef,
87 doSetBeginUnion,
88 doEscapeError,
89 doOpt,
90 doSetBeginIntersection1,
91 doPossessivePlus,
92 doBackslashD,
93 doOpenLookBehindNeg,
94 doSetBegin,
95 doSetIntersection2,
96 doCompleteNamedBackRef,
97 doSetRange,
98 doDollar,
99 doBackslashH,
100 doExit,
101 doNGOpt,
102 doOpenNonCaptureParen,
103 doBackslashA,
104 doSetBackslash_v,
105 doBackslashh,
106 doBadModeFlag,
107 doSetNoCloseError,
108 doIntervalSame,
109 doSetAddDash,
110 doBackslashW,
111 doPerlInline,
112 doSetOpError,
113 doSetLiteral,
114 doPatFinish,
115 doBeginNamedCapture,
116 doEscapedLiteralChar,
117 doLiteralChar,
118 doSuppressComments,
119 doMismatchedParenErr,
120 doNGStar,
121 doSetFinish,
122 doInterval,
123 doBackslashG,
124 doStar,
125 doSetBackslash_w,
126 doSetBackslash_S,
127 doProperty,
128 doContinueNamedBackRef,
129 doIntervalInit,
// Last enumerator; not referenced by any row of the table in this header.
// NOTE(review): presumably an end-of-list sentinel; the "rbbi" prefix looks
// inherited from a different generated parser -- confirm before relying on it.
130 rbbiLastAction};
131
132 //-------------------------------------------------------------------------------
133 //
134 // RegexTableEl represents the structure of a row in the transition table
135 // for the pattern parser state machine.
//
// Field order matters: the rows of gRuleParseStateTable are written as
// positional aggregate initializers {fAction, fCharClass, fNextState,
// fPushState, fNextChar}.
136 //-------------------------------------------------------------------------------
137 struct RegexTableEl {
// Action code associated with this row.
138 Regex_PatternParseAction fAction;
// NOTE(review): the table also uses the values 253, 254 and 255 in this column;
// they are not defined in this header -- presumably reserved classes
// handled by the parser itself. Confirm against the consuming code.
139 uint8_t fCharClass; // 0-127: an individual ASCII character
140 // 128-255: character class index
141 uint8_t fNextState; // 0-250: normal next-state numbers
142 // 255: pop next-state from stack.
// NOTE(review): presumably the state number pushed onto the state stack
// (0 in rows that push nothing), to be popped later by a row whose
// fNextState is 255 -- confirm against the consuming code.
143 uint8_t fPushState;
// NOTE(review): presumably TRUE when the parser should advance to the next
// input character after taking this row -- confirm.
144 UBool fNextChar;
145 };
146
// State transition table for the regex pattern parser, one RegexTableEl per row.
// Columns are {fAction, fCharClass, fNextState, fPushState, fNextChar}.
// The trailing comment on each row is its index in the array; where a name
// follows the index, that row is the first row of the named state, and the same
// name is stored at that index in RegexStateNames. Row 0 is an unnamed doNOP
// placeholder. The fNextState values refer to row indices of this table (e.g.
// 206 is the "errorDeath" row), except 255, which the RegexTableEl comments
// define as "pop next-state from stack".
// Generated data: regenerate from the rule definitions instead of editing rows.
147 static const struct RegexTableEl gRuleParseStateTable[] = {
148 {doNOP, 0, 0, 0, TRUE}
149 , {doPatStart, 255, 2,0, FALSE} // 1 start
150 , {doLiteralChar, 254, 14,0, TRUE} // 2 term
151 , {doLiteralChar, 130, 14,0, TRUE} // 3
152 , {doSetBegin, 91 /* [ */, 123, 205, TRUE} // 4
153 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5
154 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6
155 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
156 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8
157 , {doNOP, 92 /* \ */, 89,0, TRUE} // 9
158 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
159 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
160 , {doPatFinish, 253, 2,0, FALSE} // 12
161 , {doRuleError, 255, 206,0, FALSE} // 13
162 , {doNOP, 42 /* * */, 68,0, TRUE} // 14 expr-quant
163 , {doNOP, 43 /* + */, 71,0, TRUE} // 15
164 , {doNOP, 63 /* ? */, 74,0, TRUE} // 16
165 , {doIntervalInit, 123 /* { */, 77,0, TRUE} // 17
166 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18
167 , {doNOP, 255, 20,0, FALSE} // 19
168 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
169 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21
170 , {doNOP, 255, 2,0, FALSE} // 22
171 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
172 , {doNOP, 255, 27,0, FALSE} // 24
173 , {doNOP, 35 /* # */, 50, 14, TRUE} // 25 open-paren-quant2
174 , {doNOP, 255, 29,0, FALSE} // 26
175 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
176 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
177 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended
178 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30
179 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
180 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
181 , {doNOP, 60 /* < */, 46,0, TRUE} // 33
182 , {doNOP, 35 /* # */, 50, 2, TRUE} // 34
183 , {doBeginMatchMode, 105 /* i */, 53,0, FALSE} // 35
184 , {doBeginMatchMode, 100 /* d */, 53,0, FALSE} // 36
185 , {doBeginMatchMode, 109 /* m */, 53,0, FALSE} // 37
186 , {doBeginMatchMode, 115 /* s */, 53,0, FALSE} // 38
187 , {doBeginMatchMode, 117 /* u */, 53,0, FALSE} // 39
188 , {doBeginMatchMode, 119 /* w */, 53,0, FALSE} // 40
189 , {doBeginMatchMode, 120 /* x */, 53,0, FALSE} // 41
190 , {doBeginMatchMode, 45 /* - */, 53,0, FALSE} // 42
191 , {doConditionalExpr, 40 /* ( */, 206,0, TRUE} // 43
192 , {doPerlInline, 123 /* { */, 206,0, TRUE} // 44
193 , {doBadOpenParenType, 255, 206,0, FALSE} // 45
194 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
195 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
196 , {doBeginNamedCapture, 128, 64,0, FALSE} // 48
197 , {doBadOpenParenType, 255, 206,0, FALSE} // 49
198 , {doNOP, 41 /* ) */, 255,0, TRUE} // 50 paren-comment
199 , {doMismatchedParenErr, 253, 206,0, FALSE} // 51
200 , {doNOP, 255, 50,0, TRUE} // 52
201 , {doMatchMode, 105 /* i */, 53,0, TRUE} // 53 paren-flag
202 , {doMatchMode, 100 /* d */, 53,0, TRUE} // 54
203 , {doMatchMode, 109 /* m */, 53,0, TRUE} // 55
204 , {doMatchMode, 115 /* s */, 53,0, TRUE} // 56
205 , {doMatchMode, 117 /* u */, 53,0, TRUE} // 57
206 , {doMatchMode, 119 /* w */, 53,0, TRUE} // 58
207 , {doMatchMode, 120 /* x */, 53,0, TRUE} // 59
208 , {doMatchMode, 45 /* - */, 53,0, TRUE} // 60
209 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 61
210 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 62
211 , {doBadModeFlag, 255, 206,0, FALSE} // 63
212 , {doContinueNamedCapture, 128, 64,0, TRUE} // 64 named-capture
213 , {doContinueNamedCapture, 129, 64,0, TRUE} // 65
214 , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE} // 66
215 , {doBadNamedCapture, 255, 206,0, FALSE} // 67
216 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 68 quant-star
217 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 69
218 , {doStar, 255, 20,0, FALSE} // 70
219 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 71 quant-plus
220 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 72
221 , {doPlus, 255, 20,0, FALSE} // 73
222 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 74 quant-opt
223 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 75
224 , {doOpt, 255, 20,0, FALSE} // 76
225 , {doNOP, 129, 79,0, FALSE} // 77 interval-open
226 , {doIntervalError, 255, 206,0, FALSE} // 78
227 , {doIntevalLowerDigit, 129, 79,0, TRUE} // 79 interval-lower
228 , {doNOP, 44 /* , */, 83,0, TRUE} // 80
229 , {doIntervalSame, 125 /* } */, 86,0, TRUE} // 81
230 , {doIntervalError, 255, 206,0, FALSE} // 82
231 , {doIntervalUpperDigit, 129, 83,0, TRUE} // 83 interval-upper
232 , {doNOP, 125 /* } */, 86,0, TRUE} // 84
233 , {doIntervalError, 255, 206,0, FALSE} // 85
234 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 86 interval-type
235 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 87
236 , {doInterval, 255, 20,0, FALSE} // 88
237 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 89 backslash
238 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 90
239 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 91
240 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 92
241 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 93
242 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 94
243 , {doBackslashh, 104 /* h */, 14,0, TRUE} // 95
244 , {doBackslashH, 72 /* H */, 14,0, TRUE} // 96
245 , {doNOP, 107 /* k */, 115,0, TRUE} // 97
246 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 98
247 , {doProperty, 112 /* p */, 14,0, FALSE} // 99
248 , {doProperty, 80 /* P */, 14,0, FALSE} // 100
249 , {doBackslashR, 82 /* R */, 14,0, TRUE} // 101
250 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 102
251 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 103
252 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 104
253 , {doBackslashv, 118 /* v */, 14,0, TRUE} // 105
254 , {doBackslashV, 86 /* V */, 14,0, TRUE} // 106
255 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 107
256 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 108
257 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 109
258 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 110
259 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 111
260 , {doBackRef, 129, 14,0, TRUE} // 112
261 , {doEscapeError, 253, 206,0, FALSE} // 113
262 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 114
263 , {doBeginNamedBackRef, 60 /* < */, 117,0, TRUE} // 115 named-backref
264 , {doBadNamedCapture, 255, 206,0, FALSE} // 116
265 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 117 named-backref-2
266 , {doBadNamedCapture, 255, 206,0, FALSE} // 118
267 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 119 named-backref-3
268 , {doContinueNamedBackRef, 129, 119,0, TRUE} // 120
269 , {doCompleteNamedBackRef, 62 /* > */, 14,0, TRUE} // 121
270 , {doBadNamedCapture, 255, 206,0, FALSE} // 122
271 , {doSetNegate, 94 /* ^ */, 126,0, TRUE} // 123 set-open
272 , {doSetPosixProp, 58 /* : */, 128,0, FALSE} // 124
273 , {doNOP, 255, 126,0, FALSE} // 125
274 , {doSetLiteral, 93 /* ] */, 141,0, TRUE} // 126 set-open2
275 , {doNOP, 255, 131,0, FALSE} // 127
276 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 128 set-posix
277 , {doNOP, 58 /* : */, 131,0, FALSE} // 129
278 , {doRuleError, 255, 206,0, FALSE} // 130
279 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 131 set-start
280 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 132
281 , {doNOP, 92 /* \ */, 191,0, TRUE} // 133
282 , {doNOP, 45 /* - */, 137,0, TRUE} // 134
283 , {doNOP, 38 /* & */, 139,0, TRUE} // 135
284 , {doSetLiteral, 255, 141,0, TRUE} // 136
285 , {doRuleError, 45 /* - */, 206,0, FALSE} // 137 set-start-dash
286 , {doSetAddDash, 255, 141,0, FALSE} // 138
287 , {doRuleError, 38 /* & */, 206,0, FALSE} // 139 set-start-amp
288 , {doSetAddAmp, 255, 141,0, FALSE} // 140
289 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 141 set-after-lit
290 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 142
291 , {doNOP, 45 /* - */, 178,0, TRUE} // 143
292 , {doNOP, 38 /* & */, 169,0, TRUE} // 144
293 , {doNOP, 92 /* \ */, 191,0, TRUE} // 145
294 , {doSetNoCloseError, 253, 206,0, FALSE} // 146
295 , {doSetLiteral, 255, 141,0, TRUE} // 147
296 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 148 set-after-set
297 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 149
298 , {doNOP, 45 /* - */, 171,0, TRUE} // 150
299 , {doNOP, 38 /* & */, 166,0, TRUE} // 151
300 , {doNOP, 92 /* \ */, 191,0, TRUE} // 152
301 , {doSetNoCloseError, 253, 206,0, FALSE} // 153
302 , {doSetLiteral, 255, 141,0, TRUE} // 154
303 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 155 set-after-range
304 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 156
305 , {doNOP, 45 /* - */, 174,0, TRUE} // 157
306 , {doNOP, 38 /* & */, 176,0, TRUE} // 158
307 , {doNOP, 92 /* \ */, 191,0, TRUE} // 159
308 , {doSetNoCloseError, 253, 206,0, FALSE} // 160
309 , {doSetLiteral, 255, 141,0, TRUE} // 161
310 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 162 set-after-op
311 , {doSetOpError, 93 /* ] */, 206,0, FALSE} // 163
312 , {doNOP, 92 /* \ */, 191,0, TRUE} // 164
313 , {doSetLiteral, 255, 141,0, TRUE} // 165
314 , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE} // 166 set-set-amp
315 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 167
316 , {doSetAddAmp, 255, 141,0, FALSE} // 168
317 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 169 set-lit-amp
318 , {doSetAddAmp, 255, 141,0, FALSE} // 170
319 , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE} // 171 set-set-dash
320 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 172
321 , {doSetAddDash, 255, 141,0, FALSE} // 173
322 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 174 set-range-dash
323 , {doSetAddDash, 255, 141,0, FALSE} // 175
324 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 176 set-range-amp
325 , {doSetAddAmp, 255, 141,0, FALSE} // 177
326 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 178 set-lit-dash
327 , {doSetAddDash, 91 /* [ */, 141,0, FALSE} // 179
328 , {doSetAddDash, 93 /* ] */, 141,0, FALSE} // 180
329 , {doNOP, 92 /* \ */, 183,0, TRUE} // 181
330 , {doSetRange, 255, 155,0, TRUE} // 182
331 , {doSetOpError, 115 /* s */, 206,0, FALSE} // 183 set-lit-dash-escape
332 , {doSetOpError, 83 /* S */, 206,0, FALSE} // 184
333 , {doSetOpError, 119 /* w */, 206,0, FALSE} // 185
334 , {doSetOpError, 87 /* W */, 206,0, FALSE} // 186
335 , {doSetOpError, 100 /* d */, 206,0, FALSE} // 187
336 , {doSetOpError, 68 /* D */, 206,0, FALSE} // 188
337 , {doSetNamedRange, 78 /* N */, 155,0, FALSE} // 189
338 , {doSetRange, 255, 155,0, TRUE} // 190
339 , {doSetProp, 112 /* p */, 148,0, FALSE} // 191 set-escape
340 , {doSetProp, 80 /* P */, 148,0, FALSE} // 192
341 , {doSetNamedChar, 78 /* N */, 141,0, FALSE} // 193
342 , {doSetBackslash_s, 115 /* s */, 155,0, TRUE} // 194
343 , {doSetBackslash_S, 83 /* S */, 155,0, TRUE} // 195
344 , {doSetBackslash_w, 119 /* w */, 155,0, TRUE} // 196
345 , {doSetBackslash_W, 87 /* W */, 155,0, TRUE} // 197
346 , {doSetBackslash_d, 100 /* d */, 155,0, TRUE} // 198
347 , {doSetBackslash_D, 68 /* D */, 155,0, TRUE} // 199
348 , {doSetBackslash_h, 104 /* h */, 155,0, TRUE} // 200
349 , {doSetBackslash_H, 72 /* H */, 155,0, TRUE} // 201
350 , {doSetBackslash_v, 118 /* v */, 155,0, TRUE} // 202
351 , {doSetBackslash_V, 86 /* V */, 155,0, TRUE} // 203
352 , {doSetLiteralEscaped, 255, 141,0, TRUE} // 204
353 , {doSetFinish, 255, 14,0, FALSE} // 205 set-finish
354 , {doExit, 255, 206,0, TRUE} // 206 errorDeath
355 };
// Names of the parser states, indexed by row number of gRuleParseStateTable.
// An entry is a string only at the first row of a named state (e.g. index 1 is
// "start", index 206 is "errorDeath"); every other row holds 0. One extra 0
// entry follows the last row of the table.
// NOTE(review): nothing in this header reads this array; presumably it is used
// for debug/trace output by the parser -- confirm.
356 static const char * const RegexStateNames[] = { 0,
357 "start",
358 "term",
359 0,
360 0,
361 0,
362 0,
363 0,
364 0,
365 0,
366 0,
367 0,
368 0,
369 0,
370 "expr-quant",
371 0,
372 0,
373 0,
374 0,
375 0,
376 "expr-cont",
377 0,
378 0,
379 "open-paren-quant",
380 0,
381 "open-paren-quant2",
382 0,
383 "open-paren",
384 0,
385 "open-paren-extended",
386 0,
387 0,
388 0,
389 0,
390 0,
391 0,
392 0,
393 0,
394 0,
395 0,
396 0,
397 0,
398 0,
399 0,
400 0,
401 0,
402 "open-paren-lookbehind",
403 0,
404 0,
405 0,
406 "paren-comment",
407 0,
408 0,
409 "paren-flag",
410 0,
411 0,
412 0,
413 0,
414 0,
415 0,
416 0,
417 0,
418 0,
419 0,
420 "named-capture",
421 0,
422 0,
423 0,
424 "quant-star",
425 0,
426 0,
427 "quant-plus",
428 0,
429 0,
430 "quant-opt",
431 0,
432 0,
433 "interval-open",
434 0,
435 "interval-lower",
436 0,
437 0,
438 0,
439 "interval-upper",
440 0,
441 0,
442 "interval-type",
443 0,
444 0,
445 "backslash",
446 0,
447 0,
448 0,
449 0,
450 0,
451 0,
452 0,
453 0,
454 0,
455 0,
456 0,
457 0,
458 0,
459 0,
460 0,
461 0,
462 0,
463 0,
464 0,
465 0,
466 0,
467 0,
468 0,
469 0,
470 0,
471 "named-backref",
472 0,
473 "named-backref-2",
474 0,
475 "named-backref-3",
476 0,
477 0,
478 0,
479 "set-open",
480 0,
481 0,
482 "set-open2",
483 0,
484 "set-posix",
485 0,
486 0,
487 "set-start",
488 0,
489 0,
490 0,
491 0,
492 0,
493 "set-start-dash",
494 0,
495 "set-start-amp",
496 0,
497 "set-after-lit",
498 0,
499 0,
500 0,
501 0,
502 0,
503 0,
504 "set-after-set",
505 0,
506 0,
507 0,
508 0,
509 0,
510 0,
511 "set-after-range",
512 0,
513 0,
514 0,
515 0,
516 0,
517 0,
518 "set-after-op",
519 0,
520 0,
521 0,
522 "set-set-amp",
523 0,
524 0,
525 "set-lit-amp",
526 0,
527 "set-set-dash",
528 0,
529 0,
530 "set-range-dash",
531 0,
532 "set-range-amp",
533 0,
534 "set-lit-dash",
535 0,
536 0,
537 0,
538 0,
539 "set-lit-dash-escape",
540 0,
541 0,
542 0,
543 0,
544 0,
545 0,
546 0,
547 "set-escape",
548 0,
549 0,
550 0,
551 0,
552 0,
553 0,
554 0,
555 0,
556 0,
557 0,
558 0,
559 0,
560 0,
561 "set-finish",
562 "errorDeath",
563 0};
564
565 U_NAMESPACE_END
566 #endif