]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/regexcst.h
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / i18n / regexcst.h
1 //---------------------------------------------------------------------------------
2 //
3 // Generated Header File. Do not edit by hand.
4 // This file contains the state table for the ICU Regular Expression Pattern Parser.
5 // It is generated by the Perl script "regexcst.pl" from
6 // the rule parser state definitions file "regexcst.txt".
7 //
8 // Copyright (C) 2002-2007 International Business Machines Corporation
9 // and others. All rights reserved.
10 //
11 //---------------------------------------------------------------------------------
12 #ifndef RBBIRPT_H
13 #define RBBIRPT_H
14
15 U_NAMESPACE_BEGIN
16 //
17 // Character classes for regex pattern scanning.
// These are character-class indices as stored in RegexTableEl::fCharClass;
// per that field's comment, values 128-255 name a class rather than a single
// ASCII character. The state table also uses 253, 254 and 255 in that field,
// which are not named in this header.
// NOTE(review): presumably those are special classes defined by the parser
// itself -- confirm there.
18 //
19 static const uint8_t kRuleSet_digit_char = 128; // appears in the interval-* rows and the doBackRef row of the table
20 static const uint8_t kRuleSet_rule_char = 129; // appears in row 3 (state "term", doLiteralChar)
21
22
// Actions named by the fAction field of each row of the parser state table.
// This header only declares the identifiers; what each action does is
// implemented by the code that consumes the table.
// The list is emitted by the generator script, so the enumerator order (and
// therefore the numeric values) should be treated as generated output.
23 enum Regex_PatternParseAction {
24 doLiteralChar,
25 doSetEnd,
26 doBackslashA,
27 doSetBeginUnion,
28 doNOP,
29 doSetBackslash_w,
30 doSetRange,
31 doBackslashG,
32 doPerlInline,
33 doSetAddDash,
34 doIntevalLowerDigit, // sic: this spelling is the identifier the state table references
35 doProperty,
36 doBackslashX,
37 doOpenAtomicParen,
38 doSetLiteralEscaped,
39 doPatFinish,
40 doSetBackslash_D,
41 doSetDifference2,
42 doNamedChar,
43 doNGPlus,
44 doOpenLookBehindNeg,
45 doIntervalError,
46 doIntervalSame,
47 doBackRef,
48 doPlus,
49 doOpenCaptureParen,
50 doMismatchedParenErr,
51 doBeginMatchMode,
52 doEscapeError,
53 doOpenNonCaptureParen,
54 doDollar,
55 doSetProp,
56 doIntervalUpperDigit,
57 doSetBegin,
58 doBackslashs,
59 doOpenLookBehind,
60 doSetMatchMode,
61 doOrOperator,
62 doCaret,
63 doMatchModeParen,
64 doStar,
65 doOpt,
66 doMatchMode,
67 doSuppressComments,
68 doPossessiveInterval,
69 doOpenLookAheadNeg,
70 doBackslashW,
71 doCloseParen,
72 doSetOpError,
73 doIntervalInit,
74 doSetFinish,
75 doSetIntersection2,
76 doNGStar,
77 doEnterQuoteMode,
78 doSetAddAmp,
79 doBackslashB,
80 doBackslashw,
81 doPossessiveOpt,
82 doSetNegate,
83 doRuleError,
84 doBackslashb,
85 doConditionalExpr,
86 doPossessivePlus,
87 doBadOpenParenType,
88 doNGInterval,
89 doSetLiteral,
90 doSetNamedChar,
91 doBackslashd,
92 doSetBeginDifference1,
93 doBackslashD,
94 doExit,
95 doSetBackslash_S,
96 doInterval,
97 doSetNoCloseError,
98 doNGOpt,
99 doSetPosixProp,
100 doBackslashS,
101 doBackslashZ,
102 doSetBeginIntersection1,
103 doSetBackslash_W,
104 doSetBackslash_d,
105 doOpenLookAhead,
106 doBadModeFlag,
107 doPatStart,
108 doSetNamedRange,
109 doPossessiveStar,
110 doEscapedLiteralChar,
111 doSetBackslash_s,
112 doBackslashz,
113 doDotAny,
114 rbbiLastAction}; // final enumerator; no table row in this file uses it. NOTE(review): the "rbbi" prefix (like the RBBIRPT_H include guard) looks inherited from the RBBI rule-parser generator -- confirm.
115
116 //-------------------------------------------------------------------------------
117 //
118 // RegexTableEl represents the structure of a row in the transition table
119 // for the pattern parser state machine.
//
// Each element of gRuleParseStateTable is one RegexTableEl. The rows are
// aggregate-initialized in field order, so the order of the fields below
// must stay in step with the generated table.
120 //-------------------------------------------------------------------------------
121 struct RegexTableEl {
122 Regex_PatternParseAction fAction; // action associated with this row
123 uint8_t fCharClass; // 0-127: an individual ASCII character
124 // 128-255: character class index
125 uint8_t fNextState; // 0-250: normal next-state numbers
126 // 255: pop next-state from stack.
127 uint8_t fPushState; // NOTE(review): presumably a state number to push on the stack that fNextState == 255 pops; 0 in rows that push nothing -- confirm in the parser
128 UBool fNextChar; // NOTE(review): presumably whether to advance to the next pattern character after this row is taken -- confirm in the parser
129 };
130
// Transition table for the regex pattern parser state machine.
//
// Each row is one RegexTableEl: {fAction, fCharClass, fNextState, fPushState,
// fNextChar}. The trailing comment on a row gives its index in the array and,
// on the first row of a state, that state's name. fNextState / fPushState
// values are row indices into this same array (e.g. 14 is "expr-quant",
// 183 is "errorDeath"); fNextState == 255 means "pop next-state from stack"
// per the RegexTableEl field comment.
//
// Row 0 is a doNOP entry that no other row targets, so real states start at
// index 1 ("start").
// NOTE(review): the file header marks this file as generated output; changes
// presumably belong in regexcst.txt / regexcst.pl rather than here -- confirm.
131 static const struct RegexTableEl gRuleParseStateTable[] = {
132 {doNOP, 0, 0, 0, TRUE}
133 , {doPatStart, 255, 2,0, FALSE} // 1 start
134 , {doLiteralChar, 254, 14,0, TRUE} // 2 term
135 , {doLiteralChar, 129, 14,0, TRUE} // 3
136 , {doSetBegin, 91 /* [ */, 104, 182, TRUE} // 4
137 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5
138 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6
139 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
140 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8
141 , {doNOP, 92 /* \ */, 84,0, TRUE} // 9
142 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
143 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
144 , {doPatFinish, 253, 2,0, FALSE} // 12
145 , {doRuleError, 255, 183,0, FALSE} // 13
146 , {doNOP, 42 /* * */, 63,0, TRUE} // 14 expr-quant
147 , {doNOP, 43 /* + */, 66,0, TRUE} // 15
148 , {doNOP, 63 /* ? */, 69,0, TRUE} // 16
149 , {doIntervalInit, 123 /* { */, 72,0, TRUE} // 17
150 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18
151 , {doNOP, 255, 20,0, FALSE} // 19
152 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
153 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21
154 , {doNOP, 255, 2,0, FALSE} // 22
155 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
156 , {doNOP, 255, 27,0, FALSE} // 24
157 , {doNOP, 35 /* # */, 49, 14, TRUE} // 25 open-paren-quant2
158 , {doNOP, 255, 29,0, FALSE} // 26
159 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
160 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
161 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended
162 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30
163 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
164 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
165 , {doNOP, 60 /* < */, 46,0, TRUE} // 33
166 , {doNOP, 35 /* # */, 49, 2, TRUE} // 34
167 , {doBeginMatchMode, 105 /* i */, 52,0, FALSE} // 35
168 , {doBeginMatchMode, 100 /* d */, 52,0, FALSE} // 36
169 , {doBeginMatchMode, 109 /* m */, 52,0, FALSE} // 37
170 , {doBeginMatchMode, 115 /* s */, 52,0, FALSE} // 38
171 , {doBeginMatchMode, 117 /* u */, 52,0, FALSE} // 39
172 , {doBeginMatchMode, 119 /* w */, 52,0, FALSE} // 40
173 , {doBeginMatchMode, 120 /* x */, 52,0, FALSE} // 41
174 , {doBeginMatchMode, 45 /* - */, 52,0, FALSE} // 42
175 , {doConditionalExpr, 40 /* ( */, 183,0, TRUE} // 43
176 , {doPerlInline, 123 /* { */, 183,0, TRUE} // 44
177 , {doBadOpenParenType, 255, 183,0, FALSE} // 45
178 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
179 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
180 , {doBadOpenParenType, 255, 183,0, FALSE} // 48
181 , {doNOP, 41 /* ) */, 255,0, TRUE} // 49 paren-comment
182 , {doMismatchedParenErr, 253, 183,0, FALSE} // 50
183 , {doNOP, 255, 49,0, TRUE} // 51
184 , {doMatchMode, 105 /* i */, 52,0, TRUE} // 52 paren-flag
185 , {doMatchMode, 100 /* d */, 52,0, TRUE} // 53
186 , {doMatchMode, 109 /* m */, 52,0, TRUE} // 54
187 , {doMatchMode, 115 /* s */, 52,0, TRUE} // 55
188 , {doMatchMode, 117 /* u */, 52,0, TRUE} // 56
189 , {doMatchMode, 119 /* w */, 52,0, TRUE} // 57
190 , {doMatchMode, 120 /* x */, 52,0, TRUE} // 58
191 , {doMatchMode, 45 /* - */, 52,0, TRUE} // 59
192 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 60
193 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 61
194 , {doBadModeFlag, 255, 183,0, FALSE} // 62
195 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 63 quant-star
196 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 64
197 , {doStar, 255, 20,0, FALSE} // 65
198 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 66 quant-plus
199 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 67
200 , {doPlus, 255, 20,0, FALSE} // 68
201 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 69 quant-opt
202 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 70
203 , {doOpt, 255, 20,0, FALSE} // 71
204 , {doNOP, 128, 74,0, FALSE} // 72 interval-open
205 , {doIntervalError, 255, 183,0, FALSE} // 73
206 , {doIntevalLowerDigit, 128, 74,0, TRUE} // 74 interval-lower
207 , {doNOP, 44 /* , */, 78,0, TRUE} // 75
208 , {doIntervalSame, 125 /* } */, 81,0, TRUE} // 76
209 , {doIntervalError, 255, 183,0, FALSE} // 77
210 , {doIntervalUpperDigit, 128, 78,0, TRUE} // 78 interval-upper
211 , {doNOP, 125 /* } */, 81,0, TRUE} // 79
212 , {doIntervalError, 255, 183,0, FALSE} // 80
213 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 81 interval-type
214 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 82
215 , {doInterval, 255, 20,0, FALSE} // 83
216 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 84 backslash
217 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 85
218 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 86
219 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 87
220 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 88
221 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 89
222 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 90
223 , {doProperty, 112 /* p */, 14,0, FALSE} // 91
224 , {doProperty, 80 /* P */, 14,0, FALSE} // 92
225 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 93
226 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 94
227 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 95
228 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 96
229 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 97
230 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 98
231 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 99
232 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 100
233 , {doBackRef, 128, 14,0, TRUE} // 101
234 , {doEscapeError, 253, 183,0, FALSE} // 102
235 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 103
236 , {doSetNegate, 94 /* ^ */, 107,0, TRUE} // 104 set-open
237 , {doSetPosixProp, 58 /* : */, 109,0, FALSE} // 105
238 , {doNOP, 255, 107,0, FALSE} // 106
239 , {doSetLiteral, 93 /* ] */, 122,0, TRUE} // 107 set-open2
240 , {doNOP, 255, 112,0, FALSE} // 108
241 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 109 set-posix
242 , {doNOP, 58 /* : */, 112,0, FALSE} // 110
243 , {doRuleError, 255, 183,0, FALSE} // 111
244 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 112 set-start
245 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 113
246 , {doNOP, 92 /* \ */, 172,0, TRUE} // 114
247 , {doNOP, 45 /* - */, 118,0, TRUE} // 115
248 , {doNOP, 38 /* & */, 120,0, TRUE} // 116
249 , {doSetLiteral, 255, 122,0, TRUE} // 117
250 , {doRuleError, 45 /* - */, 183,0, FALSE} // 118 set-start-dash
251 , {doSetAddDash, 255, 122,0, FALSE} // 119
252 , {doRuleError, 38 /* & */, 183,0, FALSE} // 120 set-start-amp
253 , {doSetAddAmp, 255, 122,0, FALSE} // 121
254 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 122 set-after-lit
255 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 123
256 , {doNOP, 45 /* - */, 159,0, TRUE} // 124
257 , {doNOP, 38 /* & */, 150,0, TRUE} // 125
258 , {doNOP, 92 /* \ */, 172,0, TRUE} // 126
259 , {doSetNoCloseError, 253, 183,0, FALSE} // 127
260 , {doSetLiteral, 255, 122,0, TRUE} // 128
261 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 129 set-after-set
262 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 130
263 , {doNOP, 45 /* - */, 152,0, TRUE} // 131
264 , {doNOP, 38 /* & */, 147,0, TRUE} // 132
265 , {doNOP, 92 /* \ */, 172,0, TRUE} // 133
266 , {doSetNoCloseError, 253, 183,0, FALSE} // 134
267 , {doSetLiteral, 255, 122,0, TRUE} // 135
268 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 136 set-after-range
269 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 137
270 , {doNOP, 45 /* - */, 155,0, TRUE} // 138
271 , {doNOP, 38 /* & */, 157,0, TRUE} // 139
272 , {doNOP, 92 /* \ */, 172,0, TRUE} // 140
273 , {doSetNoCloseError, 253, 183,0, FALSE} // 141
274 , {doSetLiteral, 255, 122,0, TRUE} // 142
275 , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 143 set-after-op
276 , {doSetOpError, 93 /* ] */, 183,0, FALSE} // 144
277 , {doNOP, 92 /* \ */, 172,0, TRUE} // 145
278 , {doSetLiteral, 255, 122,0, TRUE} // 146
279 , {doSetBeginIntersection1, 91 /* [ */, 104, 129, TRUE} // 147 set-set-amp
280 , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 148
281 , {doSetAddAmp, 255, 122,0, FALSE} // 149
282 , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 150 set-lit-amp
283 , {doSetAddAmp, 255, 122,0, FALSE} // 151
284 , {doSetBeginDifference1, 91 /* [ */, 104, 129, TRUE} // 152 set-set-dash
285 , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 153
286 , {doSetAddDash, 255, 122,0, FALSE} // 154
287 , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 155 set-range-dash
288 , {doSetAddDash, 255, 122,0, FALSE} // 156
289 , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 157 set-range-amp
290 , {doSetAddAmp, 255, 122,0, FALSE} // 158
291 , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 159 set-lit-dash
292 , {doSetAddDash, 91 /* [ */, 122,0, FALSE} // 160
293 , {doSetAddDash, 93 /* ] */, 122,0, FALSE} // 161
294 , {doNOP, 92 /* \ */, 164,0, TRUE} // 162
295 , {doSetRange, 255, 136,0, TRUE} // 163
296 , {doSetOpError, 115 /* s */, 183,0, FALSE} // 164 set-lit-dash-escape
297 , {doSetOpError, 83 /* S */, 183,0, FALSE} // 165
298 , {doSetOpError, 119 /* w */, 183,0, FALSE} // 166
299 , {doSetOpError, 87 /* W */, 183,0, FALSE} // 167
300 , {doSetOpError, 100 /* d */, 183,0, FALSE} // 168
301 , {doSetOpError, 68 /* D */, 183,0, FALSE} // 169
302 , {doSetNamedRange, 78 /* N */, 136,0, FALSE} // 170
303 , {doSetRange, 255, 136,0, TRUE} // 171
304 , {doSetProp, 112 /* p */, 129,0, FALSE} // 172 set-escape
305 , {doSetProp, 80 /* P */, 129,0, FALSE} // 173
306 , {doSetNamedChar, 78 /* N */, 122,0, FALSE} // 174
307 , {doSetBackslash_s, 115 /* s */, 136,0, TRUE} // 175
308 , {doSetBackslash_S, 83 /* S */, 136,0, TRUE} // 176
309 , {doSetBackslash_w, 119 /* w */, 136,0, TRUE} // 177
310 , {doSetBackslash_W, 87 /* W */, 136,0, TRUE} // 178
311 , {doSetBackslash_d, 100 /* d */, 136,0, TRUE} // 179
312 , {doSetBackslash_D, 68 /* D */, 136,0, TRUE} // 180
313 , {doSetLiteralEscaped, 255, 122,0, TRUE} // 181
314 , {doSetFinish, 255, 14,0, FALSE} // 182 set-finish
315 , {doExit, 255, 183,0, TRUE} // 183 errorDeath
316 };
// State names, indexed in parallel with gRuleParseStateTable.
// Entry i holds the name of the state whose first row is table row i, and 0
// for rows that are not the first row of a state (including row 0). The names
// match the ones in the trailing comments of the table rows. One extra 0
// entry follows "errorDeath".
// NOTE(review): presumably consumed only by debug/trace output in the
// parser -- confirm before relying on it elsewhere.
317 static const char * const RegexStateNames[] = { 0,
318 "start",
319 "term",
320 0,
321 0,
322 0,
323 0,
324 0,
325 0,
326 0,
327 0,
328 0,
329 0,
330 0,
331 "expr-quant",
332 0,
333 0,
334 0,
335 0,
336 0,
337 "expr-cont",
338 0,
339 0,
340 "open-paren-quant",
341 0,
342 "open-paren-quant2",
343 0,
344 "open-paren",
345 0,
346 "open-paren-extended",
347 0,
348 0,
349 0,
350 0,
351 0,
352 0,
353 0,
354 0,
355 0,
356 0,
357 0,
358 0,
359 0,
360 0,
361 0,
362 0,
363 "open-paren-lookbehind",
364 0,
365 0,
366 "paren-comment",
367 0,
368 0,
369 "paren-flag",
370 0,
371 0,
372 0,
373 0,
374 0,
375 0,
376 0,
377 0,
378 0,
379 0,
380 "quant-star",
381 0,
382 0,
383 "quant-plus",
384 0,
385 0,
386 "quant-opt",
387 0,
388 0,
389 "interval-open",
390 0,
391 "interval-lower",
392 0,
393 0,
394 0,
395 "interval-upper",
396 0,
397 0,
398 "interval-type",
399 0,
400 0,
401 "backslash",
402 0,
403 0,
404 0,
405 0,
406 0,
407 0,
408 0,
409 0,
410 0,
411 0,
412 0,
413 0,
414 0,
415 0,
416 0,
417 0,
418 0,
419 0,
420 0,
421 "set-open",
422 0,
423 0,
424 "set-open2",
425 0,
426 "set-posix",
427 0,
428 0,
429 "set-start",
430 0,
431 0,
432 0,
433 0,
434 0,
435 "set-start-dash",
436 0,
437 "set-start-amp",
438 0,
439 "set-after-lit",
440 0,
441 0,
442 0,
443 0,
444 0,
445 0,
446 "set-after-set",
447 0,
448 0,
449 0,
450 0,
451 0,
452 0,
453 "set-after-range",
454 0,
455 0,
456 0,
457 0,
458 0,
459 0,
460 "set-after-op",
461 0,
462 0,
463 0,
464 "set-set-amp",
465 0,
466 0,
467 "set-lit-amp",
468 0,
469 "set-set-dash",
470 0,
471 0,
472 "set-range-dash",
473 0,
474 "set-range-amp",
475 0,
476 "set-lit-dash",
477 0,
478 0,
479 0,
480 0,
481 "set-lit-dash-escape",
482 0,
483 0,
484 0,
485 0,
486 0,
487 0,
488 0,
489 "set-escape",
490 0,
491 0,
492 0,
493 0,
494 0,
495 0,
496 0,
497 0,
498 0,
499 "set-finish",
500 "errorDeath",
501 0};
502
503 U_NAMESPACE_END
504 #endif