# [apple/icu.git] / icuSources / i18n / regexcst.txt


#*****************************************************************************
#
#   Copyright (C) 2002-2007, International Business Machines Corporation and others.
#   All Rights Reserved.
#
#*****************************************************************************
#
#  file:  regexcst.txt
#  ICU Regular Expression Parser State Table
#
#     This state table is used when reading and parsing a regular expression pattern
#     The pattern parser uses a state machine; the data in this file define the
#     state transitions that occur for each input character.
#
#     *** This file defines the regex pattern grammar.   This is it.
#     *** The determination of what is accepted is here.
#
#     This file is processed by a perl script "regexcst.pl" to produce initialized C arrays
#     that are then built with the rule parser.
#

#
# Here is the syntax of the state definitions in this file:
#
#
#StateName:
#   input-char           n next-state           ^push-state     action
#   input-char           n next-state           ^push-state     action
#       |                |   |                      |             |
#       |                |   |                      |             |--- action to be performed by state machine
#       |                |   |                      |                  See function RegexCompile::doParseActions()
#       |                |   |                      |
#       |                |   |                      |--- Push this named state onto the state stack.
#       |                |   |                           Later, when next state is specified as "pop",
#       |                |   |                           the pushed state will become the current state.
#       |                |   |
#       |                |   |--- Transition to this state if the current input character matches the input
#       |                |        character or char class in the left hand column.  "pop" causes the next
#       |                |        state to be popped from the state stack.
#       |                |
#       |                |--- When making the state transition specified on this line, advance to the next
#       |                     character from the input only if 'n' appears here.
#       |
#       |--- Character or named character classes to test for.  If the current character being scanned
#            matches, perform the actions and go to the state specified on this line.
#            The input character is tested sequentially, in the order written.  The characters and
#            character classes tested for do not need to be mutually exclusive.  The first match wins.
#


#
#  start state, scan position is at the beginning of the pattern.
#               The single row matches any input, does not consume it (no 'n'),
#               runs doPatStart and hands the same character on to "term".
#
start:
   default                 term                                     doPatStart


#
#  term.  At a position where we can accept the start of most items in a pattern.
#         Rows are tested top to bottom and the first match wins, so the
#         specific rows are tried before the catch-all "default" error row.
#
term:
    quoted               n expr-quant                               doLiteralChar
    rule_char            n expr-quant                               doLiteralChar
    # set-finish is pushed here; the set-* states "pop" back to it at the closing ']'.
    '['                  n set-open       ^set-finish               doSetBegin
    '('                  n open-paren
    '.'                  n expr-quant                               doDotAny
    # '^' and '$' continue in expr-quant, so the quantifier rows are tried after them.
    '^'                  n expr-quant                               doCaret
    '$'                  n expr-quant                               doDollar
    '\'                  n backslash
    '|'                  n  term                                    doOrOperator
    # "pop": resume in the state that was pushed when the group was opened.
    ')'                  n  pop                                     doCloseParen
    # eof is not consumed and the next state is term again.
    # NOTE(review): presumably doPatFinish ends the scan -- confirm in the action code.
    eof	                   term                                     doPatFinish
    default                errorDeath                               doRuleError


#
#   expr-quant    We've just finished scanning a term, now look for the optional
#                 trailing quantifier - *, +, ?, *?,  etc.
#                 Each quantifier character moves to its own look-ahead state,
#                 which decides between the plain, '?' and '+' suffixed forms.
#
expr-quant:
    '*'                  n  quant-star
    '+'                  n  quant-plus
    '?'                  n  quant-opt
    '{'                  n  interval-open                          doIntervalInit
    # A '(' here may open a (?# comment ) sitting between the term and its quantifier.
    '('                  n  open-paren-quant
    # No quantifier: the character is not consumed and is re-examined by expr-cont.
    default                 expr-cont


#
#  expr-cont      Expression, continuation.  At a point where additional terms are
#                                            allowed, but not required.  No Quantifiers
#                 '|' and ')' are handled here directly; any other character is
#                 left unconsumed and passed to "term".
#
expr-cont:
    '|'                  n  term                                    doOrOperator
    ')'                  n  pop                                     doCloseParen
    default                 term


#
#   open-paren-quant   Special case handling for comments appearing before a quantifier,
#                        e.g.   x(?#comment )*
#                      Open parens from expr-quant come here; anything but a (?# comment
#                      branches into the normal parenthesis sequence as quickly as possible.
#
open-paren-quant:
    '?'                  n  open-paren-quant2                      doSuppressComments
    # Not "(?": leave the character unconsumed and treat this as an ordinary '('.
    default                 open-paren

# open-paren-quant2   Have scanned "(?" directly after a term.
#                     "(?#" is a comment: expr-quant is pushed, so that when paren-comment
#                     pops at the closing ')' a quantifier for the preceding term is still accepted.
#                     Anything else is handed, unconsumed, to open-paren-extended.
open-paren-quant2:
    '#'                  n  paren-comment   ^expr-quant
    default                 open-paren-extended


#
#   open-paren    We've got an open paren.  We need to scan further to
#                 determine what kind of group it is - plain (, (?:, (?>, or whatever.
#
open-paren:
    '?'                  n  open-paren-extended                     doSuppressComments
    # Plain '(': the following character is not consumed.  expr-quant is pushed, so the
    # quantifier rows are tried after the ')' that pops back out of this group.
    default                 term            ^expr-quant             doOpenCaptureParen

# open-paren-extended   Have scanned "(?".  The next character selects the kind of group.
#                       Groups that push expr-quant are followed by the quantifier rows after
#                       their closing ')'; the look-around rows push expr-cont instead,
#                       which bypasses the quantifier rows.
open-paren-extended:
    ':'                  n  term            ^expr-quant             doOpenNonCaptureParen  #  (?:
    '>'                  n  term            ^expr-quant             doOpenAtomicParen      #  (?>
    '='                  n  term            ^expr-cont              doOpenLookAhead        #  (?=
    '!'                  n  term            ^expr-cont              doOpenLookAheadNeg     #  (?!
    '<'                  n  open-paren-lookbehind
    # (?# comment ): term is pushed, so scanning resumes at term after the comment's ')'.
    '#'                  n  paren-comment   ^term
    # Flag letters and '-' are NOT consumed here (no 'n'); paren-flag consumes them,
    # including this first one.
    'i'                     paren-flag                              doBeginMatchMode
    'd'                     paren-flag                              doBeginMatchMode
    'm'                     paren-flag                              doBeginMatchMode
    's'                     paren-flag                              doBeginMatchMode
    'u'                     paren-flag                              doBeginMatchMode
    'w'                     paren-flag                              doBeginMatchMode
    'x'                     paren-flag                              doBeginMatchMode
    '-'                     paren-flag                              doBeginMatchMode
    # "(?(" and "(?{" lead straight to errorDeath, each with its own action.
    '('                  n  errorDeath                              doConditionalExpr
    '{'                  n  errorDeath                              doPerlInline
    default                 errorDeath                              doBadOpenParenType

# open-paren-lookbehind   Have scanned "(?<".  Only '=' and '!' are accepted next.
#                         Both rows push expr-cont, as the look-ahead rows do.
open-paren-lookbehind:
    '='                  n  term            ^expr-cont              doOpenLookBehind       #  (?<=
    '!'                  n  term            ^expr-cont              doOpenLookBehindNeg    #  (?<!
    default                 errorDeath                              doBadOpenParenType


#
#   paren-comment    We've got a (?# ... )  style comment.  Eat pattern text till we get to the ')'
#                    The ')' pops to the state pushed by the row that entered here
#                    (expr-quant or term).  No action is run for the comment text.
#
paren-comment:
    ')'                  n  pop
    eof		                errorDeath                              doMismatchedParenErr
    default              n  paren-comment

#
#  paren-flag    Scanned a (?idmsuwx-idmsuwx  flag setting
#                Each flag character (and '-') is consumed and passed to doMatchMode.
#                The sequence ends with ')' or with ':'.
#
paren-flag:
    'i'                  n  paren-flag                              doMatchMode
    'd'                  n  paren-flag                              doMatchMode
    'm'                  n  paren-flag                              doMatchMode
    's'                  n  paren-flag                              doMatchMode
    'u'                  n  paren-flag                              doMatchMode
    'w'                  n  paren-flag                              doMatchMode
    'x'                  n  paren-flag                              doMatchMode
    '-'                  n  paren-flag                              doMatchMode
    # (?flags)   nothing is pushed; scanning simply continues at term.
    ')'                  n  term                                    doSetMatchMode
    # (?flags: ... )   opens a group; expr-quant is pushed for its closing ')'.
    ':'                  n  term              ^expr-quant           doMatchModeParen
    default                 errorDeath                              doBadModeFlag


#
#  quant-star     Scanning a '*' quantifier.  Need to look ahead to decide
#                 between plain '*', '*?', '*+'
#                 The default row does not consume the look-ahead character.
#
quant-star:
     '?'                 n  expr-cont                               doNGStar               #  *?
     '+'                 n  expr-cont                               doPossessiveStar       #  *+
     default                expr-cont                               doStar                 #  *


#
#  quant-plus     Scanning a '+' quantifier.  Need to look ahead to decide
#                 between plain '+', '+?', '++'
#                 The default row does not consume the look-ahead character.
#
quant-plus:
     '?'                 n  expr-cont                               doNGPlus               #  +?
     '+'                 n  expr-cont                               doPossessivePlus       #  ++
     default                expr-cont                               doPlus                 #  +


#
#  quant-opt  Scanning a '?' quantifier.  Need to look ahead to decide
#                  between plain '?', '??', '?+'
#                  The default row does not consume the look-ahead character.
#
quant-opt:
     '?'                 n  expr-cont                               doNGOpt                 #  ??
     '+'                 n  expr-cont                               doPossessiveOpt         #  ?+
     default                expr-cont                               doOpt                   #  ?


#
#   Interval         scanning a '{', the opening delimiter for an interval specification
#                                   {number} or {min, max} or {min,}
#                    The character after '{' must be a digit.  It is only tested here
#                    (no 'n'); interval-lower consumes it.
#
interval-open:
    digit_char              interval-lower
    default                 errorDeath                              doIntervalError

# interval-lower   Scanning the digits of the lower bound, up to ',' or '}'.
# NOTE(review): "doIntevalLowerDigit" is misspelled, but it is an action name that is
#               emitted into the generated tables; do not correct it in this file alone.
interval-lower:
    digit_char           n  interval-lower                          doIntevalLowerDigit
    ','			         n  interval-upper
    '}'                  n  interval-type                           doIntervalSame             # {n}
    default                 errorDeath                              doIntervalError

# interval-upper   Have scanned "{min,".  Scanning the digits of the upper bound.
#                  '}' is accepted immediately, which is the {min,} form.
interval-upper:
    digit_char           n  interval-upper                          doIntervalUpperDigit
    '}'                  n  interval-type
    default                 errorDeath                              doIntervalError

# interval-type    Have scanned the closing '}' of an interval.  Look ahead to decide
#                  between the plain, '?' and '+' suffixed forms.
#                  The default row does not consume the look-ahead character.
interval-type:
    '?'                  n  expr-cont                               doNGInterval                # {n,m}?
    '+'                  n  expr-cont                               doPossessiveInterval        # {n,m}+
    default                 expr-cont                               doInterval                  # {n,m}


#
#  backslash        Backslash.  Figure out which of the \thingies we have encountered.
#                               The low level next-char function will have preprocessed
#                               some of them already; those won't come here.
#                   Rows whose next state is "term" bypass the quantifier rows of
#                   expr-quant; rows whose next state is "expr-quant" try them next.
#
backslash:
   'A'                   n  term                                    doBackslashA
   'B'                   n  term                                    doBackslashB
   'b'                   n  term                                    doBackslashb
   'd'                   n  expr-quant                              doBackslashd
   'D'                   n  expr-quant                              doBackslashD
   'G'                   n  term                                    doBackslashG
   # 'N', 'p' and 'P' are not consumed by the state machine (no 'n').
   # NOTE(review): presumably the action scans the letter and the {...} that follows -- confirm.
   'N'                      expr-quant                              doNamedChar      #   \N{NAME}  named char
   'p'                      expr-quant                              doProperty       #   \p{Lu}  style property
   'P'                      expr-quant                              doProperty
   'Q'                   n  term                                    doEnterQuoteMode
   'S'                   n  expr-quant                              doBackslashS
   's'                   n  expr-quant                              doBackslashs
   'W'                   n  expr-quant                              doBackslashW
   'w'                   n  expr-quant                              doBackslashw
   'X'                   n  expr-quant                              doBackslashX
   'Z'                   n  term                                    doBackslashZ
   'z'                   n  term                                    doBackslashz
   digit_char            n  expr-quant                              doBackRef         #  Will scan multiple digits
   # eof must be tested before "default", which would otherwise consume it as a literal.
   eof                      errorDeath                              doEscapeError
   default               n  expr-quant                              doEscapedLiteralChar


#
# [set expression] parsing,
#    All states involved in parsing set expressions have names beginning with "set-"
#

# set-open    Have just scanned the '[' that opens a set (top level or nested).
#             Checks for a leading '^' or a possible "[:" posix-style property.
#             The ':' and default rows do not consume the character.
set-open:
   '^'                   n  set-open2                               doSetNegate
   ':'                      set-posix                               doSetPosixProp
   default                  set-open2

# set-open2   After '[' or '[^'.  A ']' in this position is a literal member of the
#             set, not the closing bracket.  Anything else is passed, unconsumed, to set-start.
set-open2:
   ']'                   n  set-after-lit                           doSetLiteral
   default                  set-start

#  set-posix:
#                  scanned a '[:'  If it really is a [:property:], doSetPosixProp will have
#                  moved the scan to the closing ']'.  If it wasn't a property
#                  expression, the scan will still be at the opening ':', which should
#                  be interpreted as a normal set expression.
set-posix:
    ']'                  n   pop                                    doSetEnd
    # Not a property: the ':' is left unconsumed for set-start.
    ':'                      set-start
    default                  errorDeath                             doRuleError  # should not be possible.

#
#   set-start   after the [ and special case leading characters (^ and/or ]) but before
#               everything else.   A '-' is literal at this point.
#   NOTE(review): unlike set-after-lit, set-after-set and set-after-range there is no
#               eof row here, so eof reaches the default row (consumed, doSetLiteral).
#               Confirm that this is intended.
#
set-start:
    ']'                  n  pop                                     doSetEnd
    # Nested set: set-after-set is pushed and becomes current when the nested ']' pops.
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '\'                  n  set-escape
    '-'                  n  set-start-dash
    '&'                  n  set-start-amp
    default              n  set-after-lit                           doSetLiteral

#    set-start-dash    Turn "[--" into a syntax error.
#                           "[-x" is good, - and x are literals.
#                      Neither row consumes the character after the '-'; on the default
#                      row it is re-examined by set-after-lit once the dash has been added.
#
set-start-dash:
    '-'                     errorDeath                              doRuleError
    default                 set-after-lit                           doSetAddDash

#    set-start-amp     Turn "[&&" into a syntax error.
#                           "[&x" is good, & and x are literals.
#                      Neither row consumes the character after the '&'; on the default
#                      row it is re-examined by set-after-lit once the '&' has been added.
#
set-start-amp:
    '&'                     errorDeath                              doRuleError
    default                 set-after-lit                           doSetAddAmp

#
#   set-after-lit    The last thing scanned was a literal character within a set.
#                    Can be followed by anything.  Single '-' or '&' are
#                    literals in this context, not operators.
#                    A '-' or '&' is resolved one character later, in set-lit-dash / set-lit-amp.
set-after-lit:
    ']'                  n  pop                                     doSetEnd
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '-'                  n  set-lit-dash
    '&'                  n  set-lit-amp
    '\'                  n  set-escape
    # eof must be tested before "default", which would otherwise consume it as a literal.
    eof                     errorDeath                              doSetNoCloseError
    default              n  set-after-lit                           doSetLiteral

# set-after-set    The last thing scanned was a complete nested [set] (this is the state
#                  pushed by every nested-'[' row) or a \p / \P property (see set-escape).
#                  A following '-' or '&' is resolved in set-set-dash / set-set-amp,
#                  where it may act as an operator on a set that follows.
set-after-set:
    ']'                  n  pop                                     doSetEnd
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '-'                  n  set-set-dash
    '&'                  n  set-set-amp
    '\'                  n  set-escape
    eof                     errorDeath                              doSetNoCloseError
    default              n  set-after-lit                           doSetLiteral

# set-after-range  The last thing scanned was a range (from set-lit-dash or
#                  set-lit-dash-escape) or one of \s \S \w \W \d \D (from set-escape).
#                  A following '-' or '&' is resolved in set-range-dash / set-range-amp.
set-after-range:
    ']'                  n  pop                                     doSetEnd
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '-'                  n  set-range-dash
    '&'                  n  set-range-amp
    '\'                  n  set-escape
    eof                     errorDeath                              doSetNoCloseError
    default              n  set-after-lit                           doSetLiteral
    

# set-after-op
#     After a --  or &&
#     It is an error to close a set at this point.
#     NOTE(review): there is no eof row here, so eof reaches the default row
#                   (consumed, doSetLiteral).  Confirm that this is intended.
#
set-after-op:
    '['                  n  set-open         ^set-after-set         doSetBeginUnion
    # The ']' is not consumed; the state machine goes straight to errorDeath.
    ']'                     errorDeath                              doSetOpError
    '\'                  n  set-escape
    default              n  set-after-lit                           doSetLiteral

#
#   set-set-amp
#      Have scanned [[set]&
#      Could be a '&' intersection operator, if a set follows.
#      Could be the start of a '&&' operator.
#      Otherwise is a literal.
set-set-amp:
    '['                  n  set-open      ^set-after-set           doSetBeginIntersection1
    '&'                  n  set-after-op                           doSetIntersection2
    # Literal '&': the character after it is not consumed; set-after-lit re-examines it.
    default                 set-after-lit                          doSetAddAmp


# set-lit-amp   Have scanned "[literals&"
#               Could be a start of "&&" operator or a literal
#               In [abc&[def]],   the '&' is a literal
#               (there is no '[' row here, so a following '[' takes the default row).
#
set-lit-amp:
    '&'                  n  set-after-op                            doSetIntersection2
    # Literal '&': the character after it is not consumed; set-after-lit re-examines it.
    default                 set-after-lit                           doSetAddAmp


#
#  set-set-dash
#      Have scanned [set]-
#      Could be a '-' difference operator, if a [set] follows.
#      Could be the start of a '--' operator.
#      Otherwise is a literal.
set-set-dash:
    '['                  n  set-open      ^set-after-set           doSetBeginDifference1
    '-'                  n  set-after-op                           doSetDifference2
    # Literal '-': the character after it is not consumed; set-after-lit re-examines it.
    default                 set-after-lit                          doSetAddDash


#
#  set-range-dash
#      scanned  a-b-  or \w-
#         any set or range like item where the trailing single '-' should
#         be literal, not a set difference operation.
#         A trailing "--" is still a difference operator.
set-range-dash:
    '-'                  n  set-after-op                           doSetDifference2
    # Literal '-': the character after it is not consumed; set-after-lit re-examines it.
    default                 set-after-lit                          doSetAddDash


# set-range-amp
#      Counterpart of set-range-dash for '&': reached from set-after-range on a '&'.
#      "&&" is the intersection operator; a single '&' is added as a literal.
set-range-amp:
    '&'                  n  set-after-op                           doSetIntersection2
    # Literal '&': the character after it is not consumed; set-after-lit re-examines it.
    default                 set-after-lit                          doSetAddAmp


#  set-lit-dash
#     Have scanned "[literals-" Could be a range or a -- operator or a literal
#     In [abc-[def]], the '-' is a literal (confirmed with a Java test)
#        [abc-\p{xx}  the '-' is an error
#        [abc-]       the '-' is a literal
#        [ab-xy]      the '-' is a range
#
set-lit-dash:
    '-'                  n  set-after-op                            doSetDifference2
    # '[' and ']' are not consumed: the dash is added as a literal and set-after-lit
    # then handles the bracket itself.
    '['                     set-after-lit                           doSetAddDash
    ']'                     set-after-lit                           doSetAddDash
    '\'                  n  set-lit-dash-escape
    # Any other character is consumed as the end point of a range.
    default              n  set-after-range                         doSetRange

# set-lit-dash-escape
#
#    scanned "[literal-\"
#    Could be a range, if the \ introduces an escaped literal char or a named char.
#    Otherwise it is an error.
#
#    NOTE(review): there are no 'p' / 'P' rows here, so by this table a \p or \P after
#                  the dash reaches the default row (doSetRange) instead of an error,
#                  although the set-lit-dash header lists "[abc-\p{xx}" as an error.
#                  Confirm whether another layer rejects that case.
#
set-lit-dash-escape:
   # The class escapes cannot be a range end point.  The letter is not consumed.
   's'                      errorDeath                             doSetOpError
   'S'                      errorDeath                             doSetOpError
   'w'                      errorDeath                             doSetOpError
   'W'                      errorDeath                             doSetOpError
   'd'                      errorDeath                             doSetOpError
   'D'                      errorDeath                             doSetOpError
   # \N: the 'N' is not consumed by the state machine (no 'n').
   'N'                      set-after-range                        doSetNamedRange
   default               n  set-after-range                        doSetRange

   
#
#  set-escape
#       Common back-slash escape processing within set expressions
#       The next state records what kind of item was scanned, which decides how a
#       following '-' or '&' is treated:
#           \p \P                 -> set-after-set
#           \s \S \w \W \d \D     -> set-after-range
#           \N and anything else  -> set-after-lit
#
set-escape:
   # 'p', 'P' and 'N' are not consumed by the state machine (no 'n').
   # NOTE(review): presumably the action scans the letter and what follows -- confirm.
   'p'                      set-after-set                           doSetProp
   'P'                      set-after-set                           doSetProp
   'N'                      set-after-lit                           doSetNamedChar
   's'                   n  set-after-range                         doSetBackslash_s
   'S'                   n  set-after-range                         doSetBackslash_S
   'w'                   n  set-after-range                         doSetBackslash_w
   'W'                   n  set-after-range                         doSetBackslash_W
   'd'                   n  set-after-range                         doSetBackslash_d
   'D'                   n  set-after-range                         doSetBackslash_D
   default               n  set-after-lit                           doSetLiteralEscaped 

#
# set-finish
#     Have just encountered the final ']' that completes a [set], and
#     arrived here via a pop.  From here, we exit the set parsing world, and go
#     back to generic regular expression parsing.
#     (This is the state pushed by the '[' row of "term".)  The character after the
#     ']' is not consumed, so expr-quant tests it for a quantifier.
#
set-finish:
    default                 expr-quant                              doSetFinish


#
# errorDeath.   This state is specified as the next state whenever a syntax error
#               in the source pattern is detected.  Barring bugs, the state machine will never
#               actually get here, but will stop because of the action associated with the error.
#               But, just in case, this state asks the state machine to exit.
#               The single row consumes the character and stays in this state.
errorDeath:
    default              n errorDeath                               doExit
Commit	Line	Data
b75a7d8f A	1
	2	#*****************************************************************************
	3	#
46f4442e	4	# Copyright (C) 2002-2007, International Business Machines Corporation and others.
b75a7d8f A	5	# All Rights Reserved.
	6	#
	7	#*****************************************************************************
	8	#
	9	# file: regexcst.txt
	10	# ICU Regular Expression Parser State Table
	11	#
	12	# This state table is used when reading and parsing a regular expression pattern
	13	# The pattern parser uses a state machine; the data in this file define the
	14	# state transitions that occur for each input character.
	15	#
	16	# *** This file defines the regex pattern grammar. This is it.
	17	# *** The determination of what is accepted is here.
	18	#
	19	# This file is processed by a perl script "regexcst.pl" to produce initialized C arrays
	20	# that are then built with the rule parser.
	21	#
	22
	23	#
	24	# Here is the syntax of the state definitions in this file:
	25	#
	26	#
	27	#StateName:
46f4442e A	28	# input-char n next-state ^push-state action
46f4442e A	29	# input-char n next-state ^push-state action
b75a7d8f A	30	# \| \| \| \| \|
	31	# \| \| \| \| \|--- action to be performed by state machine
	32	# \| \| \| \| See function RBBIRuleScanner::doParseActions()
	33	# \| \| \| \|
	34	# \| \| \| \|--- Push this named state onto the state stack.
	35	# \| \| \| Later, when next state is specified as "pop",
	36	# \| \| \| the pushed state will become the current state.
	37	# \| \| \|
	38	# \| \| \|--- Transition to this state if the current input character matches the input
	39	# \| \| character or char class in the left hand column. "pop" causes the next
	40	# \| \| state to be popped from the state stack.
	41	# \| \|
	42	# \| \|--- When making the state transition specified on this line, advance to the next
	43	# \| character from the input only if 'n' appears here.
	44	# \|
	45	# \|--- Character or named character classes to test for. If the current character being scanned
	46	# matches, peform the actions and go to the state specified on this line.
	47	# The input character is tested sequentally, in the order written. The characters and
	48	# character classes tested for do not need to be mutually exclusive. The first match wins.
46f4442e	49	#
b75a7d8f A	50
	51
	52
	53
	54	#
	55	# start state, scan position is at the beginning of the pattern.
	56	#
	57	start:
	58	default term doPatStart
b75a7d8f	59
46f4442e A	60
	61
	62
b75a7d8f A	63	#
	64	# term. At a position where we can accept the start most items in a pattern.
	65	#
	66	term:
	67	quoted n expr-quant doLiteralChar
	68	rule_char n expr-quant doLiteralChar
46f4442e A	69	'[' n set-open ^set-finish doSetBegin
46f4442e A	70	'(' n open-paren
b75a7d8f	71	'.' n expr-quant doDotAny
46f4442e A	72	'^' n expr-quant doCaret
46f4442e A	73	'$' n expr-quant doDollar
b75a7d8f A	74	'\' n backslash
	75	'\|' n term doOrOperator
	76	')' n pop doCloseParen
	77	eof term doPatFinish
	78	default errorDeath doRuleError
46f4442e	79
b75a7d8f A	80
	81
	82	#
	83	# expr-quant We've just finished scanning a term, now look for the optional
	84	# trailing quantifier - , +, ?, ?, etc.
	85	#
	86	expr-quant:
46f4442e A	87	'*' n quant-star
	88	'+' n quant-plus
	89	'?' n quant-opt
b75a7d8f A	90	'{' n interval-open doIntervalInit
b75a7d8f A	91	'(' n open-paren-quant
46f4442e A	92	default expr-cont
	93
	94
b75a7d8f A	95	#
	96	# expr-cont Expression, continuation. At a point where additional terms are
	97	# allowed, but not required. No Quantifiers
	98	#
	99	expr-cont:
	100	'\|' n term doOrOperator
	101	')' n pop doCloseParen
46f4442e A	102	default term
46f4442e A	103
b75a7d8f A	104
	105	#
	106	# open-paren-quant Special case handling for comments appearing before a quantifier,
	107	# e.g. x(?#comment )*
	108	# Open parens from expr-quant come here; anything but a (?# comment
	109	# branches into the normal parenthesis sequence as quickly as possible.
	110	#
	111	open-paren-quant:
	112	'?' n open-paren-quant2 doSuppressComments
	113	default open-paren
46f4442e	114
b75a7d8f A	115	open-paren-quant2:
	116	'#' n paren-comment ^expr-quant
	117	default open-paren-extended
46f4442e A	118
46f4442e A	119
b75a7d8f A	120	#
	121	# open-paren We've got an open paren. We need to scan further to
	122	# determine what kind of quantifier it is - plain (, (?:, (?>, or whatever.
	123	#
	124	open-paren:
	125	'?' n open-paren-extended doSuppressComments
	126	default term ^expr-quant doOpenCaptureParen
46f4442e	127
b75a7d8f A	128	open-paren-extended:
	129	':' n term ^expr-quant doOpenNonCaptureParen # (?:
	130	'>' n term ^expr-quant doOpenAtomicParen # (?>
	131	'=' n term ^expr-cont doOpenLookAhead # (?=
	132	'!' n term ^expr-cont doOpenLookAheadNeg # (?!
	133	'<' n open-paren-lookbehind
	134	'#' n paren-comment ^term
	135	'i' paren-flag doBeginMatchMode
46f4442e	136	'd' paren-flag doBeginMatchMode
b75a7d8f A	137	'm' paren-flag doBeginMatchMode
b75a7d8f A	138	's' paren-flag doBeginMatchMode
46f4442e	139	'u' paren-flag doBeginMatchMode
374ca955	140	'w' paren-flag doBeginMatchMode
b75a7d8f A	141	'x' paren-flag doBeginMatchMode
	142	'-' paren-flag doBeginMatchMode
	143	'(' n errorDeath doConditionalExpr
	144	'{' n errorDeath doPerlInline
	145	default errorDeath doBadOpenParenType
46f4442e	146
b75a7d8f A	147	open-paren-lookbehind:
	148	'=' n term ^expr-cont doOpenLookBehind # (?<=
	149	'!' n term ^expr-cont doOpenLookBehindNeg # (?<!
	150	default errorDeath doBadOpenParenType
46f4442e	151
b75a7d8f A	152
	153	#
	154	# paren-comment We've got a (?# ... ) style comment. Eat pattern text till we get to the ')'
b75a7d8f A	155	#
	156	paren-comment:
	157	')' n pop
	158	eof errorDeath doMismatchedParenErr
	159	default n paren-comment
	160
	161	#
46f4442e A	162	# paren-flag Scanned a (?ismx-ismx flag setting
46f4442e A	163	#
b75a7d8f A	164	paren-flag:
b75a7d8f A	165	'i' n paren-flag doMatchMode
46f4442e	166	'd' n paren-flag doMatchMode
b75a7d8f A	167	'm' n paren-flag doMatchMode
b75a7d8f A	168	's' n paren-flag doMatchMode
46f4442e	169	'u' n paren-flag doMatchMode
374ca955	170	'w' n paren-flag doMatchMode
b75a7d8f A	171	'x' n paren-flag doMatchMode
	172	'-' n paren-flag doMatchMode
	173	')' n term doSetMatchMode
	174	':' n term ^expr-quant doMatchModeParen
374ca955	175	default errorDeath doBadModeFlag
46f4442e A	176
46f4442e A	177
b75a7d8f A	178	#
	179	# quant-star Scanning a '*' quantifier. Need to look ahead to decide
	180	# between plain '', '?', '*+'
	181	#
	182	quant-star:
	183	'?' n expr-cont doNGStar # *?
	184	'+' n expr-cont doPossessiveStar # *+
	185	default expr-cont doStar
	186
	187
	188	#
	189	# quant-plus Scanning a '+' quantifier. Need to look ahead to decide
	190	# between plain '+', '+?', '++'
	191	#
	192	quant-plus:
	193	'?' n expr-cont doNGPlus # *?
	194	'+' n expr-cont doPossessivePlus # *+
	195	default expr-cont doPlus
	196
	197
	198	#
	199	# quant-opt Scanning a '?' quantifier. Need to look ahead to decide
	200	# between plain '?', '??', '?+'
	201	#
	202	quant-opt:
	203	'?' n expr-cont doNGOpt # ??
	204	'+' n expr-cont doPossessiveOpt # ?+
	205	default expr-cont doOpt # ?
	206
	207
	208	#
	209	# Interval scanning a '{', the opening delimiter for an interval specification
46f4442e	210	# {number} or {min, max} or {min,}
b75a7d8f A	211	#
b75a7d8f A	212	interval-open:
46f4442e	213	digit_char interval-lower
b75a7d8f	214	default errorDeath doIntervalError
46f4442e	215
b75a7d8f A	216	interval-lower:
	217	digit_char n interval-lower doIntevalLowerDigit
	218	',' n interval-upper
	219	'}' n interval-type doIntervalSame # {n}
	220	default errorDeath doIntervalError
	221
	222	interval-upper:
	223	digit_char n interval-upper doIntervalUpperDigit
	224	'}' n interval-type
	225	default errorDeath doIntervalError
46f4442e	226
b75a7d8f A	227	interval-type:
	228	'?' n expr-cont doNGInterval # {n,m}?
	229	'+' n expr-cont doPossessiveInterval # {n,m}+
	230	default expr-cont doInterval # {m,n}
46f4442e A	231
46f4442e A	232
b75a7d8f A	233	#
	234	# backslash # Backslash. Figure out which of the \thingies we have encountered.
	235	# The low level next-char function will have preprocessed
	236	# some of them already; those won't come here.
	237	backslash:
	238	'A' n term doBackslashA
	239	'B' n term doBackslashB
	240	'b' n term doBackslashb
	241	'd' n expr-quant doBackslashd
	242	'D' n expr-quant doBackslashD
	243	'G' n term doBackslashG
46f4442e	244	'N' expr-quant doNamedChar # \N{NAME} named char
b75a7d8f A	245	'p' expr-quant doProperty # \p{Lu} style property
	246	'P' expr-quant doProperty
	247	'Q' n term doEnterQuoteMode
	248	'S' n expr-quant doBackslashS
	249	's' n expr-quant doBackslashs
	250	'W' n expr-quant doBackslashW
	251	'w' n expr-quant doBackslashw
	252	'X' n expr-quant doBackslashX
	253	'Z' n term doBackslashZ
	254	'z' n term doBackslashz
46f4442e	255	digit_char n expr-quant doBackRef # Will scan multiple digits
b75a7d8f	256	eof errorDeath doEscapeError
46f4442e A	257	default n expr-quant doEscapedLiteralChar
46f4442e A	258
b75a7d8f	259
46f4442e A	260
	261	#
	262	# [set expression] parsing,
	263	# All states involved in parsing set expressions have names beginning with "set-"
	264	#
	265
	266	set-open:
	267	'^' n set-open2 doSetNegate
	268	':' set-posix doSetPosixProp
	269	default set-open2
	270
	271	set-open2:
	272	']' n set-after-lit doSetLiteral
	273	default set-start
	274
	275	# set-posix:
	276	# scanned a '[:' If it really is a [:property:], doSetPosixProp will have
	277	# moved the scan to the closing ']'. If it wasn't a property
	278	# expression, the scan will still be at the opening ':', which should
	279	# be interpreted as a normal set expression.
	280	set-posix:
	281	']' n pop doSetEnd
	282	':' set-start
	283	default errorDeath doRuleError # should not be possible.
	284
	285	#
	286	# set-start after the [ and special case leading characters (^ and/or ]) but before
	287	# everything else. A '-' is literal at this point.
	288	#
	289	set-start:
	290	']' n pop doSetEnd
	291	'[' n set-open ^set-after-set doSetBeginUnion # nested set; set-after-set is pushed, to be resumed on "pop"
	292	'\' n set-escape
	293	'-' n set-start-dash # literal '-', unless the next char makes it "[--"
	294	'&' n set-start-amp # literal '&', unless the next char makes it "[&&"
	295	default n set-after-lit doSetLiteral
	296
	297	# set-start-dash Turn "[--" into a syntax error.
	298	# "[-x" is good, - and x are literals.
	299	#
	300	set-start-dash:
	301	'-' errorDeath doRuleError # "[--"
	302	default set-after-lit doSetAddDash # "[-x": add the '-' as a literal; x is then handled by set-after-lit
	303
	304	# set-start-amp Turn "[&&" into a syntax error.
	305	# "[&x" is good, & and x are literals.
	306	#
	307	set-start-amp:
	308	'&' errorDeath doRuleError # "[&&"
	309	default set-after-lit doSetAddAmp # "[&x": add the '&' as a literal; x is then handled by set-after-lit
	310
	311	#
	312	# set-after-lit The last thing scanned was a literal character within a set.
	313	# Can be followed by anything. Single '-' or '&' are
	314	# literals in this context, not operators.
	315	set-after-lit:
	316	']' n pop doSetEnd
	317	'[' n set-open ^set-after-set doSetBeginUnion
	318	'-' n set-lit-dash # range, "--" operator or literal '-': decided in set-lit-dash
	319	'&' n set-lit-amp # "&&" operator or literal '&': decided in set-lit-amp
	320	'\' n set-escape
	321	eof errorDeath doSetNoCloseError # input ended before the set was closed
	322	default n set-after-lit doSetLiteral
	323
324	set-after-set: # pushed by the '[' rows and entered from set-escape for \p / \P: the last item was a whole set
325	']' n pop doSetEnd
326	'[' n set-open ^set-after-set doSetBeginUnion
327	'-' n set-set-dash # may be a difference operator: decided in set-set-dash
328	'&' n set-set-amp # may be an intersection operator: decided in set-set-amp
329	'\' n set-escape
330	eof errorDeath doSetNoCloseError # input ended before the set was closed
331	default n set-after-lit doSetLiteral
332
333	set-after-range: # entered after doSetRange / doSetNamedRange, and from set-escape for \s \S \w \W \d \D
334	']' n pop doSetEnd
335	'[' n set-open ^set-after-set doSetBeginUnion
336	'-' n set-range-dash # literal '-' unless doubled: decided in set-range-dash
337	'&' n set-range-amp # literal '&' unless doubled: decided in set-range-amp
338	'\' n set-escape
339	eof errorDeath doSetNoCloseError # input ended before the set was closed
340	default n set-after-lit doSetLiteral
b75a7d8f	341
46f4442e A	342
	343	# set-after-op
	344	# After a -- or &&
	345	# It is an error to close a set at this point.
	346	#
	347	set-after-op:
	348	'[' n set-open ^set-after-set doSetBeginUnion
	349	']' errorDeath doSetOpError # the set was closed directly after the operator
	350	'\' n set-escape
	351	default n set-after-lit doSetLiteral
	352
	353	#
	354	# set-set-amp
	355	# Have scanned [[set]&
	356	# Could be a '&' intersection operator, if a set follows.
	357	# Could be the start of a '&&' operator.
	358	# Otherwise is a literal.
	359	set-set-amp:
	360	'[' n set-open ^set-after-set doSetBeginIntersection1 # [set]&[ : single '&' intersection
	361	'&' n set-after-op doSetIntersection2 # [set]&& operator
	362	default set-after-lit doSetAddAmp # anything else: the '&' was a literal
	363
	364
	365	# set-lit-amp Have scanned "[literals&"
	366	# Could be a start of "&&" operator or a literal
	367	# In [abc&[def]], the '&' is a literal
	368	#
	369	set-lit-amp:
	370	'&' n set-after-op doSetIntersection2 # "&&" operator
	371	default set-after-lit doSetAddAmp # anything else (including '['): the '&' was a literal
	372
	373
	374	#
	375	# set-set-dash
	376	# Have scanned [set]-
	377	# Could be a '-' difference operator, if a [set] follows.
	378	# Could be the start of a '--' operator.
	379	# Otherwise is a literal.
	380	set-set-dash:
	381	'[' n set-open ^set-after-set doSetBeginDifference1 # [set]-[ : single '-' difference
	382	'-' n set-after-op doSetDifference2 # [set]-- operator
	383	default set-after-lit doSetAddDash # anything else: the '-' was a literal
	384
	385
	386	#
	387	# set-range-dash
	388	# scanned a-b- or \w-
	389	# any set or range like item where the trailing single '-' should
	390	# be literal, not a set difference operation.
	391	# A trailing "--" is still a difference operator.
	392	set-range-dash:
	393	'-' n set-after-op doSetDifference2 # "--" operator
	394	default set-after-lit doSetAddDash # single '-': added as a literal
	395
	396
	397	set-range-amp: # entered from set-after-range on '&'; the '&' counterpart of set-range-dash
	398	'&' n set-after-op doSetIntersection2 # "&&" operator
	399	default set-after-lit doSetAddAmp # single '&': added as a literal
	400
	401
	402	# set-lit-dash
	403	# Have scanned "[literals-" Could be a range or a -- operator or a literal
	404	# In [abc-[def]], the '-' is a literal (confirmed with a Java test)
	405	# [abc-\p{xx}] the '-' is an error
406	# [abc-] the '-' is a literal
407	# [ab-xy] the '-' is a range
408	#
409	set-lit-dash:
410	'-' n set-after-op doSetDifference2 # "--" operator
411	'[' set-after-lit doSetAddDash # [abc-[def]] case: '-' added as a literal
412	']' set-after-lit doSetAddDash # [abc-] case: '-' added as a literal
413	'\' n set-lit-dash-escape # the range end may be an escape: decided in set-lit-dash-escape
414	default n set-after-range doSetRange # [ab-xy] case: '-' forms a range
415
	416	# set-lit-dash-escape
	417	#
	418	# scanned "[literal-\"
	419	# Could be a range, if the \ introduces an escaped literal char or a named char.
	420	# Otherwise it is an error.
	421	# NOTE(review): there are no 'p' / 'P' rows here, so \p and \P take the default (doSetRange) row, although the set-lit-dash comment lists [abc-\p{xx} as an error -- confirm this is intended.
	422	set-lit-dash-escape:
	423	's' errorDeath doSetOpError # \s \S \w \W \d \D cannot end a range
	424	'S' errorDeath doSetOpError
	425	'w' errorDeath doSetOpError
	426	'W' errorDeath doSetOpError
	427	'd' errorDeath doSetOpError
	428	'D' errorDeath doSetOpError
	429	'N' set-after-range doSetNamedRange # range ending in a named char
	430	default n set-after-range doSetRange # range ending in an escaped literal char
431
432
	433	#
	434	# set-escape
	435	# Common back-slash escape processing within set expressions
	436	#
	437	set-escape: # the next state records what kind of item the escape produced
	438	'p' set-after-set doSetProp # property: continues in set-after-set
	439	'P' set-after-set doSetProp
	440	'N' set-after-lit doSetNamedChar # named char: continues in set-after-lit
	441	's' n set-after-range doSetBackslash_s # \s \S \w \W \d \D: continue in set-after-range
	442	'S' n set-after-range doSetBackslash_S
	443	'w' n set-after-range doSetBackslash_w
	444	'W' n set-after-range doSetBackslash_W
	445	'd' n set-after-range doSetBackslash_d
	446	'D' n set-after-range doSetBackslash_D
	447	default n set-after-lit doSetLiteralEscaped # any other escaped char: continues in set-after-lit
448
	449	#
	450	# set-finish
	451	# Have just encountered the final ']' that completes a [set], and
	452	# arrived here via a pop. From here, we exit the set parsing world, and go
	453	# back to generic regular expression parsing.
	454	#
	455	set-finish:
	456	default expr-quant doSetFinish # single unconditional row: run doSetFinish, continue in expr-quant
457
458
b75a7d8f A	459	#
	460	# errorDeath. This state is specified as the next state whenever a syntax error
	461	# in the regular expression pattern is detected. Barring bugs, the state machine will never
	462	# actually get here, but will stop because of the action associated with the error.
	463	# But, just in case, this state asks the state machine to exit.
	464	errorDeath:
	465	default n errorDeath doExit # loops on itself; doExit is the only action
	466
	467