From 48bf5def83846c527b4720fcf09db5d8c42eafef Mon Sep 17 00:00:00 2001
From: Ryan Norton <wxprojects@comcast.net>
Date: Thu, 5 Aug 1999 01:16:55 +0000
Subject: [PATCH] Tcl regex lib

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@3274 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
---
 src/regex/re_syntax.n |  970 +++++++++++++++++++++++++
 src/regex/regc_nfa.c  | 1559 +++++++++++++++++++++++++++++++++++++++++
 src/regex/rege_dfa.c  |  699 ++++++++++++++++++
 3 files changed, 3228 insertions(+)
 create mode 100644 src/regex/re_syntax.n
 create mode 100644 src/regex/regc_nfa.c
 create mode 100644 src/regex/rege_dfa.c

diff --git a/src/regex/re_syntax.n b/src/regex/re_syntax.n
new file mode 100644
index 0000000000..f37bb85abd
--- /dev/null
+++ b/src/regex/re_syntax.n
@@ -0,0 +1,970 @@
+'\"
+'\" Copyright (c) 1998 Sun Microsystems, Inc.
+'\" Copyright (c) 1999 Scriptics Corporation
+'\"
+'\" This software is copyrighted by the Regents of the University of
+'\" California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
+'\" Corporation and other parties.  The following terms apply to all files
+'\" associated with the software unless explicitly disclaimed in
+'\" individual files.
+'\" 
+'\" The authors hereby grant permission to use, copy, modify, distribute,
+'\" and license this software and its documentation for any purpose, provided
+'\" that existing copyright notices are retained in all copies and that this
+'\" notice is included verbatim in any distributions. No written agreement,
+'\" license, or royalty fee is required for any of the authorized uses.
+'\" Modifications to this software may be copyrighted by their authors
+'\" and need not follow the licensing terms described here, provided that
+'\" the new terms are clearly indicated on the first page of each file where
+'\" they apply.
+'\" 
+'\" IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
+'\" FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+'\" ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
+'\" DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
+'\" POSSIBILITY OF SUCH DAMAGE.
+'\" 
+'\" THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
+'\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+'\" FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
+'\" IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
+'\" NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
+'\" MODIFICATIONS.
+'\" 
+'\" GOVERNMENT USE: If you are acquiring this software on behalf of the
+'\" U.S. government, the Government shall have only "Restricted Rights"
+'\" in the software and related documentation as defined in the Federal 
+'\" Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
+'\" are acquiring the software on behalf of the Department of Defense, the
+'\" software shall be classified as "Commercial Computer Software" and the
+'\" Government shall have only "Restricted Rights" as defined in Clause
+'\" 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
+'\" authors grant the U.S. Government and others acting in its behalf
+'\" permission to use and distribute the software in accordance with the
+'\" terms specified in this license. 
+'\" 
+'\" RCS: @(#) Id: re_syntax.n,v 1.3 1999/07/14 19:09:36 jpeek Exp 
+'\"
+.so man.macros
+.TH re_syntax n "8.1" Tcl "Tcl Built-In Commands"
+.BS
+.SH NAME
+re_syntax \- Syntax of Tcl regular expressions.
+.BE
+
+.SH DESCRIPTION
+.PP
+A \fIregular expression\fR describes strings of characters.
+It's a pattern that matches certain strings and doesn't match others.
+
+.SH "DIFFERENT FLAVORS OF REs"
+Regular expressions (``RE''s), as defined by POSIX, come in two
+flavors: \fIextended\fR REs (``EREs'') and \fIbasic\fR REs (``BREs'').
+EREs are roughly those of the traditional \fIegrep\fR, while BREs are
+roughly those of the traditional \fIed\fR.  This implementation adds
+a third flavor, \fIadvanced\fR REs (``AREs''), basically EREs with
+some significant extensions.
+.PP
+This manual page primarily describes AREs.  BREs mostly exist for
+backward compatibility in some old programs; they will be discussed at
+the end.  POSIX EREs are almost an exact subset of AREs.  Features of
+AREs that are not present in EREs will be indicated.
+
+.SH "REGULAR EXPRESSION SYNTAX"
+.PP
+Tcl regular expressions are implemented using the package written by
+Henry Spencer, based on the 1003.2 spec and some (not quite all) of
+the Perl5 extensions (thanks, Henry!).  Much of the description of
+regular expressions below is copied verbatim from his manual entry.
+.PP
+An ARE is one or more \fIbranches\fR,
+separated by `\fB|\fR',
+matching anything that matches any of the branches.
+.PP
+A branch is zero or more \fIconstraints\fR or \fIquantified atoms\fR,
+concatenated.
+It matches a match for the first, followed by a match for the second, etc;
+an empty branch matches the empty string.
+.PP
+A quantified atom is an \fIatom\fR possibly followed
+by a single \fIquantifier\fR.
+Without a quantifier, it matches a match for the atom.
+The quantifiers,
+and what a so-quantified atom matches, are:
+.RS 2
+.TP 6
+\fB*\fR
+a sequence of 0 or more matches of the atom
+.TP
+\fB+\fR
+a sequence of 1 or more matches of the atom
+.TP
+\fB?\fR
+a sequence of 0 or 1 matches of the atom
+.TP
+\fB{\fIm\fB}\fR
+a sequence of exactly \fIm\fR matches of the atom
+.TP
+\fB{\fIm\fB,}\fR
+a sequence of \fIm\fR or more matches of the atom
+.TP
+\fB{\fIm\fB,\fIn\fB}\fR
+a sequence of \fIm\fR through \fIn\fR (inclusive) matches of the atom;
+\fIm\fR may not exceed \fIn\fR
+.TP
+\fB*?  +?  ??  {\fIm\fB}?  {\fIm\fB,}?  {\fIm\fB,\fIn\fB}?\fR
+\fInon-greedy\fR quantifiers,
+which match the same possibilities,
+but prefer the smallest number rather than the largest number
+of matches (see MATCHING)
+.RE
+.PP
+The forms using
+\fB{\fR and \fB}\fR
+are known as \fIbound\fRs.
+The numbers
+\fIm\fR and \fIn\fR are unsigned decimal integers
+with permissible values from 0 to 255 inclusive.
+.PP
+An atom is one of:
+.RS 2
+.TP 6
+\fB(\fIre\fB)\fR
+(where \fIre\fR is any regular expression)
+matches a match for
+\fIre\fR, with the match noted for possible reporting
+.TP
+\fB(?:\fIre\fB)\fR
+as previous,
+but does no reporting
+(a ``non-capturing'' set of parentheses)
+.TP
+\fB()\fR
+matches an empty string,
+noted for possible reporting
+.TP
+\fB(?:)\fR
+matches an empty string,
+without reporting
+.TP
+\fB[\fIchars\fB]\fR
+a \fIbracket expression\fR,
+matching any one of the \fIchars\fR (see BRACKET EXPRESSIONS for more detail)
+.TP
+ \fB.\fR
+matches any single character
+.TP
+\fB\e\fIk\fR
+(where \fIk\fR is a non-alphanumeric character)
+matches that character taken as an ordinary character,
+e.g. \e\e matches a backslash character
+.TP
+\fB\e\fIc\fR
+where \fIc\fR is alphanumeric
+(possibly followed by other characters),
+an \fIescape\fR (AREs only),
+see ESCAPES below
+.TP
+\fB{\fR
+when followed by a character other than a digit,
+matches the left-brace character `\fB{\fR';
+when followed by a digit, it is the beginning of a
+\fIbound\fR (see above)
+.TP
+\fIx\fR
+where \fIx\fR is
+a single character with no other significance, matches that character.
+.RE
+.PP
+A \fIconstraint\fR matches an empty string when specific conditions
+are met.
+A constraint may not be followed by a quantifier.
+The simple constraints are as follows; some more constraints are
+described later, under ESCAPES.
+.RS 2
+.TP 8
+\fB^\fR
+matches at the beginning of a line
+.TP
+\fB$\fR
+matches at the end of a line
+.TP
+\fB(?=\fIre\fB)\fR
+\fIpositive lookahead\fR (AREs only), matches at any point
+where a substring matching \fIre\fR begins
+.TP
+\fB(?!\fIre\fB)\fR
+\fInegative lookahead\fR (AREs only), matches at any point
+where no substring matching \fIre\fR begins
+.RE
+.PP
+The lookahead constraints may not contain back references (see later),
+and all parentheses within them are considered non-capturing.
+.PP
+An RE may not end with `\fB\e\fR'.
+
+.SH "BRACKET EXPRESSIONS"
+A \fIbracket expression\fR is a list of characters enclosed in `\fB[\|]\fR'.
+It normally matches any single character from the list (but see below).
+If the list begins with `\fB^\fR',
+it matches any single character
+(but see below) \fInot\fR from the rest of the list.
+.PP
+If two characters in the list are separated by `\fB\-\fR',
+this is shorthand
+for the full \fIrange\fR of characters between those two (inclusive) in the
+collating sequence,
+e.g.
+\fB[0\-9]\fR
+in ASCII matches any decimal digit.
+Two ranges may not share an
+endpoint, so e.g.
+\fBa\-c\-e\fR
+is illegal.
+Ranges are very collating-sequence-dependent,
+and portable programs should avoid relying on them.
+.PP
+To include a literal
+\fB]\fR
+or
+\fB\-\fR
+in the list,
+the simplest method is to
+enclose it in
+\fB[.\fR and \fB.]\fR
+to make it a collating element (see below).
+Alternatively,
+make it the first character
+(following a possible `\fB^\fR'),
+or (AREs only) precede it with `\fB\e\fR'.
+Alternatively, for `\fB\-\fR',
+make it the last character,
+or the second endpoint of a range.
+To use a literal
+\fB\-\fR
+as the first endpoint of a range,
+make it a collating element
+or (AREs only) precede it with `\fB\e\fR'.
+With the exception of these, some combinations using
+\fB[\fR
+(see next
+paragraphs), and escapes,
+all other special characters lose their
+special significance within a bracket expression.
+.PP
+Within a bracket expression, a collating element (a character,
+a multi-character sequence that collates as if it were a single character,
+or a collating-sequence name for either)
+enclosed in
+\fB[.\fR and \fB.]\fR
+stands for the
+sequence of characters of that collating element.
+The sequence is a single element of the bracket expression's list.
+A bracket expression in a locale that has
+multi-character collating elements
+can thus match more than one character.
+.VS 8.2
+So (insidiously), a bracket expression that starts with \fB^\fR
+can match multi-character collating elements even if none of them
+appear in the bracket expression!
+(\fINote:\fR Tcl currently has no multi-character collating elements.
+This information is only for illustration.)
+.PP
+For example, assume the collating sequence includes a \fBch\fR
+multi-character collating element.
+Then the RE \fB[[.ch.]]*c\fR (zero or more \fBch\fP's followed by \fBc\fP)
+matches the first five characters of `\fBchchcc\fR'.
+Also, the RE \fB[^c]b\fR matches all of `\fBchb\fR'
+(because \fB[^c]\fR matches the multi-character \fBch\fR).
+.VE 8.2
+.PP
+Within a bracket expression, a collating element enclosed in
+\fB[=\fR
+and
+\fB=]\fR
+is an equivalence class, standing for the sequences of characters
+of all collating elements equivalent to that one, including itself.
+(If there are no other equivalent collating elements,
+the treatment is as if the enclosing delimiters were `\fB[.\fR'\&
+and `\fB.]\fR'.)
+For example, if
+\fBo\fR
+and
+\fB\o'o^'\fR
+are the members of an equivalence class,
+then `\fB[[=o=]]\fR', `\fB[[=\o'o^'=]]\fR',
+and `\fB[o\o'o^']\fR'\&
+are all synonymous.
+An equivalence class may not be an endpoint
+of a range.
+.VS 8.2
+(\fINote:\fR 
+Tcl currently implements only the Unicode locale.
+It doesn't define any equivalence classes.
+The examples above are just illustrations.)
+.VE 8.2
+.PP
+Within a bracket expression, the name of a \fIcharacter class\fR enclosed
+in
+\fB[:\fR
+and
+\fB:]\fR
+stands for the list of all characters
+(not all collating elements!)
+belonging to that
+class.
+Standard character classes are:
+.PP
+.RS
+.ne 5
+.nf
+.ta 3c
+\fBalpha\fR	A letter. 
+\fBupper\fR	An upper-case letter. 
+\fBlower\fR	A lower-case letter. 
+\fBdigit\fR	A decimal digit. 
+\fBxdigit\fR	A hexadecimal digit. 
+\fBalnum\fR	An alphanumeric (letter or digit). 
+\fBprint\fR	An alphanumeric (same as alnum).
+\fBblank\fR	A space or tab character.
+\fBspace\fR	A character producing white space in displayed text. 
+\fBpunct\fR	A punctuation character. 
+\fBgraph\fR	A character with a visible representation. 
+\fBcntrl\fR	A control character. 
+.fi
+.RE
+.PP
+A locale may provide others.
+.VS 8.2
+(Note that the current Tcl implementation has only one locale:
+the Unicode locale.)
+.VE 8.2
+A character class may not be used as an endpoint of a range.
+.PP
+There are two special cases of bracket expressions:
+the bracket expressions
+\fB[[:<:]]\fR
+and
+\fB[[:>:]]\fR
+are constraints, matching empty strings at
+the beginning and end of a word respectively.
+'\" note, discussion of escapes below references this definition of word
+A word is defined as a sequence of
+word characters
+that is neither preceded nor followed by
+word characters.
+A word character is an
+\fIalnum\fR
+character
+or an underscore
+(\fB_\fR).
+These special bracket expressions are deprecated;
+users of AREs should use constraint escapes instead (see below).
+.SH ESCAPES
+Escapes (AREs only), which begin with a
+\fB\e\fR
+followed by an alphanumeric character,
+come in several varieties:
+character entry, class shorthands, constraint escapes, and back references.
+A
+\fB\e\fR
+followed by an alphanumeric character but not constituting
+a valid escape is illegal in AREs.
+In EREs, there are no escapes:
+outside a bracket expression,
+a
+\fB\e\fR
+followed by an alphanumeric character merely stands for that
+character as an ordinary character,
+and inside a bracket expression,
+\fB\e\fR
+is an ordinary character.
+(The latter is the one actual incompatibility between EREs and AREs.)
+.PP
+Character-entry escapes (AREs only) exist to make it easier to specify
+non-printing and otherwise inconvenient characters in REs:
+.RS 2
+.TP 5
+\fB\ea\fR
+alert (bell) character, as in C
+.TP
+\fB\eb\fR
+backspace, as in C
+.TP
+\fB\eB\fR
+synonym for
+\fB\e\fR
+to help reduce backslash doubling in some
+applications where there are multiple levels of backslash processing
+.TP
+\fB\ec\fIX\fR
+(where X is any character) the character whose
+low-order 5 bits are the same as those of
+\fIX\fR,
+and whose other bits are all zero
+.TP
+\fB\ee\fR
+the character whose collating-sequence name
+is `\fBESC\fR',
+or failing that, the character with octal value 033
+.TP
+\fB\ef\fR
+formfeed, as in C
+.TP
+\fB\en\fR
+newline, as in C
+.TP
+\fB\er\fR
+carriage return, as in C
+.TP
+\fB\et\fR
+horizontal tab, as in C
+.TP
+\fB\eu\fIwxyz\fR
+(where
+\fIwxyz\fR
+is exactly four hexadecimal digits)
+the Unicode character
+\fBU+\fIwxyz\fR
+in the local byte ordering
+.TP
+\fB\eU\fIstuvwxyz\fR
+(where
+\fIstuvwxyz\fR
+is exactly eight hexadecimal digits)
+reserved for a somewhat-hypothetical Unicode extension to 32 bits
+.TP
+\fB\ev\fR
+vertical tab, as in C
+are all available.
+.TP
+\fB\ex\fIhhh\fR
+(where
+\fIhhh\fR
+is any sequence of hexadecimal digits)
+the character whose hexadecimal value is
+\fB0x\fIhhh\fR
+(a single character no matter how many hexadecimal digits are used).
+.TP
+\fB\e0\fR
+the character whose value is
+\fB0\fR
+.TP
+\fB\e\fIxy\fR
+(where
+\fIxy\fR
+is exactly two octal digits,
+and is not a
+\fIback reference\fR (see below))
+the character whose octal value is
+\fB0\fIxy\fR
+.TP
+\fB\e\fIxyz\fR
+(where
+\fIxyz\fR
+is exactly three octal digits,
+and is not a
+back reference (see below))
+the character whose octal value is
+\fB0\fIxyz\fR
+.RE
+.PP
+Hexadecimal digits are `\fB0\fR'-`\fB9\fR', `\fBa\fR'-`\fBf\fR',
+and `\fBA\fR'-`\fBF\fR'.
+Octal digits are `\fB0\fR'-`\fB7\fR'.
+.PP
+The character-entry escapes are always taken as ordinary characters.
+For example,
+\fB\e135\fR
+is
+\fB]\fR
+in ASCII,
+but
+\fB\e135\fR
+does not terminate a bracket expression.
+Beware, however, that some applications (e.g., C compilers) interpret 
+such sequences themselves before the regular-expression package
+gets to see them, which may require doubling (quadrupling, etc.) the `\fB\e\fR'.
+.PP
+Class-shorthand escapes (AREs only) provide shorthands for certain commonly-used
+character classes:
+.RS 2
+.TP 10
+\fB\ed\fR
+\fB[[:digit:]]\fR
+.TP
+\fB\es\fR
+\fB[[:space:]]\fR
+.TP
+\fB\ew\fR
+\fB[[:alnum:]_]\fR
+(note underscore)
+.TP
+\fB\eD\fR
+\fB[^[:digit:]]\fR
+.TP
+\fB\eS\fR
+\fB[^[:space:]]\fR
+.TP
+\fB\eW\fR
+\fB[^[:alnum:]_]\fR
+(note underscore)
+.RE
+.PP
+Within bracket expressions, `\fB\ed\fR', `\fB\es\fR',
+and `\fB\ew\fR'\&
+lose their outer brackets,
+and `\fB\eD\fR', `\fB\eS\fR',
+and `\fB\eW\fR'\&
+are illegal.
+.VS 8.2
+(So, for example, \fB[a-c\ed]\fR is equivalent to \fB[a-c[:digit:]]\fR.
+Also, \fB[a-c\eD]\fR, which is equivalent to \fB[a-c^[:digit:]]\fR, is illegal.)
+.VE 8.2
+.PP
+A constraint escape (AREs only) is a constraint,
+matching the empty string if specific conditions are met,
+written as an escape:
+.RS 2
+.TP 6
+\fB\eA\fR
+matches only at the beginning of the string
+(see MATCHING, below, for how this differs from `\fB^\fR')
+.TP
+\fB\em\fR
+matches only at the beginning of a word
+.TP
+\fB\eM\fR
+matches only at the end of a word
+.TP
+\fB\ey\fR
+matches only at the beginning or end of a word
+.TP
+\fB\eY\fR
+matches only at a point that is not the beginning or end of a word
+.TP
+\fB\eZ\fR
+matches only at the end of the string
+(see MATCHING, below, for how this differs from `\fB$\fR')
+.TP
+\fB\e\fIm\fR
+(where
+\fIm\fR
+is a nonzero digit) a \fIback reference\fR, see below
+.TP
+\fB\e\fImnn\fR
+(where
+\fIm\fR
+is a nonzero digit, and
+\fInn\fR
+is some more digits,
+and the decimal value
+\fImnn\fR
+is not greater than the number of closing capturing parentheses seen so far)
+a \fIback reference\fR, see below
+.RE
+.PP
+A word is defined as in the specification of
+\fB[[:<:]]\fR
+and
+\fB[[:>:]]\fR
+above.
+Constraint escapes are illegal within bracket expressions.
+.PP
+A back reference (AREs only) matches the same string matched by the parenthesized
+subexpression specified by the number,
+so that (e.g.)
+\fB([bc])\e1\fR
+matches
+\fBbb\fR
+or
+\fBcc\fR
+but not `\fBbc\fR'.
+The subexpression must entirely precede the back reference in the RE.
+Subexpressions are numbered in the order of their leading parentheses.
+Non-capturing parentheses do not define subexpressions.
+.PP
+There is an inherent historical ambiguity between octal character-entry 
+escapes and back references, which is resolved by heuristics,
+as hinted at above.
+A leading zero always indicates an octal escape.
+A single non-zero digit, not followed by another digit,
+is always taken as a back reference.
+A multi-digit sequence not starting with a zero is taken as a back 
+reference if it comes after a suitable subexpression
+(i.e. the number is in the legal range for a back reference),
+and otherwise is taken as octal.
+.SH "METASYNTAX"
+In addition to the main syntax described above, there are some special
+forms and miscellaneous syntactic facilities available.
+.PP
+Normally the flavor of RE being used is specified by
+application-dependent means.
+However, this can be overridden by a \fIdirector\fR.
+If an RE of any flavor begins with `\fB***:\fR',
+the rest of the RE is an ARE.
+If an RE of any flavor begins with `\fB***=\fR',
+the rest of the RE is taken to be a literal string,
+with all characters considered ordinary characters.
+.PP
+An ARE may begin with \fIembedded options\fR:
+a sequence
+\fB(?\fIxyz\fB)\fR
+(where
+\fIxyz\fR
+is one or more alphabetic characters)
+specifies options affecting the rest of the RE.
+These supplement, and can override,
+any options specified by the application.
+The available option letters are:
+.RS 2
+.TP 3
+\fBb\fR
+rest of RE is a BRE
+.TP 3
+\fBc\fR
+case-sensitive matching (usual default)
+.TP 3
+\fBe\fR
+rest of RE is an ERE
+.TP 3
+\fBi\fR
+case-insensitive matching (see MATCHING, below)
+.TP 3
+\fBm\fR
+historical synonym for
+\fBn\fR
+.TP 3
+\fBn\fR
+newline-sensitive matching (see MATCHING, below)
+.TP 3
+\fBp\fR
+partial newline-sensitive matching (see MATCHING, below)
+.TP 3
+\fBq\fR
+rest of RE is a literal (``quoted'') string, all ordinary characters
+.TP 3
+\fBs\fR
+non-newline-sensitive matching (usual default)
+.TP 3
+\fBt\fR
+tight syntax (usual default; see below)
+.TP 3
+\fBw\fR
+inverse partial newline-sensitive (``weird'') matching (see MATCHING, below)
+.TP 3
+\fBx\fR
+expanded syntax (see below)
+.RE
+.PP
+Embedded options take effect at the
+\fB)\fR
+terminating the sequence.
+They are available only at the start of an ARE,
+and may not be used later within it.
+.PP
+In addition to the usual (\fItight\fR) RE syntax, in which all characters are
+significant, there is an \fIexpanded\fR syntax,
+available in all flavors of RE
+with the \fB-expanded\fR switch, or in AREs with the embedded x option.
+In the expanded syntax,
+white-space characters are ignored
+and all characters between a
+\fB#\fR
+and the following newline (or the end of the RE) are ignored,
+permitting paragraphing and commenting a complex RE.
+There are three exceptions to that basic rule:
+.RS 2
+.PP
+a white-space character or `\fB#\fR' preceded by `\fB\e\fR' is retained
+.PP
+white space or `\fB#\fR' within a bracket expression is retained
+.PP
+white space and comments are illegal within multi-character symbols
+like the ARE `\fB(?:\fR' or the BRE `\fB\e(\fR'
+.RE
+.PP
+Expanded-syntax white-space characters are blank, tab, newline, and
+.VS 8.2
+any character that belongs to the \fIspace\fR character class.
+.VE 8.2
+.PP
+Finally, in an ARE,
+outside bracket expressions, the sequence `\fB(?#\fIttt\fB)\fR'
+(where
+\fIttt\fR
+is any text not containing a `\fB)\fR')
+is a comment,
+completely ignored.
+Again, this is not allowed between the characters of
+multi-character symbols like `\fB(?:\fR'.
+Such comments are more a historical artifact than a useful facility,
+and their use is deprecated;
+use the expanded syntax instead.
+.PP
+\fINone\fR of these metasyntax extensions is available if the application
+(or an initial
+\fB***=\fR
+director)
+has specified that the user's input be treated as a literal string
+rather than as an RE.
+.SH MATCHING
+In the event that an RE could match more than one substring of a given
+string,
+the RE matches the one starting earliest in the string.
+If the RE could match more than one substring starting at that point,
+its choice is determined by its \fIpreference\fR:
+either the longest substring, or the shortest.
+.PP
+Most atoms, and all constraints, have no preference.
+A parenthesized RE has the same preference (possibly none) as the RE.
+A quantified atom with quantifier
+\fB{\fIm\fB}\fR
+or
+\fB{\fIm\fB}?\fR
+has the same preference (possibly none) as the atom itself.
+A quantified atom with other normal quantifiers (including
+\fB{\fIm\fB,\fIn\fB}\fR
+with
+\fIm\fR
+equal to
+\fIn\fR)
+prefers longest match.
+A quantified atom with other non-greedy quantifiers (including
+\fB{\fIm\fB,\fIn\fB}?\fR
+with
+\fIm\fR
+equal to
+\fIn\fR)
+prefers shortest match.
+A branch has the same preference as the first quantified atom in it
+which has a preference.
+An RE consisting of two or more branches connected by the
+\fB|\fR
+operator prefers longest match.
+.PP
+Subject to the constraints imposed by the rules for matching the whole RE,
+subexpressions also match the longest or shortest possible substrings,
+based on their preferences,
+with subexpressions starting earlier in the RE taking priority over
+ones starting later.
+Note that outer subexpressions thus take priority over
+their component subexpressions.
+.PP
+Note that the quantifiers
+\fB{1,1}\fR
+and
+\fB{1,1}?\fR
+can be used to force longest and shortest preference, respectively,
+on a subexpression or a whole RE.
+.PP
+Match lengths are measured in characters, not collating elements.
+An empty string is considered longer than no match at all.
+For example,
+\fBbb*\fR
+matches the three middle characters of `\fBabbbc\fR',
+\fB(week|wee)(night|knights)\fR
+matches all ten characters of `\fBweeknights\fR',
+when
+\fB(.*).*\fR
+is matched against
+\fBabc\fR
+the parenthesized subexpression
+matches all three characters, and
+when
+\fB(a*)*\fR
+is matched against
+\fBbc\fR
+both the whole RE and the parenthesized
+subexpression match an empty string.
+.PP
+If case-independent matching is specified,
+the effect is much as if all case distinctions had vanished from the
+alphabet.
+When an alphabetic that exists in multiple cases appears as an
+ordinary character outside a bracket expression, it is effectively
+transformed into a bracket expression containing both cases,
+so that
+\fBx\fR
+becomes `\fB[xX]\fR'.
+When it appears inside a bracket expression, all case counterparts
+of it are added to the bracket expression, so that
+\fB[x]\fR
+becomes
+\fB[xX]\fR
+and
+\fB[^x]\fR
+becomes `\fB[^xX]\fR'.
+.PP
+If newline-sensitive matching is specified, \fB.\fR
+and bracket expressions using
+\fB^\fR
+will never match the newline character
+(so that matches will never cross newlines unless the RE
+explicitly arranges it)
+and
+\fB^\fR
+and
+\fB$\fR
+will match the empty string after and before a newline
+respectively, in addition to matching at beginning and end of string
+respectively.
+ARE
+\fB\eA\fR
+and
+\fB\eZ\fR
+continue to match beginning or end of string \fIonly\fR.
+.PP
+If partial newline-sensitive matching is specified,
+this affects \fB.\fR
+and bracket expressions
+as with newline-sensitive matching, but not
+\fB^\fR
+and `\fB$\fR'.
+.PP
+If inverse partial newline-sensitive matching is specified,
+this affects
+\fB^\fR
+and
+\fB$\fR
+as with
+newline-sensitive matching,
+but not \fB.\fR
+and bracket expressions.
+This isn't very useful but is provided for symmetry.
+.SH "LIMITS AND COMPATIBILITY"
+No particular limit is imposed on the length of REs.
+Programs intended to be highly portable should not employ REs longer
+than 256 bytes,
+as a POSIX-compliant implementation can refuse to accept such REs.
+.PP
+The only feature of AREs that is actually incompatible with
+POSIX EREs is that
+\fB\e\fR
+does not lose its special
+significance inside bracket expressions.
+All other ARE features use syntax which is illegal or has
+undefined or unspecified effects in POSIX EREs;
+the
+\fB***\fR
+syntax of directors likewise is outside the POSIX
+syntax for both BREs and EREs.
+.PP
+Many of the ARE extensions are borrowed from Perl, but some have
+been changed to clean them up, and a few Perl extensions are not present.
+Incompatibilities of note include `\fB\eb\fR', `\fB\eB\fR',
+the lack of special treatment for a trailing newline,
+the addition of complemented bracket expressions to the things
+affected by newline-sensitive matching,
+the restrictions on parentheses and back references in lookahead constraints,
+and the longest/shortest-match (rather than first-match) matching semantics.
+.PP
+The matching rules for REs containing both normal and non-greedy quantifiers
+have changed since early beta-test versions of this package.
+(The new rules are much simpler and cleaner,
+but don't work as hard at guessing the user's real intentions.)
+.PP
+Henry Spencer's original 1986 \fIregexp\fR package,
+still in widespread use (e.g., in pre-8.1 releases of Tcl),
+implemented an early version of today's EREs.
+There are four incompatibilities between \fIregexp\fR's near-EREs
+(`RREs' for short) and AREs.
+In roughly increasing order of significance:
+.PP
+.RS
+In AREs,
+\fB\e\fR
+followed by an alphanumeric character is either an
+escape or an error,
+while in RREs, it was just another way of writing the 
+alphanumeric.
+This should not be a problem because there was no reason to write
+such a sequence in RREs.
+.PP
+\fB{\fR
+followed by a digit in an ARE is the beginning of a bound,
+while in RREs,
+\fB{\fR
+was always an ordinary character.
+Such sequences should be rare,
+and will often result in an error because following characters
+will not look like a valid bound.
+.PP
+In AREs,
+\fB\e\fR
+remains a special character within `\fB[\|]\fR',
+so a literal
+\fB\e\fR
+within
+\fB[\|]\fR
+must be written `\fB\e\e\fR'.
+\fB\e\e\fR
+also gives a literal
+\fB\e\fR
+within
+\fB[\|]\fR
+in RREs,
+but only truly paranoid programmers routinely doubled the backslash.
+.PP
+AREs report the longest/shortest match for the RE,
+rather than the first found in a specified search order.
+This may affect some RREs which were written in the expectation that
+the first match would be reported.
+(The careful crafting of RREs to optimize the search order for fast
+matching is obsolete (AREs examine all possible matches
+in parallel, and their performance is largely insensitive to their
+complexity) but cases where the search order was exploited to deliberately 
+find a match which was \fInot\fR the longest/shortest will need rewriting.)
+.RE
+
+.SH "BASIC REGULAR EXPRESSIONS"
+BREs differ from EREs in several respects.  `\fB|\fR', `\fB+\fR',
+and
+\fB?\fR
+are ordinary characters and there is no equivalent
+for their functionality.
+The delimiters for bounds are
+\fB\e{\fR
+and `\fB\e}\fR',
+with
+\fB{\fR
+and
+\fB}\fR
+by themselves ordinary characters.
+The parentheses for nested subexpressions are
+\fB\e(\fR
+and `\fB\e)\fR',
+with
+\fB(\fR
+and
+\fB)\fR
+by themselves ordinary characters.
+\fB^\fR
+is an ordinary character except at the beginning of the
+RE or the beginning of a parenthesized subexpression,
+\fB$\fR
+is an ordinary character except at the end of the
+RE or the end of a parenthesized subexpression,
+and
+\fB*\fR
+is an ordinary character if it appears at the beginning of the
+RE or the beginning of a parenthesized subexpression
+(after a possible leading `\fB^\fR').
+Finally,
+single-digit back references are available,
+and
+\fB\e<\fR
+and
+\fB\e>\fR
+are synonyms for
+\fB[[:<:]]\fR
+and
+\fB[[:>:]]\fR
+respectively;
+no other escapes are available.
+
+.SH "SEE ALSO"
+RegExp(3), regexp(n), regsub(n), lsearch(n), switch(n), text(n)
+
+.SH KEYWORDS
+match, regular expression, string
diff --git a/src/regex/regc_nfa.c b/src/regex/regc_nfa.c
new file mode 100644
index 0000000000..cc9f6ea2f9
--- /dev/null
+++ b/src/regex/regc_nfa.c
@@ -0,0 +1,1559 @@
+/*
+ * NFA utilities.
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.	All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Header$
+ *
+ *
+ * One or two things that technically ought to be in here
+ * are actually in color.c, thanks to some incestuous relationships in
+ * the color chains.
+ */
+
+#define NISERR()	VISERR(nfa->v)
+#define NERR(e)		VERR(nfa->v, (e))
+
+
+/*
+ * newnfa - set up an NFA
+ */
+static struct nfa *				/* the NFA, or NULL */
+newnfa(struct vars * v,
+	   struct colormap * cm,
+	   struct nfa * parent)		/* NULL if primary NFA */
+{
+	struct nfa *nfa;
+
+	nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
+	if (nfa == NULL)
+		return NULL;
+
+	nfa->states = NULL;
+	nfa->slast = NULL;
+	nfa->free = NULL;
+	nfa->nstates = 0;
+	nfa->cm = cm;
+	nfa->v = v;
+	nfa->bos[0] = nfa->bos[1] = COLORLESS;
+	nfa->eos[0] = nfa->eos[1] = COLORLESS;
+	nfa->post = newfstate(nfa, '@');	/* number 0 */
+	nfa->pre = newfstate(nfa, '>');		/* number 1 */
+	nfa->parent = parent;
+
+	nfa->init = newstate(nfa);	/* may become invalid later */
+	nfa->final = newstate(nfa);
+	if (ISERR())
+	{
+		freenfa(nfa);
+		return NULL;
+	}
+	rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
+	newarc(nfa, '^', 1, nfa->pre, nfa->init);
+	newarc(nfa, '^', 0, nfa->pre, nfa->init);
+	rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
+	newarc(nfa, '$', 1, nfa->final, nfa->post);
+	newarc(nfa, '$', 0, nfa->final, nfa->post);
+
+	if (ISERR())
+	{
+		freenfa(nfa);
+		return NULL;
+	}
+	return nfa;
+}
+
+/*
+ * freenfa - free an entire NFA
+ */
+static void
+freenfa(struct nfa * nfa)
+{
+	struct state *s;
+
+	while ((s = nfa->states) != NULL)
+	{
+		s->nins = s->nouts = 0; /* don't worry about arcs */
+		freestate(nfa, s);
+	}
+	while ((s = nfa->free) != NULL)
+	{
+		nfa->free = s->next;
+		destroystate(nfa, s);
+	}
+
+	nfa->slast = NULL;
+	nfa->nstates = -1;
+	nfa->pre = NULL;
+	nfa->post = NULL;
+	FREE(nfa);
+}
+
+/*
+ * newstate - allocate an NFA state, with zero flag value
+ */
+static struct state *			/* NULL on error */
+newstate(struct nfa * nfa)
+{
+	struct state *s;
+
+	if (nfa->free != NULL)
+	{
+		s = nfa->free;
+		nfa->free = s->next;
+	}
+	else
+	{
+		s = (struct state *) MALLOC(sizeof(struct state));
+		if (s == NULL)
+		{
+			NERR(REG_ESPACE);
+			return NULL;
+		}
+		s->oas.next = NULL;
+		s->free = NULL;
+		s->noas = 0;
+	}
+
+	assert(nfa->nstates >= 0);
+	s->no = nfa->nstates++;
+	s->flag = 0;
+	if (nfa->states == NULL)
+		nfa->states = s;
+	s->nins = 0;
+	s->ins = NULL;
+	s->nouts = 0;
+	s->outs = NULL;
+	s->tmp = NULL;
+	s->next = NULL;
+	if (nfa->slast != NULL)
+	{
+		assert(nfa->slast->next == NULL);
+		nfa->slast->next = s;
+	}
+	s->prev = nfa->slast;
+	nfa->slast = s;
+	return s;
+}
+
+/*
+ * newfstate - allocate an NFA state with a specified flag value
+ */
+static struct state *			/* NULL on error */
+newfstate(struct nfa * nfa, int flag)
+{
+	struct state *s;
+
+	s = newstate(nfa);
+	if (s != NULL)
+		s->flag = (char) flag;
+	return s;
+}
+
+/*
+ * dropstate - delete a state's inarcs and outarcs and free it
+ */
+static void
+dropstate(struct nfa * nfa,
+		  struct state * s)
+{
+	struct arc *a;
+
+	while ((a = s->ins) != NULL)
+		freearc(nfa, a);
+	while ((a = s->outs) != NULL)
+		freearc(nfa, a);
+	freestate(nfa, s);
+}
+
+/*
+ * freestate - free a state, which has no in-arcs or out-arcs
+ */
+static void
+freestate(struct nfa * nfa,
+		  struct state * s)
+{
+	assert(s != NULL);
+	assert(s->nins == 0 && s->nouts == 0);
+
+	s->no = FREESTATE;
+	s->flag = 0;
+	if (s->next != NULL)
+		s->next->prev = s->prev;
+	else
+	{
+		assert(s == nfa->slast);
+		nfa->slast = s->prev;
+	}
+	if (s->prev != NULL)
+		s->prev->next = s->next;
+	else
+	{
+		assert(s == nfa->states);
+		nfa->states = s->next;
+	}
+	s->prev = NULL;
+	s->next = nfa->free;		/* don't delete it, put it on the free
+								 * list */
+	nfa->free = s;
+}
+
+/*
+ * destroystate - really get rid of an already-freed state
+ */
+static void
+destroystate(struct nfa * nfa,
+			 struct state * s)
+{
+	struct arcbatch *ab;
+	struct arcbatch *abnext;
+
+	assert(s->no == FREESTATE);
+	for (ab = s->oas.next; ab != NULL; ab = abnext)
+	{
+		abnext = ab->next;
+		FREE(ab);
+	}
+	s->ins = NULL;
+	s->outs = NULL;
+	s->next = NULL;
+	FREE(s);
+}
+
+/*
+ * newarc - set up a new arc within an NFA
+ */
+static void
+newarc(struct nfa * nfa,
+	   int t,
+	   pcolor co,
+	   struct state * from,
+	   struct state * to)
+{
+	struct arc *a;
+
+	assert(from != NULL && to != NULL);
+
+	/* check for duplicates */
+	for (a = from->outs; a != NULL; a = a->outchain)
+		if (a->to == to && a->co == co && a->type == t)
+			return;
+
+	a = allocarc(nfa, from);
+	if (NISERR())
+		return;
+	assert(a != NULL);
+
+	a->type = t;
+	a->co = (color) co;
+	a->to = to;
+	a->from = from;
+
+	/*
+	 * Put the new arc on the beginning, not the end, of the chains. Not
+	 * only is this easier, it has the very useful side effect that
+	 * deleting the most-recently-added arc is the cheapest case rather
+	 * than the most expensive one.
+	 */
+	a->inchain = to->ins;
+	to->ins = a;
+	a->outchain = from->outs;
+	from->outs = a;
+
+	from->nouts++;
+	to->nins++;
+
+	if (COLORED(a) && nfa->parent == NULL)
+		colorchain(nfa->cm, a);
+
+	return;
+}
+
+/*
+ * allocarc - allocate a new out-arc within a state
+ */
+static struct arc *				/* NULL for failure */
+allocarc(struct nfa * nfa,
+		 struct state * s)
+{
+	struct arc *a;
+	struct arcbatch *new;
+	int			i;
+
+	/* shortcut */
+	if (s->free == NULL && s->noas < ABSIZE)
+	{
+		a = &s->oas.a[s->noas];
+		s->noas++;
+		return a;
+	}
+
+	/* if none at hand, get more */
+	if (s->free == NULL)
+	{
+		new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
+		if (new == NULL)
+		{
+			NERR(REG_ESPACE);
+			return NULL;
+		}
+		new->next = s->oas.next;
+		s->oas.next = new;
+
+		for (i = 0; i < ABSIZE; i++)
+		{
+			new->a[i].type = 0;
+			new->a[i].freechain = &new->a[i + 1];
+		}
+		new->a[ABSIZE - 1].freechain = NULL;
+		s->free = &new->a[0];
+	}
+	assert(s->free != NULL);
+
+	a = s->free;
+	s->free = a->freechain;
+	return a;
+}
+
+/*
+ * freearc - free an arc
+ */
+static void
+freearc(struct nfa * nfa,
+		struct arc * victim)
+{
+	struct state *from = victim->from;
+	struct state *to = victim->to;
+	struct arc *a;
+
+	assert(victim->type != 0);
+
+	/* take it off color chain if necessary */
+	if (COLORED(victim) && nfa->parent == NULL)
+		uncolorchain(nfa->cm, victim);
+
+	/* take it off source's out-chain */
+	assert(from != NULL);
+	assert(from->outs != NULL);
+	a = from->outs;
+	if (a == victim)			/* simple case:  first in chain */
+		from->outs = victim->outchain;
+	else
+	{
+		for (; a != NULL && a->outchain != victim; a = a->outchain)
+			continue;
+		assert(a != NULL);
+		a->outchain = victim->outchain;
+	}
+	from->nouts--;
+
+	/* take it off target's in-chain */
+	assert(to != NULL);
+	assert(to->ins != NULL);
+	a = to->ins;
+	if (a == victim)			/* simple case:  first in chain */
+		to->ins = victim->inchain;
+	else
+	{
+		for (; a != NULL && a->inchain != victim; a = a->inchain)
+			continue;
+		assert(a != NULL);
+		a->inchain = victim->inchain;
+	}
+	to->nins--;
+
+	/* clean up and place on free list */
+	victim->type = 0;
+	victim->from = NULL;		/* precautions... */
+	victim->to = NULL;
+	victim->inchain = NULL;
+	victim->outchain = NULL;
+	victim->freechain = from->free;
+	from->free = victim;
+}
+
+/*
+ * findarc - find arc, if any, from given source with given type and color
+ * If there is more than one such arc, the result is random.
+ */
+static struct arc *
+findarc(struct state * s,
+		int type,
+		pcolor co)
+{
+	struct arc *a;
+
+	for (a = s->outs; a != NULL; a = a->outchain)
+		if (a->type == type && a->co == co)
+			return a;
+	return NULL;
+}
+
+/*
+ * cparc - allocate a new arc within an NFA, copying details from old one
+ */
+static void
+cparc(struct nfa * nfa,
+	  struct arc * oa,
+	  struct state * from,
+	  struct state * to)
+{
+	newarc(nfa, oa->type, oa->co, from, to);
+}
+
+/*
+ * moveins - move all in arcs of a state to another state
+ *
+ * You might think this could be done better by just updating the
+ * existing arcs, and you would be right if it weren't for the desire
+ * for duplicate suppression, which makes it easier to just make new
+ * ones to exploit the suppression built into newarc.
+ */
+static void
+moveins(struct nfa * nfa,
+		struct state * old,
+		struct state * new)
+{
+	struct arc *a;
+
+	assert(old != new);
+
+	while ((a = old->ins) != NULL)
+	{
+		cparc(nfa, a, a->from, new);
+		freearc(nfa, a);
+	}
+	assert(old->nins == 0);
+	assert(old->ins == NULL);
+}
+
+/*
+ * copyins - copy all in arcs of a state to another state
+ */
+static void
+copyins(struct nfa * nfa,
+		struct state * old,
+		struct state * new)
+{
+	struct arc *a;
+
+	assert(old != new);
+
+	for (a = old->ins; a != NULL; a = a->inchain)
+		cparc(nfa, a, a->from, new);
+}
+
+/*
+ * moveouts - move all out arcs of a state to another state
+ */
+static void
+moveouts(struct nfa * nfa,
+		 struct state * old,
+		 struct state * new)
+{
+	struct arc *a;
+
+	assert(old != new);
+
+	while ((a = old->outs) != NULL)
+	{
+		cparc(nfa, a, new, a->to);
+		freearc(nfa, a);
+	}
+}
+
+/*
+ * copyouts - copy all out arcs of a state to another state
+ */
+static void
+copyouts(struct nfa * nfa,
+		 struct state * old,
+		 struct state * new)
+{
+	struct arc *a;
+
+	assert(old != new);
+
+	for (a = old->outs; a != NULL; a = a->outchain)
+		cparc(nfa, a, new, a->to);
+}
+
+/*
+ * cloneouts - copy out arcs of a state to another state pair, modifying type
+ */
+static void
+cloneouts(struct nfa * nfa,
+		  struct state * old,
+		  struct state * from,
+		  struct state * to,
+		  int type)
+{
+	struct arc *a;
+
+	assert(old != from);
+
+	for (a = old->outs; a != NULL; a = a->outchain)
+		newarc(nfa, type, a->co, from, to);
+}
+
+/*
+ * delsub - delete a sub-NFA, updating subre pointers if necessary
+ *
+ * This uses a recursive traversal of the sub-NFA, marking already-seen
+ * states using their tmp pointer.
+ */
+static void
+delsub(struct nfa * nfa,
+	   struct state * lp,		/* the sub-NFA goes from here... */
+	   struct state * rp)		/* ...to here, *not* inclusive */
+{
+	assert(lp != rp);
+
+	rp->tmp = rp;				/* mark end */
+
+	deltraverse(nfa, lp, lp);
+	assert(lp->nouts == 0 && rp->nins == 0);	/* did the job */
+	assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
+
+	rp->tmp = NULL;				/* unmark end */
+	lp->tmp = NULL;				/* and begin, marked by deltraverse */
+}
+
+/*
+ * deltraverse - the recursive heart of delsub
+ * This routine's basic job is to destroy all out-arcs of the state.
+ */
+static void
+deltraverse(struct nfa * nfa,
+			struct state * leftend,
+			struct state * s)
+{
+	struct arc *a;
+	struct state *to;
+
+	if (s->nouts == 0)
+		return;					/* nothing to do */
+	if (s->tmp != NULL)
+		return;					/* already in progress */
+
+	s->tmp = s;					/* mark as in progress */
+
+	while ((a = s->outs) != NULL)
+	{
+		to = a->to;
+		deltraverse(nfa, leftend, to);
+		assert(to->nouts == 0 || to->tmp != NULL);
+		freearc(nfa, a);
+		if (to->nins == 0 && to->tmp == NULL)
+		{
+			assert(to->nouts == 0);
+			freestate(nfa, to);
+		}
+	}
+
+	assert(s->no != FREESTATE); /* we're still here */
+	assert(s == leftend || s->nins != 0);		/* and still reachable */
+	assert(s->nouts == 0);		/* but have no outarcs */
+
+	s->tmp = NULL;				/* we're done here */
+}
+
+/*
+ * dupnfa - duplicate sub-NFA
+ *
+ * Another recursive traversal, this time using tmp to point to duplicates
+ * as well as mark already-seen states.  (You knew there was a reason why
+ * it's a state pointer, didn't you? :-))
+ */
+static void
+dupnfa(struct nfa * nfa,
+	   struct state * start,	/* duplicate of subNFA starting here */
+	   struct state * stop,		/* and stopping here */
+	   struct state * from,		/* stringing duplicate from here */
+	   struct state * to)		/* to here */
+{
+	if (start == stop)
+	{
+		newarc(nfa, EMPTY, 0, from, to);
+		return;
+	}
+
+	stop->tmp = to;
+	duptraverse(nfa, start, from);
+	/* done, except for clearing out the tmp pointers */
+
+	stop->tmp = NULL;
+	cleartraverse(nfa, start);
+}
+
+/*
+ * duptraverse - recursive heart of dupnfa
+ */
+static void
+duptraverse(struct nfa * nfa,
+			struct state * s,
+			struct state * stmp)	/* s's duplicate, or NULL */
+{
+	struct arc *a;
+
+	if (s->tmp != NULL)
+		return;					/* already done */
+
+	s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
+	if (s->tmp == NULL)
+	{
+		assert(NISERR());
+		return;
+	}
+
+	for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
+	{
+		duptraverse(nfa, a->to, (struct state *) NULL);
+		assert(a->to->tmp != NULL);
+		cparc(nfa, a, s->tmp, a->to->tmp);
+	}
+}
+
+/*
+ * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
+ */
+static void
+cleartraverse(struct nfa * nfa,
+			  struct state * s)
+{
+	struct arc *a;
+
+	if (s->tmp == NULL)
+		return;
+	s->tmp = NULL;
+
+	for (a = s->outs; a != NULL; a = a->outchain)
+		cleartraverse(nfa, a->to);
+}
+
+/*
+ * specialcolors - fill in special colors for an NFA
+ */
+static void
+specialcolors(struct nfa * nfa)
+{
+	/* false colors for BOS, BOL, EOS, EOL */
+	if (nfa->parent == NULL)
+	{
+		nfa->bos[0] = pseudocolor(nfa->cm);
+		nfa->bos[1] = pseudocolor(nfa->cm);
+		nfa->eos[0] = pseudocolor(nfa->cm);
+		nfa->eos[1] = pseudocolor(nfa->cm);
+	}
+	else
+	{
+		assert(nfa->parent->bos[0] != COLORLESS);
+		nfa->bos[0] = nfa->parent->bos[0];
+		assert(nfa->parent->bos[1] != COLORLESS);
+		nfa->bos[1] = nfa->parent->bos[1];
+		assert(nfa->parent->eos[0] != COLORLESS);
+		nfa->eos[0] = nfa->parent->eos[0];
+		assert(nfa->parent->eos[1] != COLORLESS);
+		nfa->eos[1] = nfa->parent->eos[1];
+	}
+}
+
+/*
+ * optimize - optimize an NFA
+ */
+static long						/* re_info bits */
+optimize(struct nfa * nfa,
+		 FILE *f)				/* for debug output; NULL none */
+{
+#ifdef REG_DEBUG
+	int			verbose = (f != NULL) ? 1 : 0;
+
+	if (verbose)
+		fprintf(f, "\ninitial cleanup:\n");
+#endif
+	cleanup(nfa);				/* may simplify situation */
+#ifdef REG_DEBUG
+	if (verbose)
+		dumpnfa(nfa, f);
+	if (verbose)
+		fprintf(f, "\nempties:\n");
+#endif
+	fixempties(nfa, f);			/* get rid of EMPTY arcs */
+#ifdef REG_DEBUG
+	if (verbose)
+		fprintf(f, "\nconstraints:\n");
+#endif
+	pullback(nfa, f);			/* pull back constraints backward */
+	pushfwd(nfa, f);			/* push fwd constraints forward */
+#ifdef REG_DEBUG
+	if (verbose)
+		fprintf(f, "\nfinal cleanup:\n");
+#endif
+	cleanup(nfa);				/* final tidying */
+	return analyze(nfa);		/* and analysis */
+}
+
+/*
+ * pullback - pull back constraints backward to (with luck) eliminate them
+ */
+static void
+pullback(struct nfa * nfa,
+		 FILE *f)				/* for debug output; NULL none */
+{
+	struct state *s;
+	struct state *nexts;
+	struct arc *a;
+	struct arc *nexta;
+	int			progress;
+
+	/* find and pull until there are no more */
+	do
+	{
+		progress = 0;
+		for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
+		{
+			nexts = s->next;
+			for (a = s->outs; a != NULL && !NISERR(); a = nexta)
+			{
+				nexta = a->outchain;
+				if (a->type == '^' || a->type == BEHIND)
+					if (pull(nfa, a))
+						progress = 1;
+				assert(nexta == NULL || s->no != FREESTATE);
+			}
+		}
+		if (progress && f != NULL)
+			dumpnfa(nfa, f);
+	} while (progress && !NISERR());
+	if (NISERR())
+		return;
+
+	for (a = nfa->pre->outs; a != NULL; a = nexta)
+	{
+		nexta = a->outchain;
+		if (a->type == '^')
+		{
+			assert(a->co == 0 || a->co == 1);
+			newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
+			freearc(nfa, a);
+		}
+	}
+}
+
+/*
+ * pull - pull a back constraint backward past its source state
+ * A significant property of this function is that it deletes at most
+ * one state -- the constraint's from state -- and only if the constraint
+ * was that state's last outarc.
+ */
+static int						/* 0 couldn't, 1 could */
+pull(struct nfa * nfa,
+	 struct arc * con)
+{
+	struct state *from = con->from;
+	struct state *to = con->to;
+	struct arc *a;
+	struct arc *nexta;
+	struct state *s;
+
+	if (from == to)
+	{							/* circular constraint is pointless */
+		freearc(nfa, con);
+		return 1;
+	}
+	if (from->flag)				/* can't pull back beyond start */
+		return 0;
+	if (from->nins == 0)
+	{							/* unreachable */
+		freearc(nfa, con);
+		return 1;
+	}
+
+	/* first, clone from state if necessary to avoid other outarcs */
+	if (from->nouts > 1)
+	{
+		s = newstate(nfa);
+		if (NISERR())
+			return 0;
+		assert(to != from);		/* con is not an inarc */
+		copyins(nfa, from, s);	/* duplicate inarcs */
+		cparc(nfa, con, s, to); /* move constraint arc */
+		freearc(nfa, con);
+		from = s;
+		con = from->outs;
+	}
+	assert(from->nouts == 1);
+
+	/* propagate the constraint into the from state's inarcs */
+	for (a = from->ins; a != NULL; a = nexta)
+	{
+		nexta = a->inchain;
+		switch (combine(con, a))
+		{
+			case INCOMPATIBLE:	/* destroy the arc */
+				freearc(nfa, a);
+				break;
+			case SATISFIED:		/* no action needed */
+				break;
+			case COMPATIBLE:	/* swap the two arcs, more or less */
+				s = newstate(nfa);
+				if (NISERR())
+					return 0;
+				cparc(nfa, a, s, to);	/* anticipate move */
+				cparc(nfa, con, a->from, s);
+				if (NISERR())
+					return 0;
+				freearc(nfa, a);
+				break;
+			default:
+				assert(NOTREACHED);
+				break;
+		}
+	}
+
+	/* remaining inarcs, if any, incorporate the constraint */
+	moveins(nfa, from, to);
+	dropstate(nfa, from);		/* will free the constraint */
+	return 1;
+}
+
+/*
+ * pushfwd - push forward constraints forward to (with luck) eliminate them
+ */
+static void
+pushfwd(struct nfa * nfa,
+		FILE *f)				/* for debug output; NULL none */
+{
+	struct state *s;
+	struct state *nexts;
+	struct arc *a;
+	struct arc *nexta;
+	int			progress;
+
+	/* find and push until there are no more */
+	do
+	{
+		progress = 0;
+		for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
+		{
+			nexts = s->next;
+			for (a = s->ins; a != NULL && !NISERR(); a = nexta)
+			{
+				nexta = a->inchain;
+				if (a->type == '$' || a->type == AHEAD)
+					if (push(nfa, a))
+						progress = 1;
+				assert(nexta == NULL || s->no != FREESTATE);
+			}
+		}
+		if (progress && f != NULL)
+			dumpnfa(nfa, f);
+	} while (progress && !NISERR());
+	if (NISERR())
+		return;
+
+	for (a = nfa->post->ins; a != NULL; a = nexta)
+	{
+		nexta = a->inchain;
+		if (a->type == '$')
+		{
+			assert(a->co == 0 || a->co == 1);
+			newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
+			freearc(nfa, a);
+		}
+	}
+}
+
+/*
+ * push - push a forward constraint forward past its destination state
+ * A significant property of this function is that it deletes at most
+ * one state -- the constraint's to state -- and only if the constraint
+ * was that state's last inarc.
+ */
+static int						/* 0 couldn't, 1 could */
+push(struct nfa * nfa,
+	 struct arc * con)
+{
+	struct state *from = con->from;
+	struct state *to = con->to;
+	struct arc *a;
+	struct arc *nexta;
+	struct state *s;
+
+	if (to == from)
+	{							/* circular constraint is pointless */
+		freearc(nfa, con);
+		return 1;
+	}
+	if (to->flag)				/* can't push forward beyond end */
+		return 0;
+	if (to->nouts == 0)
+	{							/* dead end */
+		freearc(nfa, con);
+		return 1;
+	}
+
+	/* first, clone to state if necessary to avoid other inarcs */
+	if (to->nins > 1)
+	{
+		s = newstate(nfa);
+		if (NISERR())
+			return 0;
+		copyouts(nfa, to, s);	/* duplicate outarcs */
+		cparc(nfa, con, from, s);		/* move constraint */
+		freearc(nfa, con);
+		to = s;
+		con = to->ins;
+	}
+	assert(to->nins == 1);
+
+	/* propagate the constraint into the to state's outarcs */
+	for (a = to->outs; a != NULL; a = nexta)
+	{
+		nexta = a->outchain;
+		switch (combine(con, a))
+		{
+			case INCOMPATIBLE:	/* destroy the arc */
+				freearc(nfa, a);
+				break;
+			case SATISFIED:		/* no action needed */
+				break;
+			case COMPATIBLE:	/* swap the two arcs, more or less */
+				s = newstate(nfa);
+				if (NISERR())
+					return 0;
+				cparc(nfa, con, s, a->to);		/* anticipate move */
+				cparc(nfa, a, from, s);
+				if (NISERR())
+					return 0;
+				freearc(nfa, a);
+				break;
+			default:
+				assert(NOTREACHED);
+				break;
+		}
+	}
+
+	/* remaining outarcs, if any, incorporate the constraint */
+	moveouts(nfa, to, from);
+	dropstate(nfa, to);			/* will free the constraint */
+	return 1;
+}
+
+/*
+ * combine - constraint lands on an arc, what happens?
+ *
+ * #def INCOMPATIBLE	1	// destroys arc
+ * #def SATISFIED		2	// constraint satisfied
+ * #def COMPATIBLE		3	// compatible but not satisfied yet
+ */
+static int
+combine(struct arc * con,
+		struct arc * a)
+{
+#define  CA(ct,at)	 (((ct)<<CHAR_BIT) | (at))
+
+	switch (CA(con->type, a->type))
+	{
+		case CA('^', PLAIN):	/* newlines are handled separately */
+		case CA('$', PLAIN):
+			return INCOMPATIBLE;
+			break;
+		case CA(AHEAD, PLAIN):	/* color constraints meet colors */
+		case CA(BEHIND, PLAIN):
+			if (con->co == a->co)
+				return SATISFIED;
+			return INCOMPATIBLE;
+			break;
+		case CA('^', '^'):		/* collision, similar constraints */
+		case CA('$', '$'):
+		case CA(AHEAD, AHEAD):
+		case CA(BEHIND, BEHIND):
+			if (con->co == a->co)		/* true duplication */
+				return SATISFIED;
+			return INCOMPATIBLE;
+			break;
+		case CA('^', BEHIND):	/* collision, dissimilar constraints */
+		case CA(BEHIND, '^'):
+		case CA('$', AHEAD):
+		case CA(AHEAD, '$'):
+			return INCOMPATIBLE;
+			break;
+		case CA('^', '$'):		/* constraints passing each other */
+		case CA('^', AHEAD):
+		case CA(BEHIND, '$'):
+		case CA(BEHIND, AHEAD):
+		case CA('$', '^'):
+		case CA('$', BEHIND):
+		case CA(AHEAD, '^'):
+		case CA(AHEAD, BEHIND):
+		case CA('^', LACON):
+		case CA(BEHIND, LACON):
+		case CA('$', LACON):
+		case CA(AHEAD, LACON):
+			return COMPATIBLE;
+			break;
+	}
+	assert(NOTREACHED);
+	return INCOMPATIBLE;		/* for benefit of blind compilers */
+}
+
+/*
+ * fixempties - get rid of EMPTY arcs
+ */
+static void
+fixempties(struct nfa * nfa,
+		   FILE *f)				/* for debug output; NULL none */
+{
+	struct state *s;
+	struct state *nexts;
+	struct arc *a;
+	struct arc *nexta;
+	int			progress;
+
+	/* find and eliminate empties until there are no more */
+	do
+	{
+		progress = 0;
+		for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
+		{
+			nexts = s->next;
+			for (a = s->outs; a != NULL && !NISERR(); a = nexta)
+			{
+				nexta = a->outchain;
+				if (a->type == EMPTY && unempty(nfa, a))
+					progress = 1;
+				assert(nexta == NULL || s->no != FREESTATE);
+			}
+		}
+		if (progress && f != NULL)
+			dumpnfa(nfa, f);
+	} while (progress && !NISERR());
+}
+
+/*
+ * unempty - optimize out an EMPTY arc, if possible
+ *
+ * Actually, as it stands this function always succeeds, but the return
+ * value is kept with an eye on possible future changes.
+ */
+static int						/* 0 couldn't, 1 could */
+unempty(struct nfa * nfa,
+		struct arc * a)
+{
+	struct state *from = a->from;
+	struct state *to = a->to;
+	int			usefrom;		/* work on from, as opposed to to? */
+
+	assert(a->type == EMPTY);
+	assert(from != nfa->pre && to != nfa->post);
+
+	if (from == to)
+	{							/* vacuous loop */
+		freearc(nfa, a);
+		return 1;
+	}
+
+	/* decide which end to work on */
+	usefrom = 1;				/* default:  attack from */
+	if (from->nouts > to->nins)
+		usefrom = 0;
+	else if (from->nouts == to->nins)
+	{
+		/* decide on secondary issue:  move/copy fewest arcs */
+		if (from->nins > to->nouts)
+			usefrom = 0;
+	}
+
+	freearc(nfa, a);
+	if (usefrom)
+	{
+		if (from->nouts == 0)
+		{
+			/* was the state's only outarc */
+			moveins(nfa, from, to);
+			freestate(nfa, from);
+		}
+		else
+			copyins(nfa, from, to);
+	}
+	else
+	{
+		if (to->nins == 0)
+		{
+			/* was the state's only inarc */
+			moveouts(nfa, to, from);
+			freestate(nfa, to);
+		}
+		else
+			copyouts(nfa, to, from);
+	}
+
+	return 1;
+}
+
+/*
+ * cleanup - clean up NFA after optimizations
+ */
+static void
+cleanup(struct nfa * nfa)
+{
+	struct state *s;
+	struct state *nexts;
+	int			n;
+
+	/* clear out unreachable or dead-end states */
+	/* use pre to mark reachable, then post to mark can-reach-post */
+	markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
+	markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
+	for (s = nfa->states; s != NULL; s = nexts)
+	{
+		nexts = s->next;
+		if (s->tmp != nfa->post && !s->flag)
+			dropstate(nfa, s);
+	}
+	assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
+	cleartraverse(nfa, nfa->pre);
+	assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
+	/* the nins==0 (final unreachable) case will be caught later */
+
+	/* renumber surviving states */
+	n = 0;
+	for (s = nfa->states; s != NULL; s = s->next)
+		s->no = n++;
+	nfa->nstates = n;
+}
+
+/*
+ * markreachable - recursive marking of reachable states
+ */
+static void
+markreachable(struct nfa * nfa,
+			  struct state * s,
+			  struct state * okay,		/* consider only states with this
+										 * mark */
+			  struct state * mark)		/* the value to mark with */
+{
+	struct arc *a;
+
+	if (s->tmp != okay)
+		return;
+	s->tmp = mark;
+
+	for (a = s->outs; a != NULL; a = a->outchain)
+		markreachable(nfa, a->to, okay, mark);
+}
+
+/*
+ * markcanreach - recursive marking of states which can reach here
+ */
+static void
+markcanreach(struct nfa * nfa,
+			 struct state * s,
+			 struct state * okay,		/* consider only states with this
+										 * mark */
+			 struct state * mark)		/* the value to mark with */
+{
+	struct arc *a;
+
+	if (s->tmp != okay)
+		return;
+	s->tmp = mark;
+
+	for (a = s->ins; a != NULL; a = a->inchain)
+		markcanreach(nfa, a->from, okay, mark);
+}
+
+/*
+ * analyze - ascertain potentially-useful facts about an optimized NFA
+ */
+static long						/* re_info bits to be ORed in */
+analyze(struct nfa * nfa)
+{
+	struct arc *a;
+	struct arc *aa;
+
+	if (nfa->pre->outs == NULL)
+		return REG_UIMPOSSIBLE;
+	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
+		for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
+			if (aa->to == nfa->post)
+				return REG_UEMPTYMATCH;
+	return 0;
+}
+
+/*
+ * compact - compact an NFA
+ */
+static void
+compact(struct nfa * nfa,
+		struct cnfa * cnfa)
+{
+	struct state *s;
+	struct arc *a;
+	size_t		nstates;
+	size_t		narcs;
+	struct carc *ca;
+	struct carc *first;
+
+	assert(!NISERR());
+
+	nstates = 0;
+	narcs = 0;
+	for (s = nfa->states; s != NULL; s = s->next)
+	{
+		nstates++;
+		narcs += 1 + s->nouts + 1;
+		/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
+	}
+
+	cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
+	cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
+	if (cnfa->states == NULL || cnfa->arcs == NULL)
+	{
+		if (cnfa->states != NULL)
+			FREE(cnfa->states);
+		if (cnfa->arcs != NULL)
+			FREE(cnfa->arcs);
+		NERR(REG_ESPACE);
+		return;
+	}
+	cnfa->nstates = nstates;
+	cnfa->pre = nfa->pre->no;
+	cnfa->post = nfa->post->no;
+	cnfa->bos[0] = nfa->bos[0];
+	cnfa->bos[1] = nfa->bos[1];
+	cnfa->eos[0] = nfa->eos[0];
+	cnfa->eos[1] = nfa->eos[1];
+	cnfa->ncolors = maxcolor(nfa->cm) + 1;
+	cnfa->flags = 0;
+
+	ca = cnfa->arcs;
+	for (s = nfa->states; s != NULL; s = s->next)
+	{
+		assert((size_t) s->no < nstates);
+		cnfa->states[s->no] = ca;
+		ca->co = 0;				/* clear and skip flags "arc" */
+		ca++;
+		first = ca;
+		for (a = s->outs; a != NULL; a = a->outchain)
+			switch (a->type)
+			{
+				case PLAIN:
+					ca->co = a->co;
+					ca->to = a->to->no;
+					ca++;
+					break;
+				case LACON:
+					assert(s->no != cnfa->pre);
+					ca->co = (color) (cnfa->ncolors + a->co);
+					ca->to = a->to->no;
+					ca++;
+					cnfa->flags |= HASLACONS;
+					break;
+				default:
+					assert(NOTREACHED);
+					break;
+			}
+		carcsort(first, ca - 1);
+		ca->co = COLORLESS;
+		ca->to = 0;
+		ca++;
+	}
+	assert(ca == &cnfa->arcs[narcs]);
+	assert(cnfa->nstates != 0);
+
+	/* mark no-progress states */
+	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
+		cnfa->states[a->to->no]->co = 1;
+	cnfa->states[nfa->pre->no]->co = 1;
+}
+
+/*
+ * carcsort - sort compacted-NFA arcs by color
+ *
+ * Really dumb algorithm, but if the list is long enough for that to matter,
+ * you're in real trouble anyway.
+ */
+static void
+carcsort(struct carc * first,
+		 struct carc * last)
+{
+	struct carc *p;
+	struct carc *q;
+	struct carc tmp;
+
+	if (last - first <= 1)
+		return;
+
+	for (p = first; p <= last; p++)
+		for (q = p; q <= last; q++)
+			if (p->co > q->co ||
+				(p->co == q->co && p->to > q->to))
+			{
+				assert(p != q);
+				tmp = *p;
+				*p = *q;
+				*q = tmp;
+			}
+}
+
+/*
+ * freecnfa - free a compacted NFA
+ */
+static void
+freecnfa(struct cnfa * cnfa)
+{
+	assert(cnfa->nstates != 0); /* not empty already */
+	cnfa->nstates = 0;
+	FREE(cnfa->states);
+	FREE(cnfa->arcs);
+}
+
+/*
+ * dumpnfa - dump an NFA in human-readable form
+ */
+static void
+dumpnfa(struct nfa * nfa,
+		FILE *f)
+{
+#ifdef REG_DEBUG
+	struct state *s;
+
+	fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
+	if (nfa->bos[0] != COLORLESS)
+		fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
+	if (nfa->bos[1] != COLORLESS)
+		fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
+	if (nfa->eos[0] != COLORLESS)
+		fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
+	if (nfa->eos[1] != COLORLESS)
+		fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
+	fprintf(f, "\n");
+	for (s = nfa->states; s != NULL; s = s->next)
+		dumpstate(s, f);
+	if (nfa->parent == NULL)
+		dumpcolors(nfa->cm, f);
+	fflush(f);
+#endif
+}
+
+#ifdef REG_DEBUG				/* subordinates of dumpnfa */
+
+/*
+ * dumpstate - dump an NFA state in human-readable form
+ */
+static void
+dumpstate(struct state * s,
+		  FILE *f)
+{
+	struct arc *a;
+
+	fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
+			(s->flag) ? s->flag : '.');
+	if (s->prev != NULL && s->prev->next != s)
+		fprintf(f, "\tstate chain bad\n");
+	if (s->nouts == 0)
+		fprintf(f, "\tno out arcs\n");
+	else
+		dumparcs(s, f);
+	fflush(f);
+	for (a = s->ins; a != NULL; a = a->inchain)
+	{
+		if (a->to != s)
+			fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
+					a->from->no, a->to->no, s->no);
+	}
+}
+
+/*
+ * dumparcs - dump out-arcs in human-readable form
+ */
+static void
+dumparcs(struct state * s,
+		 FILE *f)
+{
+	int			pos;
+
+	assert(s->nouts > 0);
+	/* printing arcs in reverse order is usually clearer */
+	pos = dumprarcs(s->outs, s, f, 1);
+	if (pos != 1)
+		fprintf(f, "\n");
+}
+
+/*
+ * dumprarcs - dump remaining outarcs, recursively, in reverse order
+ */
+static int						/* resulting print position */
+dumprarcs(struct arc * a,
+		  struct state * s,
+		  FILE *f,
+		  int pos)				/* initial print position */
+{
+	if (a->outchain != NULL)
+		pos = dumprarcs(a->outchain, s, f, pos);
+	dumparc(a, s, f);
+	if (pos == 5)
+	{
+		fprintf(f, "\n");
+		pos = 1;
+	}
+	else
+		pos++;
+	return pos;
+}
+
+/*
+ * dumparc - dump one outarc in readable form, including prefixing tab
+ */
+static void
+dumparc(struct arc * a,
+		struct state * s,
+		FILE *f)
+{
+	struct arc *aa;
+	struct arcbatch *ab;
+
+	fprintf(f, "\t");
+	switch (a->type)
+	{
+		case PLAIN:
+			fprintf(f, "[%ld]", (long) a->co);
+			break;
+		case AHEAD:
+			fprintf(f, ">%ld>", (long) a->co);
+			break;
+		case BEHIND:
+			fprintf(f, "<%ld<", (long) a->co);
+			break;
+		case LACON:
+			fprintf(f, ":%ld:", (long) a->co);
+			break;
+		case '^':
+		case '$':
+			fprintf(f, "%c%d", a->type, (int) a->co);
+			break;
+		case EMPTY:
+			break;
+		default:
+			fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
+			break;
+	}
+	if (a->from != s)
+		fprintf(f, "?%d?", a->from->no);
+	for (ab = &a->from->oas; ab != NULL; ab = ab->next)
+	{
+		for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
+			if (aa == a)
+				break;			/* NOTE BREAK OUT */
+		if (aa < &ab->a[ABSIZE])	/* propagate break */
+			break;				/* NOTE BREAK OUT */
+	}
+	if (ab == NULL)
+		fprintf(f, "?!?");		/* not in allocated space */
+	fprintf(f, "->");
+	if (a->to == NULL)
+	{
+		fprintf(f, "NULL");
+		return;
+	}
+	fprintf(f, "%d", a->to->no);
+	for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
+		if (aa == a)
+			break;				/* NOTE BREAK OUT */
+	if (aa == NULL)
+		fprintf(f, "?!?");		/* missing from in-chain */
+}
+#endif   /* REG_DEBUG */
+
+/*
+ * dumpcnfa - dump a compacted NFA in human-readable form
+ */
+#ifdef REG_DEBUG
+static void
+dumpcnfa(struct cnfa * cnfa,
+		 FILE *f)
+{
+	int			st;
+
+	fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
+	if (cnfa->bos[0] != COLORLESS)
+		fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
+	if (cnfa->bos[1] != COLORLESS)
+		fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
+	if (cnfa->eos[0] != COLORLESS)
+		fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
+	if (cnfa->eos[1] != COLORLESS)
+		fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
+	if (cnfa->flags & HASLACONS)
+		fprintf(f, ", haslacons");
+	fprintf(f, "\n");
+	for (st = 0; st < cnfa->nstates; st++)
+		dumpcstate(st, cnfa->states[st], cnfa, f);
+	fflush(f);
+}
+#endif
+
+#ifdef REG_DEBUG				/* subordinates of dumpcnfa */
+
+/*
+ * dumpcstate - dump a compacted-NFA state in human-readable form
+ */
+static void
+dumpcstate(int st,
+		   struct carc * ca,
+		   struct cnfa * cnfa,
+		   FILE *f)
+{
+	int			i;
+	int			pos;
+
+	fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
+	pos = 1;
+	for (i = 1; ca[i].co != COLORLESS; i++)
+	{
+		if (ca[i].co < cnfa->ncolors)
+			fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
+		else
+			fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
+					ca[i].to);
+		if (pos == 5)
+		{
+			fprintf(f, "\n");
+			pos = 1;
+		}
+		else
+			pos++;
+	}
+	if (i == 1 || pos != 1)
+		fprintf(f, "\n");
+	fflush(f);
+}
+
+#endif   /* REG_DEBUG */
diff --git a/src/regex/rege_dfa.c b/src/regex/rege_dfa.c
new file mode 100644
index 0000000000..5347b90d73
--- /dev/null
+++ b/src/regex/rege_dfa.c
@@ -0,0 +1,699 @@
+/*
+ * DFA routines
+ * This file is #included by regexec.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.	All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Header$
+ *
+ */
+
+/*
+ * longest - longest-preferred matching engine
+ */
+static chr *					/* endpoint, or NULL */
+longest(struct vars * v,		/* used only for debug and exec flags */
+		struct dfa * d,
+		chr *start,				/* where the match should start */
+		chr *stop,				/* match must end at or before here */
+		int *hitstopp)			/* record whether hit v->stop, if non-NULL */
+{
+	chr		   *cp;
+	chr		   *realstop = (stop == v->stop) ? stop : stop + 1;
+	color		co;
+	struct sset *css;
+	struct sset *ss;
+	chr		   *post;
+	int			i;
+	struct colormap *cm = d->cm;
+
+	/* initialize */
+	css = initialize(v, d, start);
+	cp = start;
+	if (hitstopp != NULL)
+		*hitstopp = 0;
+
+	/* startup */
+	FDEBUG(("+++ startup +++\n"));
+	if (cp == v->start)
+	{
+		co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long) co));
+	}
+	else
+	{
+		co = GETCOLOR(cm, *(cp - 1));
+		FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
+	}
+	css = miss(v, d, css, co, cp, start);
+	if (css == NULL)
+		return NULL;
+	css->lastseen = cp;
+
+	/* main loop */
+	if (v->eflags & REG_FTRACE)
+		while (cp < realstop)
+		{
+			FDEBUG(("+++ at c%d +++\n", css - d->ssets));
+			co = GETCOLOR(cm, *cp);
+			FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
+			ss = css->outs[co];
+			if (ss == NULL)
+			{
+				ss = miss(v, d, css, co, cp + 1, start);
+				if (ss == NULL)
+					break;		/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+	else
+		while (cp < realstop)
+		{
+			co = GETCOLOR(cm, *cp);
+			ss = css->outs[co];
+			if (ss == NULL)
+			{
+				ss = miss(v, d, css, co, cp + 1, start);
+				if (ss == NULL)
+					break;		/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+
+	/* shutdown */
+	FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
+	if (cp == v->stop && stop == v->stop)
+	{
+		if (hitstopp != NULL)
+			*hitstopp = 1;
+		co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long) co));
+		ss = miss(v, d, css, co, cp, start);
+		/* special case:  match ended at eol? */
+		if (ss != NULL && (ss->flags & POSTSTATE))
+			return cp;
+		else if (ss != NULL)
+			ss->lastseen = cp;	/* to be tidy */
+	}
+
+	/* find last match, if any */
+	post = d->lastpost;
+	for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
+		if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
+			(post == NULL || post < ss->lastseen))
+			post = ss->lastseen;
+	if (post != NULL)			/* found one */
+		return post - 1;
+
+	return NULL;
+}
+
+/*
+ * shortest - shortest-preferred matching engine
+ */
+static chr *					/* endpoint, or NULL */
+shortest(struct vars * v,
+		 struct dfa * d,
+		 chr *start,			/* where the match should start */
+		 chr *min,				/* match must end at or after here */
+		 chr *max,				/* match must end at or before here */
+		 chr **coldp,			/* store coldstart pointer here, if
+								 * nonNULL */
+		 int *hitstopp)			/* record whether hit v->stop, if non-NULL */
+{
+	chr		   *cp;
+	chr		   *realmin = (min == v->stop) ? min : min + 1;
+	chr		   *realmax = (max == v->stop) ? max : max + 1;
+	color		co;
+	struct sset *css;
+	struct sset *ss;
+	struct colormap *cm = d->cm;
+
+	/* initialize */
+	css = initialize(v, d, start);
+	cp = start;
+	if (hitstopp != NULL)
+		*hitstopp = 0;
+
+	/* startup */
+	FDEBUG(("--- startup ---\n"));
+	if (cp == v->start)
+	{
+		co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long) co));
+	}
+	else
+	{
+		co = GETCOLOR(cm, *(cp - 1));
+		FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
+	}
+	css = miss(v, d, css, co, cp, start);
+	if (css == NULL)
+		return NULL;
+	css->lastseen = cp;
+	ss = css;
+
+	/* main loop */
+	if (v->eflags & REG_FTRACE)
+		while (cp < realmax)
+		{
+			FDEBUG(("--- at c%d ---\n", css - d->ssets));
+			co = GETCOLOR(cm, *cp);
+			FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
+			ss = css->outs[co];
+			if (ss == NULL)
+			{
+				ss = miss(v, d, css, co, cp + 1, start);
+				if (ss == NULL)
+					break;		/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+			if ((ss->flags & POSTSTATE) && cp >= realmin)
+				break;			/* NOTE BREAK OUT */
+		}
+	else
+		while (cp < realmax)
+		{
+			co = GETCOLOR(cm, *cp);
+			ss = css->outs[co];
+			if (ss == NULL)
+			{
+				ss = miss(v, d, css, co, cp + 1, start);
+				if (ss == NULL)
+					break;		/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+			if ((ss->flags & POSTSTATE) && cp >= realmin)
+				break;			/* NOTE BREAK OUT */
+		}
+
+	if (ss == NULL)
+		return NULL;
+
+	if (coldp != NULL)			/* report last no-progress state set, if
+								 * any */
+		*coldp = lastcold(v, d);
+
+	if ((ss->flags & POSTSTATE) && cp > min)
+	{
+		assert(cp >= realmin);
+		cp--;
+	}
+	else if (cp == v->stop && max == v->stop)
+	{
+		co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long) co));
+		ss = miss(v, d, css, co, cp, start);
+		/* match might have ended at eol */
+		if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
+			*hitstopp = 1;
+	}
+
+	if (ss == NULL || !(ss->flags & POSTSTATE))
+		return NULL;
+
+	return cp;
+}
+
+/*
+ * lastcold - determine last point at which no progress had been made
+ */
+static chr *					/* endpoint, or NULL */
+lastcold(struct vars * v,
+		 struct dfa * d)
+{
+	struct sset *ss;
+	chr		   *nopr;
+	int			i;
+
+	nopr = d->lastnopr;
+	if (nopr == NULL)
+		nopr = v->start;
+	for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
+		if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
+			nopr = ss->lastseen;
+	return nopr;
+}
+
+/*
+ * newdfa - set up a fresh DFA
+ */
+static struct dfa *
+newdfa(struct vars * v,
+	   struct cnfa * cnfa,
+	   struct colormap * cm,
+	   struct smalldfa * small) /* preallocated space, may be NULL */
+{
+	struct dfa *d;
+	size_t		nss = cnfa->nstates * 2;
+	int			wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
+	struct smalldfa *smallwas = small;
+
+	assert(cnfa != NULL && cnfa->nstates != 0);
+
+	if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
+	{
+		assert(wordsper == 1);
+		if (small == NULL)
+		{
+			small = (struct smalldfa *) MALLOC(
+											   sizeof(struct smalldfa));
+			if (small == NULL)
+			{
+				ERR(REG_ESPACE);
+				return NULL;
+			}
+		}
+		d = &small->dfa;
+		d->ssets = small->ssets;
+		d->statesarea = small->statesarea;
+		d->work = &d->statesarea[nss];
+		d->outsarea = small->outsarea;
+		d->incarea = small->incarea;
+		d->cptsmalloced = 0;
+		d->mallocarea = (smallwas == NULL) ? (char *) small : NULL;
+	}
+	else
+	{
+		d = (struct dfa *) MALLOC(sizeof(struct dfa));
+		if (d == NULL)
+		{
+			ERR(REG_ESPACE);
+			return NULL;
+		}
+		d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
+		d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
+											sizeof(unsigned));
+		d->work = &d->statesarea[nss * wordsper];
+		d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
+											  sizeof(struct sset *));
+		d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
+											sizeof(struct arcp));
+		d->cptsmalloced = 1;
+		d->mallocarea = (char *) d;
+		if (d->ssets == NULL || d->statesarea == NULL ||
+			d->outsarea == NULL || d->incarea == NULL)
+		{
+			freedfa(d);
+			ERR(REG_ESPACE);
+			return NULL;
+		}
+	}
+
+	d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
+	d->nssused = 0;
+	d->nstates = cnfa->nstates;
+	d->ncolors = cnfa->ncolors;
+	d->wordsper = wordsper;
+	d->cnfa = cnfa;
+	d->cm = cm;
+	d->lastpost = NULL;
+	d->lastnopr = NULL;
+	d->search = d->ssets;
+
+	/* initialization of sset fields is done as needed */
+
+	return d;
+}
+
+/*
+ * freedfa - free a DFA
+ */
+static void
+freedfa(struct dfa * d)
+{
+	if (d->cptsmalloced)
+	{
+		if (d->ssets != NULL)
+			FREE(d->ssets);
+		if (d->statesarea != NULL)
+			FREE(d->statesarea);
+		if (d->outsarea != NULL)
+			FREE(d->outsarea);
+		if (d->incarea != NULL)
+			FREE(d->incarea);
+	}
+
+	if (d->mallocarea != NULL)
+		FREE(d->mallocarea);
+}
+
+/*
+ * hash - construct a hash code for a bitvector
+ *
+ * There are probably better ways, but they're more expensive.
+ */
+static unsigned
+hash(unsigned *uv,
+	 int n)
+{
+	int			i;
+	unsigned	h;
+
+	h = 0;
+	for (i = 0; i < n; i++)
+		h ^= uv[i];
+	return h;
+}
+
+/*
+ * initialize - hand-craft a cache entry for startup, otherwise get ready
+ */
+static struct sset *
+initialize(struct vars * v,		/* used only for debug flags */
+		   struct dfa * d,
+		   chr *start)
+{
+	struct sset *ss;
+	int			i;
+
+	/* is previous one still there? */
+	if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
+		ss = &d->ssets[0];
+	else
+	{							/* no, must (re)build it */
+		ss = getvacant(v, d, start, start);
+		for (i = 0; i < d->wordsper; i++)
+			ss->states[i] = 0;
+		BSET(ss->states, d->cnfa->pre);
+		ss->hash = HASH(ss->states, d->wordsper);
+		assert(d->cnfa->pre != d->cnfa->post);
+		ss->flags = STARTER | LOCKED | NOPROGRESS;
+		/* lastseen dealt with below */
+	}
+
+	for (i = 0; i < d->nssused; i++)
+		d->ssets[i].lastseen = NULL;
+	ss->lastseen = start;		/* maybe untrue, but harmless */
+	d->lastpost = NULL;
+	d->lastnopr = NULL;
+	return ss;
+}
+
+/*
+ * miss - handle a cache miss
+ */
+static struct sset *			/* NULL if goes to empty set */
+miss(struct vars * v,			/* used only for debug flags */
+	 struct dfa * d,
+	 struct sset * css,
+	 pcolor co,
+	 chr *cp,					/* next chr */
+	 chr *start)				/* where the attempt got started */
+{
+	struct cnfa *cnfa = d->cnfa;
+	int			i;
+	unsigned	h;
+	struct carc *ca;
+	struct sset *p;
+	int			ispost;
+	int			noprogress;
+	int			gotstate;
+	int			dolacons;
+	int			sawlacons;
+
+	/* for convenience, we can be called even if it might not be a miss */
+	if (css->outs[co] != NULL)
+	{
+		FDEBUG(("hit\n"));
+		return css->outs[co];
+	}
+	FDEBUG(("miss\n"));
+
+	/* first, what set of states would we end up in? */
+	for (i = 0; i < d->wordsper; i++)
+		d->work[i] = 0;
+	ispost = 0;
+	noprogress = 1;
+	gotstate = 0;
+	for (i = 0; i < d->nstates; i++)
+		if (ISBSET(css->states, i))
+			for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
+				if (ca->co == co)
+				{
+					BSET(d->work, ca->to);
+					gotstate = 1;
+					if (ca->to == cnfa->post)
+						ispost = 1;
+					if (!cnfa->states[ca->to]->co)
+						noprogress = 0;
+					FDEBUG(("%d -> %d\n", i, ca->to));
+				}
+	dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0;
+	sawlacons = 0;
+	while (dolacons)
+	{							/* transitive closure */
+		dolacons = 0;
+		for (i = 0; i < d->nstates; i++)
+			if (ISBSET(d->work, i))
+				for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
+					 ca++)
+				{
+					if (ca->co <= cnfa->ncolors)
+						continue;		/* NOTE CONTINUE */
+					sawlacons = 1;
+					if (ISBSET(d->work, ca->to))
+						continue;		/* NOTE CONTINUE */
+					if (!lacon(v, cnfa, cp, ca->co))
+						continue;		/* NOTE CONTINUE */
+					BSET(d->work, ca->to);
+					dolacons = 1;
+					if (ca->to == cnfa->post)
+						ispost = 1;
+					if (!cnfa->states[ca->to]->co)
+						noprogress = 0;
+					FDEBUG(("%d :> %d\n", i, ca->to));
+				}
+	}
+	if (!gotstate)
+		return NULL;
+	h = HASH(d->work, d->wordsper);
+
+	/* next, is that in the cache? */
+	for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
+		if (HIT(h, d->work, p, d->wordsper))
+		{
+			FDEBUG(("cached c%d\n", p - d->ssets));
+			break;				/* NOTE BREAK OUT */
+		}
+	if (i == 0)
+	{							/* nope, need a new cache entry */
+		p = getvacant(v, d, cp, start);
+		assert(p != css);
+		for (i = 0; i < d->wordsper; i++)
+			p->states[i] = d->work[i];
+		p->hash = h;
+		p->flags = (ispost) ? POSTSTATE : 0;
+		if (noprogress)
+			p->flags |= NOPROGRESS;
+		/* lastseen to be dealt with by caller */
+	}
+
+	if (!sawlacons)
+	{							/* lookahead conds. always cache miss */
+		FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets));
+		css->outs[co] = p;
+		css->inchain[co] = p->ins;
+		p->ins.ss = css;
+		p->ins.co = (color) co;
+	}
+	return p;
+}
+
+/*
+ * lacon - lookahead-constraint checker for miss()
+ */
+static int						/* predicate:  constraint satisfied? */
+lacon(struct vars * v,
+	  struct cnfa * pcnfa,		/* parent cnfa */
+	  chr *cp,
+	  pcolor co)				/* "color" of the lookahead constraint */
+{
+	int			n;
+	struct subre *sub;
+	struct dfa *d;
+	struct smalldfa sd;
+	chr		   *end;
+
+	n = co - pcnfa->ncolors;
+	assert(n < v->g->nlacons && v->g->lacons != NULL);
+	FDEBUG(("=== testing lacon %d\n", n));
+	sub = &v->g->lacons[n];
+	d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd);
+	if (d == NULL)
+	{
+		ERR(REG_ESPACE);
+		return 0;
+	}
+	end = longest(v, d, cp, v->stop, (int *) NULL);
+	freedfa(d);
+	FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
+	return (sub->subno) ? (end != NULL) : (end == NULL);
+}
+
+/*
+ * getvacant - get a vacant state set
+ * This routine clears out the inarcs and outarcs, but does not otherwise
+ * clear the innards of the state set -- that's up to the caller.
+ */
+static struct sset *
+getvacant(struct vars * v,		/* used only for debug flags */
+		  struct dfa * d,
+		  chr *cp,
+		  chr *start)
+{
+	int			i;
+	struct sset *ss;
+	struct sset *p;
+	struct arcp ap;
+	struct arcp lastap;
+	color		co;
+
+	ss = pickss(v, d, cp, start);
+	assert(!(ss->flags & LOCKED));
+
+	/* clear out its inarcs, including self-referential ones */
+	ap = ss->ins;
+	while ((p = ap.ss) != NULL)
+	{
+		co = ap.co;
+		FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long) co));
+		p->outs[co] = NULL;
+		ap = p->inchain[co];
+		p->inchain[co].ss = NULL;		/* paranoia */
+	}
+	ss->ins.ss = NULL;
+
+	/* take it off the inarc chains of the ssets reached by its outarcs */
+	for (i = 0; i < d->ncolors; i++)
+	{
+		p = ss->outs[i];
+		assert(p != ss);		/* not self-referential */
+		if (p == NULL)
+			continue;			/* NOTE CONTINUE */
+		FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
+		if (p->ins.ss == ss && p->ins.co == i)
+			p->ins = ss->inchain[i];
+		else
+		{
+			assert(p->ins.ss != NULL);
+			for (ap = p->ins; ap.ss != NULL &&
+				 !(ap.ss == ss && ap.co == i);
+				 ap = ap.ss->inchain[ap.co])
+				lastap = ap;
+			assert(ap.ss != NULL);
+			lastap.ss->inchain[lastap.co] = ss->inchain[i];
+		}
+		ss->outs[i] = NULL;
+		ss->inchain[i].ss = NULL;
+	}
+
+	/* if ss was a success state, may need to remember location */
+	if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
+		(d->lastpost == NULL || d->lastpost < ss->lastseen))
+		d->lastpost = ss->lastseen;
+
+	/* likewise for a no-progress state */
+	if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
+		(d->lastnopr == NULL || d->lastnopr < ss->lastseen))
+		d->lastnopr = ss->lastseen;
+
+	return ss;
+}
+
+/*
+ * pickss - pick the next stateset to be used
+ */
+static struct sset *
+pickss(struct vars * v,			/* used only for debug flags */
+	   struct dfa * d,
+	   chr *cp,
+	   chr *start)
+{
+	int			i;
+	struct sset *ss;
+	struct sset *end;
+	chr		   *ancient;
+
+	/* shortcut for cases where cache isn't full */
+	if (d->nssused < d->nssets)
+	{
+		i = d->nssused;
+		d->nssused++;
+		ss = &d->ssets[i];
+		FDEBUG(("new c%d\n", i));
+		/* set up innards */
+		ss->states = &d->statesarea[i * d->wordsper];
+		ss->flags = 0;
+		ss->ins.ss = NULL;
+		ss->ins.co = WHITE;		/* give it some value */
+		ss->outs = &d->outsarea[i * d->ncolors];
+		ss->inchain = &d->incarea[i * d->ncolors];
+		for (i = 0; i < d->ncolors; i++)
+		{
+			ss->outs[i] = NULL;
+			ss->inchain[i].ss = NULL;
+		}
+		return ss;
+	}
+
+	/* look for oldest, or old enough anyway */
+	if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
+		ancient = cp - d->nssets * 2 / 3;
+	else
+		ancient = start;
+	for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
+		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+			!(ss->flags & LOCKED))
+		{
+			d->search = ss + 1;
+			FDEBUG(("replacing c%d\n", ss - d->ssets));
+			return ss;
+		}
+	for (ss = d->ssets, end = d->search; ss < end; ss++)
+		if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+			!(ss->flags & LOCKED))
+		{
+			d->search = ss + 1;
+			FDEBUG(("replacing c%d\n", ss - d->ssets));
+			return ss;
+		}
+
+	/* nobody's old enough?!? -- something's really wrong */
+	FDEBUG(("can't find victim to replace!\n"));
+	assert(NOTREACHED);
+	ERR(REG_ASSERT);
+	return d->ssets;
+}
-- 
2.50.0