From e70833fb1dfa271e7b0c9dec11ad644880e03c6f Mon Sep 17 00:00:00 2001
From: =?utf8?q?V=C3=A1clav=20Slav=C3=ADk?= <vslavik@fastmail.fm>
Date: Fri, 5 Mar 2004 23:14:23 +0000
Subject: [PATCH] added regex test suite

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@26104 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
---
 tests/Makefile.in      |    6 +-
 tests/makefile.bcc     |    6 +-
 tests/makefile.gcc     |    6 +-
 tests/makefile.vc      |    6 +-
 tests/makefile.wat     |    6 +-
 tests/regex/reg.test   | 1135 +++++++++++++++++++++++++++++++++
 tests/regex/regex.cpp  |  421 +++++++++++++
 tests/regex/regex.inc  | 1361 ++++++++++++++++++++++++++++++++++++++++
 tests/regex/regex.pl   |  437 +++++++++++++
 tests/regex/wxreg.test |   71 +++
 tests/test.bkl         |    1 +
 tests/test.dsp         |    4 +
 12 files changed, 3455 insertions(+), 5 deletions(-)
 create mode 100644 tests/regex/reg.test
 create mode 100644 tests/regex/regex.cpp
 create mode 100644 tests/regex/regex.inc
 create mode 100755 tests/regex/regex.pl
 create mode 100644 tests/regex/wxreg.test

diff --git a/tests/Makefile.in b/tests/Makefile.in
index 6f272dd47a..ac19e587e7 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -37,7 +37,8 @@ TEST_CXXFLAGS = -D__WX$(TOOLKIT)__ $(__WXUNIV_DEFINE_p) -I$(srcdir) \
 	$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	test_test.o \
-	test_main.o
+	test_main.o \
+	test_regex.o
 
 ### Conditionally set variables: ###
 
@@ -106,6 +107,9 @@ test_test.o: $(srcdir)/test.cpp
 test_main.o: $(srcdir)/mbconv/main.cpp
 	$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
 
+test_regex.o: $(srcdir)/regex/regex.cpp
+	$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
+
 
 # Include dependency info, if present:
 @IF_GNU_MAKE@-include .deps/*.d
diff --git a/tests/makefile.bcc b/tests/makefile.bcc
index a968cacdf8..749a041311 100644
--- a/tests/makefile.bcc
+++ b/tests/makefile.bcc
@@ -31,7 +31,8 @@ TEST_CXXFLAGS = $(__RUNTIME_LIBS_6) -I$(BCCDIR)\include $(__DEBUGINFO) \
 	$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	$(OBJS)\test_test.obj \
-	$(OBJS)\test_main.obj
+	$(OBJS)\test_main.obj \
+	$(OBJS)\test_regex.obj
 
 ### Conditionally set variables: ###
 
@@ -156,3 +157,6 @@ $(OBJS)\test_test.obj: .\test.cpp
 
 $(OBJS)\test_main.obj: .\mbconv\main.cpp
 	$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
+
+$(OBJS)\test_regex.obj: .\regex\regex.cpp
+	$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
diff --git a/tests/makefile.gcc b/tests/makefile.gcc
index 5aeeb39f94..2fe6eb76fe 100644
--- a/tests/makefile.gcc
+++ b/tests/makefile.gcc
@@ -22,7 +22,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO) $(__OPTIMIZEFLAG_2) $(GCCFLAGS) -DHAVE_W32API_H \
 	$(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	$(OBJS)\test_test.o \
-	$(OBJS)\test_main.o
+	$(OBJS)\test_main.o \
+	$(OBJS)\test_regex.o
 
 ### Conditionally set variables: ###
 
@@ -151,4 +152,7 @@ $(OBJS)\test_test.o: ./test.cpp
 $(OBJS)\test_main.o: ./mbconv/main.cpp
 	$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
 
+$(OBJS)\test_regex.o: ./regex/regex.cpp
+	$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
+
 .PHONY: all clean
diff --git a/tests/makefile.vc b/tests/makefile.vc
index 59f35e494a..ee92075fb5 100644
--- a/tests/makefile.vc
+++ b/tests/makefile.vc
@@ -24,7 +24,8 @@ TEST_CXXFLAGS = /M$(__RUNTIME_LIBS_7)$(__DEBUGRUNTIME_3) /DWIN32 \
 	$(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	$(OBJS)\test_test.obj \
-	$(OBJS)\test_main.obj
+	$(OBJS)\test_main.obj \
+	$(OBJS)\test_regex.obj
 
 ### Conditionally set variables: ###
 
@@ -212,3 +213,6 @@ $(OBJS)\test_test.obj: .\test.cpp
 
 $(OBJS)\test_main.obj: .\mbconv\main.cpp
 	$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
+
+$(OBJS)\test_regex.obj: .\regex\regex.cpp
+	$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
diff --git a/tests/makefile.wat b/tests/makefile.wat
index 9df4df112a..53e2b0c516 100644
--- a/tests/makefile.wat
+++ b/tests/makefile.wat
@@ -172,7 +172,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO_0) $(__OPTIMIZEFLAG_2) -bm $(__RUNTIME_LIBS_5) &
 	$(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  &
 	$(OBJS)\test_test.obj &
-	$(OBJS)\test_main.obj
+	$(OBJS)\test_main.obj &
+	$(OBJS)\test_regex.obj
 
 
 all : $(OBJS)
@@ -206,3 +207,6 @@ $(OBJS)\test_test.obj :  .AUTODEPEND .\test.cpp
 
 $(OBJS)\test_main.obj :  .AUTODEPEND .\mbconv\main.cpp
 	$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
+
+$(OBJS)\test_regex.obj :  .AUTODEPEND .\regex\regex.cpp
+	$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
diff --git a/tests/regex/reg.test b/tests/regex/reg.test
new file mode 100644
index 0000000000..8bfffad107
--- /dev/null
+++ b/tests/regex/reg.test
@@ -0,0 +1,1135 @@
+# reg.test --
+#
+# This file contains a collection of tests for one or more of the Tcl
+# built-in commands.  Sourcing this file into Tcl runs the tests and
+# generates output for errors.  No output means no errors were found.
+# (Don't panic if you are seeing this as part of the reg distribution
+# and aren't using Tcl -- reg's own regression tester also knows how
+# to read this file, ignoring the Tcl-isms.)
+#
+# Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+#
+# RCS: @(#) $Id$
+
+if {[lsearch [namespace children] ::tcltest] == -1} {
+    package require tcltest 2
+    namespace import -force ::tcltest::*
+}
+
+# All tests require the testregexp command, return if this
+# command doesn't exist
+
+::tcltest::testConstraint testregexp \
+	[expr {[info commands testregexp] != {}}]
+::tcltest::testConstraint localeRegexp 0
+
+# This file uses some custom procedures, defined below, for regexp regression
+# testing.  The name of the procedure indicates the general nature of the
+# test:
+#	e	compile error expected
+#	f	match failure expected
+#	m	successful match
+#	i	successful match with -indices (used in checking things like
+#		nonparticipating subexpressions)
+#	p	unsuccessful match with -indices (!!) (used in checking
+#		partial-match reporting)
+# There is also "doing" which sets up title and major test number for each
+# block of tests.
+
+# The first 3 arguments are constant:  a minor number (which often gets
+# a letter or two suffixed to it internally), some flags, and the RE itself.
+# For e, the remaining argument is the name of the compile error expected,
+# less the leading "REG_".  For the rest, the next argument is the string
+# to try the match against.  Remaining arguments are the substring expected
+# to be matched, and any substrings expected to be matched by subexpressions.
+# (For f, these arguments are optional, and if present are ignored except
+# that they indicate how many subexpressions should be present in the RE.)
+# It is an error for the number of subexpression arguments to be wrong.
+# Cases involving nonparticipating subexpressions, checking where empty
+# substrings are located, etc. should be done using i and p.
+
+# The flag characters are complex and a bit eclectic.  Generally speaking, 
+# lowercase letters are compile options, uppercase are expected re_info
+# bits, and nonalphabetics are match options, controls for how the test is 
+# run, or testing options.  The one small surprise is that AREs are the
+# default, and you must explicitly request lesser flavors of RE.  The flags
+# are as follows.  It is admitted that some are not very mnemonic.
+# There are some others which are purely debugging tools and are not
+# useful in this file.
+#
+#	-	no-op (placeholder)
+#	+	provide fake xy equivalence class and ch collating element
+#	%	force small state-set cache in matcher (to test cache replace)
+#	^	beginning of string is not beginning of line
+#	$	end of string is not end of line
+#	*	test is Unicode-specific, needs big character set
+#
+#	&	test as both ARE and BRE
+#	b	BRE
+#	e	ERE
+#	a	turn advanced-features bit on (error unless ERE already)
+#	q	literal string, no metacharacters at all
+#
+#	i	case-independent matching
+#	o	("opaque") no subexpression capture
+#	p	newlines are half-magic, excluded from . and [^ only
+#	w	newlines are half-magic, significant to ^ and $ only
+#	n	newlines are fully magic, both effects
+#	x	expanded RE syntax
+#	t	incomplete-match reporting
+#
+#	A	backslash-_a_lphanumeric seen
+#	B	ERE/ARE literal-_b_race heuristic used
+#	E	backslash (_e_scape) seen within []
+#	H	looka_h_ead constraint seen
+#	I	_i_mpossible to match
+#	L	_l_ocale-specific construct seen
+#	M	unportable (_m_achine-specific) construct seen
+#	N	RE can match empty (_n_ull) string
+#	P	non-_P_OSIX construct seen
+#	Q	{} _q_uantifier seen
+#	R	back _r_eference seen
+#	S	POSIX-un_s_pecified syntax seen
+#	T	prefers shortest (_t_iny)
+#	U	saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
+
+# The one area we can't easily test is memory-allocation failures (which
+# are hard to provoke on command).  Embedded NULs also are not tested at
+# the moment, but this is a historical accident which should be fixed.
+
+
+
+# test procedures and related
+
+set ask "about"
+set xflags "xflags"
+set testbypassed 0
+
+# re_info abbreviation mapping table: uppercase flag letter -> re_info bit name
+set infonames(A) "REG_UBSALNUM"
+set infonames(B) "REG_UBRACES"
+set infonames(E) "REG_UBBS"
+set infonames(H) "REG_ULOOKAHEAD"
+set infonames(I) "REG_UIMPOSSIBLE"
+set infonames(L) "REG_ULOCALE"
+set infonames(M) "REG_UUNPORT"
+set infonames(N) "REG_UEMPTYMATCH"
+set infonames(P) "REG_UNONPOSIX"
+set infonames(Q) "REG_UBOUNDS"
+set infonames(R) "REG_UBACKREF"
+set infonames(S) "REG_UUNSPEC"
+set infonames(T) "REG_USHORTEST"
+set infonames(U) "REG_UPBOTCH"
+set infonameorder "RHQBAUEPSMLNIT"	;# must match bit order, lsb first
+
+# start a test block: report tests bypassed in the previous block, then set prefix/description
+proc doing {major desc} {
+	global prefix description testbypassed
+
+	if {$testbypassed != 0} {
+		puts stdout "!!! bypassed $testbypassed tests in\
+					 $prefix, `$description'"
+	}
+
+	set prefix reg-$major
+	set description "reg $desc"
+	set testbypassed 0
+}
+
+# build test number: the test id list joined with dots (internal)
+proc tno {testid} {
+	join $testid "."
+}
+
+# build description: block description, prefixed by any id modifiers (internal)
+proc desc {testid} {
+	global description
+
+	set mods [lrange $testid 1 end]
+	if {[llength $mods] == 0} {
+		return $description
+	}
+	return "($mods) $description"
+}
+
+# turn a flags string into match switches plus a trailing -xflags argument (internal)
+proc flags {fl} {
+	global xflags
+
+	set switches [list]
+	set extra ""
+	foreach ch [split $fl ""] {
+		switch -exact -- $ch {
+		"-" { }
+		"i" { lappend switches "-nocase" }
+		"n" { lappend switches "-line" }
+		"p" { lappend switches "-linestop" }
+		"w" { lappend switches "-lineanchor" }
+		"x" { lappend switches "-expanded" }
+		default { append extra $ch }
+		}
+	}
+	if {[string length $extra] > 0} {
+		lappend switches -$xflags $extra
+	}
+	return $switches
+}
+
+# list the re_info names whose letters occur in a flags string, in infonameorder (internal)
+proc infoflags {fl} {
+	global infonames infonameorder
+
+	set names [list]
+	foreach letter [split $infonameorder ""] {
+		if {[string first $letter $fl] < 0} continue
+		lappend names $infonames($letter)
+	}
+
+	return $names
+}
+
+# e testid flags re err -- compilation error expected: compiling re must fail with REG_$err
+proc e {testid flags re err} {
+	global prefix ask errorCode
+
+	# Tcl locale stuff doesn't do the ch/xy test fakery yet
+	if {[string first "+" $flags] >= 0} {
+	    # This will register as a skipped test
+	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
+	    return
+	}
+
+	# if &, drop it from the flags and test as both ARE and BRE (BRE adds b)
+	set amp [string first "&" $flags]
+	if {$amp >= 0} {
+		set f [string range $flags 0 [expr {$amp - 1}]]
+		append f [string range $flags [expr {$amp + 1}] end]
+		e [linsert $testid end ARE] ${f} $re $err
+		e [linsert $testid end BRE] ${f}b $re $err
+		return
+	}
+
+	set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
+	set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
+	test $prefix.[tno $testid] [desc $testid] \
+		{testregexp} $run [list 1 REG_$err]
+}
+
+# f testid flags re target ?sub ...? -- match failure expected (compile test, then execute test)
+proc f {testid flags re target args} {
+	global prefix description ask
+
+	# Tcl locale stuff doesn't do the ch/xy test fakery yet
+	if {[string first "+" $flags] >= 0} {
+	    # This will register as a skipped test
+	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
+	    return
+	}
+
+	# if &, drop it from the flags and test as both ARE and BRE (BRE adds b)
+	set amp [string first "&" $flags]
+	if {$amp >= 0} {
+		set f [string range $flags 0 [expr {$amp - 1}]]
+		append f [string range $flags [expr {$amp + 1}] end]
+		eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
+								$target]
+		eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
+								$target]
+		return
+	}
+
+	set f [flags $flags]
+	set infoflags [infoflags $flags]
+	set ccmd [concat [list testregexp -$ask] $f [list $re]]
+	set nsub [expr {[llength $args] - 1}]
+	if {$nsub == -1} {
+		# didn't tell us number of subexps, so drop it from the result too
+		set ccmd "lreplace \[$ccmd\] 0 0"
+		set info [list $infoflags]
+	} else {
+		set info [list $nsub $infoflags]
+	}
+	lappend testid "compile"
+	test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
+
+	set testid [lreplace $testid end end "execute"]
+	set ecmd [concat [list testregexp] $f [list $re $target]]
+	test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
+}
+
+# match expected, internal routine that does the work
+# parameters are like those of the "real" routines (m, i, p) plus a leading
+#  "opts": a possibly-empty list of switches for the regexp match attempt
+# The ! flag is used to indicate expected match failure (for REG_EXPECT,
+#  which wants argument testing even in the event of failure).
+proc matchexpected {opts testid flags re target args} {
+	global prefix description ask regBug
+
+    if {[info exists regBug] && $regBug} {
+	# This will register as a skipped test
+	test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
+	return
+    }
+
+	# Tcl locale stuff doesn't do the ch/xy test fakery yet
+	if {[string first "+" $flags] >= 0} {
+	    # This will register as a skipped test
+	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
+	    return
+	}
+
+	# if &, drop it from the flags and test as both ARE and BRE (BRE adds b)
+	set amp [string first "&" $flags]
+	if {$amp >= 0} {
+		set f [string range $flags 0 [expr {$amp - 1}]]
+		append f [string range $flags [expr {$amp + 1}] end]
+		eval [concat [list matchexpected $opts \
+			[linsert $testid end ARE] ${f} $re $target] $args]
+		eval [concat [list matchexpected $opts \
+			[linsert $testid end BRE] ${f}b $re $target] $args]
+		return
+	}
+
+	set f [flags $flags]
+	set infoflags [infoflags $flags]
+	set ccmd [concat [list testregexp -$ask] $f [list $re]]
+	set ecmd [concat [list testregexp] $opts $f [list $re $target]]
+
+	set nsub [expr {[llength $args] - 1}]
+	set names [list]
+	set refs ""
+	for {set i 0} {$i <= $nsub} {incr i} {
+		if {$i == 0} {
+			set name match
+		} else {
+			set name sub$i
+		}
+		lappend names $name
+		append refs " \$$name"
+		set $name ""
+	}
+	if {[string first "o" $flags] >= 0} {	;# REG_NOSUB kludge
+		set nsub 0		;# unsigned value cannot be -1
+	}
+	if {[string first "t" $flags] >= 0} {	;# REG_EXPECT
+		incr nsub -1		;# the extra does not count
+	}
+	set ecmd [concat $ecmd $names]
+	set erun "list \[$ecmd\] $refs"
+	set retcode [list 1]
+	if {[string first "!" $flags] >= 0} {
+		set retcode [list 0]
+	}
+	set result [concat $retcode $args]
+
+	set info [list $nsub $infoflags]
+	lappend testid "compile"
+	test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
+	set testid [lreplace $testid end end "execute"]
+	test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
+}
+
+# match expected, plain substrings (no missing, empty, or ambiguous submatches)
+# usage: m testno flags re target mat submat ...
+proc m {args} {
+	eval [linsert $args 0 matchexpected [list]]
+}
+
+# match expected, compared as -indices pairs (full fanciness)
+# usage: i testno flags re target mat submat ...
+proc i {args} {
+	eval [linsert $args 0 matchexpected [list "-indices"]]
+}
+
+# partial match expected: like i, but with the ! flag prepended to the flags
+# usage: p testno flags re target mat "" ...
+# Quirk:  number of ""s must be one more than number of subREs.
+proc p {args} {
+	set marked "![lindex $args 1]"		;# flags with ! added
+	set args [lreplace $args 1 1 $marked]
+	eval [linsert $args 0 matchexpected [list "-indices"]]
+}
+
+# test is a knownBug: run it at global level with ::regBug set, so matchexpected skips it
+proc knownBug {args} {
+    set ::regBug 1
+    uplevel #0 $args
+    set ::regBug 0
+}
+
+
+
+# the tests themselves
+
+
+
+# support functions and preliminary misc.
+# This is sensitive to changes in message wording, but we really have to
+# test the code->message expansion at least once.
+test reg-0.1 "regexp error reporting" {
+	list [catch {regexp (*) ign} msg] $msg
+} {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
+
+
+
+doing 1 "basic sanity checks"
+m  1	&	abc		abc	abc
+f  2	&	abc		def
+m  3	&	abc		xyabxabce	abc
+
+
+
+doing 2 "invalid option combinations"
+e  1	qe	a		INVARG
+e  2	qa	a		INVARG
+e  3	qx	a		INVARG
+e  4	qn	a		INVARG
+e  5	ba	a		INVARG
+
+
+
+doing 3 "basic syntax"
+i  1	&NS	""		a	{0 -1}
+m  2	NS	a|		a	a
+m  3	-	a|b		a	a
+m  4	-	a|b		b	b
+m  5	NS	a||b		b	b
+m  6	&	ab		ab	ab
+
+
+
+doing 4 "parentheses"
+m  1	-	(a)e		ae	ae	a
+m  2	o	(a)e		ae
+m  3	b	{\(a\)b}	ab	ab	a
+m  4	-	a((b)c)		abc	abc	bc	b
+m  5	-	a(b)(c)		abc	abc	b	c
+e  6	-	a(b		EPAREN
+e  7	b	{a\(b}		EPAREN
+# sigh, we blew it on the specs here... someday this will be fixed in POSIX,
+#  but meanwhile, it's fixed in AREs
+m  8	eU	a)b		a)b	a)b
+e  9	-	a)b		EPAREN
+e 10	b	{a\)b}		EPAREN
+m 11	P	a(?:b)c		abc	abc
+e 12	e	a(?:b)c		BADRPT
+i 13	S	a()b		ab	{0 1}	{1 0}
+m 14	SP	a(?:)b		ab	ab
+i 15	S	a(|b)c		ac	{0 1}	{1 0}
+m 16	S	a(b|)c		abc	abc	b
+
+
+
+doing 5 "simple one-char matching"
+# general case of brackets done later
+m  1	&	a.b		axb	axb
+f  2	&n	"a.b"		"a\nb"
+m  3	&	{a[bc]d}	abd	abd
+m  4	&	{a[bc]d}	acd	acd
+f  5	&	{a[bc]d}	aed
+f  6	&	{a[^bc]d}	abd
+m  7	&	{a[^bc]d}	aed	aed
+f  8	&p	"a\[^bc]d"	"a\nd"
+
+
+
+doing 6 "context-dependent syntax"
+# plus odds and ends
+e  1	-	*		BADRPT
+m  2	b	*		*	*
+m  3	b	{\(*\)}		*	*	*
+e  4	-	(*)		BADRPT
+m  5	b	^*		*	*
+e  6	-	^*		BADRPT
+f  7	&	^b		^b
+m  8	b	x^		x^	x^
+f  9	I	x^		x
+m 10	n	"\n^"		"x\nb"	"\n"
+f 11	bS	{\(^b\)}	^b
+m 12	-	(^b)		b	b	b
+m 13	&	{x$}		x	x
+m 14	bS	{\(x$\)}	x	x	x
+m 15	-	{(x$)}		x	x	x
+m 16	b	{x$y}		"x\$y"	"x\$y"
+f 17	I	{x$y}		xy
+m 18	n	"x\$\n"		"x\n"	"x\n"
+e 19	-	+		BADRPT
+e 20	-	?		BADRPT
+
+
+
+doing 7 "simple quantifiers"
+m  1	&N	a*		aa	aa
+i  2	&N	a*		b	{0 -1}
+m  3	-	a+		aa	aa
+m  4	-	a?b		ab	ab
+m  5	-	a?b		b	b
+e  6	-	**		BADRPT
+m  7	bN	**		***	***
+e  8	&	a**		BADRPT
+e  9	&	a**b		BADRPT
+e 10	&	***		BADRPT
+e 11	-	a++		BADRPT
+e 12	-	a?+		BADRPT
+e 13	-	a?*		BADRPT
+e 14	-	a+*		BADRPT
+e 15	-	a*+		BADRPT
+
+
+
+doing 8 "braces"
+m  1	NQ	"a{0,1}"	""	""
+m  2	NQ	"a{0,1}"	ac	a
+e  3	-	"a{1,0}"	BADBR
+e  4	-	"a{1,2,3}"	BADBR
+e  5	-	"a{257}"	BADBR
+e  6	-	"a{1000}"	BADBR
+e  7	-	"a{1"		EBRACE
+e  8	-	"a{1n}"		BADBR
+m  9	BS	"a{b"		"a\{b"	"a\{b"
+m 10	BS	"a{"		"a\{"	"a\{"
+m 11	bQ	"a\\{0,1\\}b"	cb	b
+e 12	b	"a\\{0,1"	EBRACE
+e 13	-	"a{0,1\\"	BADBR
+m 14	Q	"a{0}b"		ab	b
+m 15	Q	"a{0,0}b"	ab	b
+m 16	Q	"a{0,1}b"	ab	ab
+m 17	Q	"a{0,2}b"	b	b
+m 18	Q	"a{0,2}b"	aab	aab
+m 19	Q	"a{0,}b"	aab	aab
+m 20	Q	"a{1,1}b"	aab	ab
+m 21	Q	"a{1,3}b"	aaaab	aaab
+f 22	Q	"a{1,3}b"	b
+m 23	Q	"a{1,}b"	aab	aab
+f 24	Q	"a{2,3}b"	ab
+m 25	Q	"a{2,3}b"	aaaab	aaab
+f 26	Q	"a{2,}b"	ab
+m 27	Q	"a{2,}b"	aaaab	aaaab
+
+
+
+doing 9 "brackets"
+m  1	&	{a[bc]}		ac	ac
+m  2	&	{a[-]}		a-	a-
+m  3	&	{a[[.-.]]}	a-	a-
+m  4	&L	{a[[.zero.]]}	a0	a0
+m  5	&LM	{a[[.zero.]-9]}	a2	a2
+m  6	&M	{a[0-[.9.]]}	a2	a2
+m  7	&+L	{a[[=x=]]}	ax	ax
+m  8	&+L	{a[[=x=]]}	ay	ay
+f  9	&+L	{a[[=x=]]}	az
+e 10	&	{a[0-[=x=]]}	ERANGE
+m 11	&L	{a[[:digit:]]}	a0	a0
+e 12	&	{a[[:woopsie:]]}	ECTYPE
+f 13	&L	{a[[:digit:]]}	ab
+e 14	&	{a[0-[:digit:]]}	ERANGE
+m 15	&LP	{[[:<:]]a}	a	a
+m 16	&LP	{a[[:>:]]}	a	a
+e 17	&	{a[[..]]b}	ECOLLATE
+e 18	&	{a[[==]]b}	ECOLLATE
+e 19	&	{a[[::]]b}	ECTYPE
+e 20	&	{a[[.a}		EBRACK
+e 21	&	{a[[=a}		EBRACK
+e 22	&	{a[[:a}		EBRACK
+e 23	&	{a[}		EBRACK
+e 24	&	{a[b}		EBRACK
+e 25	&	{a[b-}		EBRACK
+e 26	&	{a[b-c}		EBRACK
+m 27	&M	{a[b-c]}	ab	ab
+m 28	&	{a[b-b]}	ab	ab
+m 29	&M	{a[1-2]}	a2	a2
+e 30	&	{a[c-b]}	ERANGE
+e 31	&	{a[a-b-c]}	ERANGE
+m 32	&M	{a[--?]b}	a?b	a?b
+m 33	&	{a[---]b}	a-b	a-b
+m 34	&	{a[]b]c}	a]c	a]c
+m 35	EP	{a[\]]b}	a]b	a]b
+f 36	bE	{a[\]]b}	a]b
+m 37	bE	{a[\]]b}	"a\\]b"	"a\\]b"
+m 38	eE	{a[\]]b}	"a\\]b"	"a\\]b"
+m 39	EP	{a[\\]b}	"a\\b"	"a\\b"
+m 40	eE	{a[\\]b}	"a\\b"	"a\\b"
+m 41	bE	{a[\\]b}	"a\\b"	"a\\b"
+e 42	-	{a[\Z]b}	EESCAPE
+m 43	&	{a[[b]c}	"a\[c"	"a\[c"
+m 44	EMP*	{a[\u00fe-\u0507][\u00ff-\u0300]b} \
+			"a\u0102\u02ffb"	"a\u0102\u02ffb"
+
+
+
+doing 10 "anchors and newlines"
+m  1	&	^a		a	a
+f  2	&^	^a		a
+i  3	&N	^		a	{0 -1}
+i  4	&	{a$}		aba	{2 2}
+f  5	{&$}	{a$}		a
+i  6	&N	{$}		ab	{2 1}
+m  7	&n	^a		a	a
+m  8	&n	"^a"		"b\na"	"a"
+i  9	&w	"^a"		"a\na"	{0 0}
+i 10	&n^	"^a"		"a\na"	{2 2}
+m 11	&n	{a$}		a	a
+m 12	&n	"a\$"		"a\nb"	"a"
+i 13	&n	"a\$"		"a\na"	{0 0}
+i 14	N	^^		a	{0 -1}
+m 15	b	^^		^	^
+i 16	N	{$$}		a	{1 0}
+m 17	b	{$$}		"\$"	"\$"
+m 18	&N	{^$}		""	""
+f 19	&N	{^$}		a
+i 20	&nN	"^\$"		"a\n\nb"	{2 1}
+m 21	N	{$^}		""	""
+m 22	b	{$^}		"\$^"	"\$^"
+m 23	P	{\Aa}		a	a
+m 24	^P	{\Aa}		a	a
+f 25	^nP	{\Aa}		"b\na"
+m 26	P	{a\Z}		a	a
+m 27	{$P}	{a\Z}		a	a
+f 28	{$nP}	{a\Z}		"a\nb"
+e 29	-	^*		BADRPT
+e 30	-	{$*}		BADRPT
+e 31	-	{\A*}		BADRPT
+e 32	-	{\Z*}		BADRPT
+
+
+
+doing 11 "boundary constraints"
+m  1	&LP	{[[:<:]]a}	a	a
+m  2	&LP	{[[:<:]]a}	-a	a
+f  3	&LP	{[[:<:]]a}	ba
+m  4	&LP	{a[[:>:]]}	a	a
+m  5	&LP	{a[[:>:]]}	a-	a
+f  6	&LP	{a[[:>:]]}	ab
+m  7	bLP	{\<a}		a	a
+f  8	bLP	{\<a}		ba
+m  9	bLP	{a\>}		a	a
+f 10	bLP	{a\>}		ab
+m 11	LP	{\ya}		a	a
+f 12	LP	{\ya}		ba
+m 13	LP	{a\y}		a	a
+f 14	LP	{a\y}		ab
+m 15	LP	{a\Y}		ab	a
+f 16	LP	{a\Y}		a-
+f 17	LP	{a\Y}		a
+f 18	LP	{-\Y}		-a
+m 19	LP	{-\Y}		-%	-
+f 20	LP	{\Y-}		a-
+e 21	-	{[[:<:]]*}	BADRPT
+e 22	-	{[[:>:]]*}	BADRPT
+e 23	b	{\<*}		BADRPT
+e 24	b	{\>*}		BADRPT
+e 25	-	{\y*}		BADRPT
+e 26	-	{\Y*}		BADRPT
+m 27	LP	{\ma}		a	a
+f 28	LP	{\ma}		ba
+m 29	LP	{a\M}		a	a
+f 30	LP	{a\M}		ab
+f 31	ILP	{\Ma}		a
+f 32	ILP	{a\m}		a
+
+
+
+doing 12 "character classes"
+m  1	LP	{a\db}		a0b	a0b
+f  2	LP	{a\db}		axb
+f  3	LP	{a\Db}		a0b
+m  4	LP	{a\Db}		axb	axb
+m  5	LP	"a\\sb"		"a b"	"a b"
+m  6	LP	"a\\sb"		"a\tb"	"a\tb"
+m  7	LP	"a\\sb"		"a\nb"	"a\nb"
+f  8	LP	{a\sb}		axb
+m  9	LP	{a\Sb}		axb	axb
+f 10	LP	"a\\Sb"		"a b"
+m 11	LP	{a\wb}		axb	axb
+f 12	LP	{a\wb}		a-b
+f 13	LP	{a\Wb}		axb
+m 14	LP	{a\Wb}		a-b	a-b
+m 15	LP	{\y\w+z\y}	adze-guz	guz
+m 16	LPE	{a[\d]b}	a1b	a1b
+m 17	LPE	"a\[\\s]b"	"a b"	"a b"
+m 18	LPE	{a[\w]b}	axb	axb
+
+
+
+doing 13 "escapes"
+e  1	&	"a\\"		EESCAPE
+m  2	-	{a\<b}		a<b	a<b
+m  3	e	{a\<b}		a<b	a<b
+m  4	bAS	{a\wb}		awb	awb
+m  5	eAS	{a\wb}		awb	awb
+m  6	PL	"a\\ab"		"a\007b"	"a\007b"
+m  7	P	"a\\bb"		"a\bb"	"a\bb"
+m  8	P	{a\Bb}		"a\\b"	"a\\b"
+m  9	MP	"a\\chb"	"a\bb"	"a\bb"
+m 10	MP	"a\\cHb"	"a\bb"	"a\bb"
+m 11	LMP	"a\\e"		"a\033"	"a\033"
+m 12	P	"a\\fb"		"a\fb"	"a\fb"
+m 13	P	"a\\nb"		"a\nb"	"a\nb"
+m 14	P	"a\\rb"		"a\rb"	"a\rb"
+m 15	P	"a\\tb"		"a\tb"	"a\tb"
+m 16	P	"a\\u0008x"	"a\bx"	"a\bx"
+e 17	-	{a\u008x}	EESCAPE
+m 18	P	"a\\u00088x"	"a\b8x"	"a\b8x"
+m 19	P	"a\\U00000008x"	"a\bx"	"a\bx"
+e 20	-	{a\U0000008x}	EESCAPE
+m 21	P	"a\\vb"		"a\vb"	"a\vb"
+m 22	MP	"a\\x08x"	"a\bx"	"a\bx"
+e 23	-	{a\xq}		EESCAPE
+m 24	MP	"a\\x0008x"	"a\bx"	"a\bx"
+e 25	-	{a\z}		EESCAPE
+m 26	MP	"a\\010b"	"a\bb"	"a\bb"
+
+
+
+doing 14 "back references"
+# ugh
+m  1	RP	{a(b*)c\1}	abbcbb	abbcbb	bb
+m  2	RP	{a(b*)c\1}	ac	ac	""
+f  3	RP	{a(b*)c\1}	abbcb
+m  4	RP	{a(b*)\1}	abbcbb	abb	b
+m  5	RP	{a(b|bb)\1}	abbcbb	abb	b
+m  6	RP	{a([bc])\1}	abb	abb	b
+f  7	RP	{a([bc])\1}	abc
+m  8	RP	{a([bc])\1}	abcabb	abb	b
+f  9	RP	{a([bc])*\1}	abc
+f 10	RP	{a([bc])\1}	abB
+m 11	iRP	{a([bc])\1}	abB	abB	b
+m 12	RP	{a([bc])\1+}	abbb	abbb	b
+m 13	QRP	"a(\[bc])\\1{3,4}"	abbbb	abbbb	b
+f 14	QRP	"a(\[bc])\\1{3,4}"	abbb
+m 15	RP	{a([bc])\1*}	abbb	abbb	b
+m 16	RP	{a([bc])\1*}	ab	ab	b
+m 17	RP	{a([bc])(\1*)}	ab	ab	b	""
+e 18	-	{a((b)\1)}	ESUBREG
+e 19	-	{a(b)c\2}	ESUBREG
+m 20	bR	{a\(b*\)c\1}	abbcbb	abbcbb	bb
+
+
+
+doing 15 "octal escapes vs back references"
+# initial zero is always octal
+m  1	MP	"a\\010b"	"a\bb"	"a\bb"
+m  2	MP	"a\\0070b"	"a\0070b"	"a\0070b"
+m  3	MP	"a\\07b"	"a\007b"	"a\007b"
+m  4	MP	"a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c"	"abbbbbbbbbb\007c" \
+	"abbbbbbbbbb\007c"	"b"	"b"	"b"	"b"	"b"	"b" \
+	"b"	"b"	"b"	"b"
+# a single digit is always a backref
+e  5	-	{a\7b}		ESUBREG
+# otherwise it's a backref only if within range (barf!)
+m  6	MP	"a\\10b"	"a\bb"	"a\bb"
+m  7	MP	{a\101b}	aAb	aAb
+m  8	RP	{a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c}	abbbbbbbbbbbc \
+	abbbbbbbbbbbc	b	b	b	b	b	b	b \
+	b	b	b
+# but we're fussy about border cases -- guys who want octal should use the zero
+e  9	-	{a((((((((((b\10))))))))))c}	ESUBREG
+# BREs don't have octal, EREs don't have backrefs
+m 10	MP	"a\\12b"	"a\nb"	"a\nb"
+e 11	b	{a\12b}		ESUBREG
+m 12	eAS	{a\12b}		a12b	a12b
+
+
+
+doing 16 "expanded syntax"
+m  1	xP	"a b c"		"abc"	"abc"
+m  2	xP	"a b #oops\nc\td"	"abcd"	"abcd"
+m  3	x	"a\\ b\\\tc"	"a b\tc"	"a b\tc"
+m  4	xP	"a b\\#c"	"ab#c"	"ab#c"
+m  5	xP	"a b\[c d]e"	"ab e"	"ab e"
+m  6	xP	"a b\[c#d]e"	"ab#e"	"ab#e"
+m  7	xP	"a b\[c#d]e"	"abde"	"abde"
+m  8	xSPB	"ab{ d"		"ab\{d"	"ab\{d"
+m  9	xPQ	"ab{ 1 , 2 }c"	"abc"	"abc"
+
+
+
+doing 17 "misc syntax"
+m  1	P	a(?#comment)b	ab	ab
+
+
+
+doing 18 "unmatchable REs"
+f  1	I	a^b		ab
+
+
+
+doing 19 "case independence"
+m  1	&i	ab		Ab	Ab
+m  2	&i	{a[bc]}		aC	aC
+f  3	&i	{a[^bc]}	aB
+m  4	&iM	{a[b-d]}	aC	aC
+f  5	&iM	{a[^b-d]}	aC
+
+
+
+doing 20 "directors and embedded options"
+e  1	&	***?		BADPAT
+m  2	q	***?		***?	***?
+m  3	&P	***=a*b		a*b	a*b
+m  4	q	***=a*b		***=a*b	***=a*b
+m  5	bLP	{***:\w+}	ab	ab
+m  6	eLP	{***:\w+}	ab	ab
+e  7	&	***:***=a*b	BADRPT
+m  8	&P	***:(?b)a+b	a+b	a+b
+m  9	P	(?b)a+b		a+b	a+b
+e 10	e	{(?b)\w+}	BADRPT
+m 11	bAS	{(?b)\w+}	(?b)w+	(?b)w+
+m 12	iP	(?c)a		a	a
+f 13	iP	(?c)a		A
+m 14	APS	{(?e)\W+}	WW	WW
+m 15	P	(?i)a+		Aa	Aa
+f 16	P	"(?m)a.b"	"a\nb"
+m 17	P	"(?m)^b"	"a\nb"	"b"
+f 18	P	"(?n)a.b"	"a\nb"
+m 19	P	"(?n)^b"	"a\nb"	"b"
+f 20	P	"(?p)a.b"	"a\nb"
+f 21	P	"(?p)^b"	"a\nb"
+m 22	P	(?q)a+b		a+b	a+b
+m 23	nP	"(?s)a.b"	"a\nb"	"a\nb"
+m 24	xP	"(?t)a b"	"a b"	"a b"
+m 25	P	"(?w)a.b"	"a\nb"	"a\nb"
+m 26	P	"(?w)^b"	"a\nb"	"b"
+m 27	P	"(?x)a b"	"ab"	"ab"
+e 28	-	(?z)ab		BADOPT
+m 29	P	(?ici)a+	Aa	Aa
+e 30	P	(?i)(?q)a+	BADRPT
+m 31	P	(?q)(?i)a+	(?i)a+	(?i)a+
+m 32	P	(?qe)a+		a	a
+m 33	xP	"(?q)a b"	"a b"	"a b"
+m 34	P	"(?qx)a b"	"a b"	"a b"
+m 35	P	(?qi)ab		Ab	Ab
+
+
+
+doing 21 "capturing"
+m  1	-	a(b)c		abc	abc	b
+m  2	P	a(?:b)c		xabc	abc
+m  3	-	a((b))c		xabcy	abc	b	b
+m  4	P	a(?:(b))c	abcy	abc	b
+m  5	P	a((?:b))c	abc	abc	b
+m  6	P	a(?:(?:b))c	abc	abc
+i  7	Q	"a(b){0}c"	ac	{0 1}	{-1 -1}
+m  8	-	a(b)c(d)e	abcde	abcde	b	d
+m  9	-	(b)c(d)e	bcde	bcde	b	d
+m 10	-	a(b)(d)e	abde	abde	b	d
+m 11	-	a(b)c(d)	abcd	abcd	b	d
+m 12	-	(ab)(cd)	xabcdy	abcd	ab	cd
+m 13	-	a(b)?c		xabcy	abc	b
+i 14	-	a(b)?c		xacy	{1 2}	{-1 -1}
+m 15	-	a(b)?c(d)?e	xabcdey	abcde	b	d
+i 16	-	a(b)?c(d)?e	xacdey	{1 4}	{-1 -1}	{3 3}
+i 17	-	a(b)?c(d)?e	xabcey	{1 4}	{2 2}	{-1 -1}
+i 18	-	a(b)?c(d)?e	xacey	{1 3}	{-1 -1}	{-1 -1}
+m 19	-	a(b)*c		xabcy	abc	b
+i 20	-	a(b)*c		xabbbcy	{1 5}	{4 4}
+i 21	-	a(b)*c		xacy	{1 2}	{-1 -1}
+m 22	-	a(b*)c		xabbbcy	abbbc	bbb
+m 23	-	a(b*)c		xacy	ac	""
+f 24	-	a(b)+c		xacy
+m 25	-	a(b)+c		xabcy	abc	b
+i 26	-	a(b)+c		xabbbcy	{1 5}	{4 4}
+m 27	-	a(b+)c		xabbbcy	abbbc	bbb
+i 28	Q	"a(b){2,3}c"	xabbbcy	{1 5}	{4 4}
+i 29	Q	"a(b){2,3}c"	xabbcy	{1 4}	{3 3}
+f 30	Q	"a(b){2,3}c"	xabcy
+m 31	LP	"\\y(\\w+)\\y"	"-- abc-"	"abc"	"abc"
+m 32	-	a((b|c)d+)+	abacdbd	acdbd	bd	b
+m 33	N	(.*).*		abc	abc	abc
+m 34	N	(a*)*		bc	""	""
+
+
+
+doing 22 "multicharacter collating elements"
+# again ugh
+m  1	&+L	{a[c]e}		ace	ace
+f  2	&+IL	{a[c]h}		ach
+m  3	&+L	{a[[.ch.]]}	ach	ach
+f  4	&+L	{a[[.ch.]]}	ace
+m  5	&+L	{a[c[.ch.]]}	ac	ac
+m  6	&+L	{a[c[.ch.]]}	ace	ac
+m  7	&+L	{a[c[.ch.]]}	ache	ach
+f  8	&+L	{a[^c]e}	ace
+m  9	&+L	{a[^c]e}	abe	abe
+m 10	&+L	{a[^c]e}	ache	ache
+f 11	&+L	{a[^[.ch.]]}	ach
+m 12	&+L	{a[^[.ch.]]}	ace	ac
+m 13	&+L	{a[^[.ch.]]}	ac	ac
+m 14	&+L	{a[^[.ch.]]}	abe	ab
+f 15	&+L	{a[^c[.ch.]]}	ach
+f 16	&+L	{a[^c[.ch.]]}	ace
+f 17	&+L	{a[^c[.ch.]]}	ac
+m 18	&+L	{a[^c[.ch.]]}	abe	ab
+m 19	&+L	{a[^b]}		ac	ac
+m 20	&+L	{a[^b]}		ace	ac
+m 21	&+L	{a[^b]}		ach	ach
+f 22	&+L	{a[^b]}		abe
+
+
+
+doing 23 "lookahead constraints"
+m  1	HP	a(?=b)b*	ab	ab
+f  2	HP	a(?=b)b*	a
+m  3	HP	a(?=b)b*(?=c)c*	abc	abc
+f  4	HP	a(?=b)b*(?=c)c*	ab
+f  5	HP	a(?!b)b*	ab
+m  6	HP	a(?!b)b*	a	a
+m  7	HP	(?=b)b		b	b
+f  8	HP	(?=b)b		a
+
+
+
+doing 24 "non-greedy quantifiers"
+m  1	PT	ab+?		abb	ab
+m  2	PT	ab+?c		abbc	abbc
+m  3	PT	ab*?		abb	a
+m  4	PT	ab*?c		abbc	abbc
+m  5	PT	ab??		ab	a
+m  6	PT	ab??c		abc	abc
+m  7	PQT	"ab{2,4}?"	abbbb	abb
+m  8	PQT	"ab{2,4}?c"	abbbbc	abbbbc
+m  9	-	3z*		123zzzz456	3zzzz
+m 10	PT	3z*?		123zzzz456	3
+m 11	-	z*4		123zzzz456	zzzz4
+m 12	PT	z*?4		123zzzz456	zzzz4
+
+
+
+doing 25 "mixed quantifiers"
+# this is very incomplete as yet
+# should include |
+m  1	PNT	{^(.*?)(a*)$}	xyza	xyza	xyz	a
+m  2	PNT	{^(.*?)(a*)$}	xyzaa	xyzaa	xyz	aa
+m  3	PNT	{^(.*?)(a*)$}	xyz	xyz	xyz	""
+
+
+
+doing 26 "tricky cases"
+# attempts to trick the matcher into accepting a short match
+m  1	-	(week|wee)(night|knights)	weeknights	weeknights \
+	wee	knights
+m  2	RP	{a(bc*).*\1}	abccbccb	abccbccb	b
+m  3	-	{a(b.[bc]*)+}	abcbd	abcbd	bd
+
+
+
+doing 27 "implementation misc."
+# duplicate arcs are suppressed
+m  1	P	a(?:b|b)c	abc	abc
+# make color/subcolor relationship go back and forth
+m  2	&	{[ab][ab][ab]}	aba	aba
+m  3	&	{[ab][ab][ab][ab][ab][ab][ab]}	abababa	abababa
+
+
+
+doing 28 "boundary busters etc."
+# color-descriptor allocation changes at 10
+m  1	&	abcdefghijkl	abcdefghijkl	abcdefghijkl
+# so does arc allocation
+m  2	P	a(?:b|c|d|e|f|g|h|i|j|k|l|m)n	agn	agn
+# subexpression tracking also at 10
+m  3	-	a(((((((((((((b)))))))))))))c	abc	abc	b	b	b	b	b	b	b	b	b	b	b	b	b
+# state-set handling changes slightly at unsigned size (might be 64...)
+# (also stresses arc allocation)
+m  4	Q	"ab{1,100}c"	abbc	abbc
+m  5	Q	"ab{1,100}c"	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
+	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
+m  6	Q	"ab{1,100}c" \
+	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
+	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
+# force small cache and bust it, several ways
+m  7	LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
+m  8	%LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
+m  9	%LP	{\w+abcdefghijklmnopqrst}	xyzabcdefghijklmnopqrst \
+	xyzabcdefghijklmnopqrst
+i 10	%LP	{\w+(abcdefgh)?}	xyz	{0 2}	{-1 -1}
+i 11	%LP	{\w+(abcdefgh)?}	xyzabcdefg	{0 9}	{-1 -1}
+i 12	%LP	{\w+(abcdefghijklmnopqrst)?}	xyzabcdefghijklmnopqrs \
+	{0 21}	{-1 -1}
+
+
+
+doing 29 "incomplete matches"
+p  1	t	def		abc	{3 2}	""
+p  2	t	bcd		abc	{1 2}	""
+p  3	t	abc		abab	{0 3}	""
+p  4	t	abc		abdab	{3 4}	""
+i  5	t	abc		abc	{0 2}	{0 2}
+i  6	t	abc		xyabc	{2 4}	{2 4}
+p  7	t	abc+		xyab	{2 3}	""
+i  8	t	abc+		xyabc	{2 4}	{2 4}
+knownBug i  9	t	abc+		xyabcd	{2 4}	{6 5}
+i  10	t	abc+		xyabcdd	{2 4}	{7 6}
+p  11	tPT	abc+?		xyab	{2 3}	""
+# the retain numbers in these two may look wrong, but they aren't
+i  12	tPT	abc+?		xyabc	{2 4}	{5 4}
+i  13	tPT	abc+?		xyabcc	{2 4}	{6 5}
+i  14	tPT	abc+?		xyabcd	{2 4}	{6 5}
+i  15	tPT	abc+?		xyabcdd	{2 4}	{7 6}
+i  16	t	abcd|bc		xyabc	{3 4}	{2 4}
+p  17	tn	.*k		"xx\nyyy"	{3 5}	""
+
+
+doing 30 "misc. oddities and old bugs"
+e  1	&	***		BADRPT
+m  2	N	a?b*		abb	abb
+m  3	N	a?b*		bb	bb
+m  4	&	a*b		aab	aab
+m  5	&	^a*b		aaaab	aaaab
+m  6	&M	{[0-6][1-2][0-3][0-6][1-6][0-6]}	010010	010010
+# temporary REG_BOSONLY kludge
+m  7	s	abc		abcd	abc
+f  8	s	abc		xabcd
+# back to normal stuff
+m  9	HLP	{(?n)^(?![t#])\S+}	"tk\n\n#\n#\nit0"	it0
+
+
+# flush any leftover complaints
+doing 0 "flush"
+
+# Tests resulting from bugs reported by users
+test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
+    set str {2:::DebugWin32}
+    set re {([[:xdigit:]])([[:space:]]*)}
+    list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
+    # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
+} {1 2 2 {}}
+
+test reg-32.1 {canmatch functionality -- at end} {
+    set pat {blah}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 7}
+
+test reg-32.2 {canmatch functionality -- at end} {
+    set pat {s%$}
+    set line "asd asd"
+    # can only match after the end of the string
+    set res [testregexp -xflags -- c $pat $line resvar] 
+    lappend res $resvar
+} {0 7}
+
+test reg-32.3 {canmatch functionality -- not last char} {
+    set pat {[^d]%$}
+    set line "asd asd"
+    # can only match after the end of the string
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 7}
+
+test reg-32.3.1 {canmatch functionality -- no match} {
+    set pat {\Zx}
+    set line "asd asd"
+    # can match the last char, if followed by x
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 -1}
+
+test reg-32.4 {canmatch functionality -- last char} {knownBug} {
+    set pat {.x}
+    set line "asd asd"
+    # can match the last char, if followed by x
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.4.1 {canmatch functionality -- last char} {knownBug} {
+    set pat {.x$}
+    set line "asd asd"
+    # can match the last char, if followed by x
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.5 {canmatch functionality -- last char} {knownBug} {
+    set pat {.[^d]x$}
+    set line "asd asd"
+    # can match the last char, if followed by not-d and x.
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.6 {canmatch functionality -- last char} {knownBug} {
+    set pat {[^a]%[^\r\n]*$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.7 {canmatch functionality -- last char} {knownBug} {
+    set pat {[^a]%$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.8 {canmatch functionality -- last char} {knownBug} {
+    set pat {[^x]%$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.9 {canmatch functionality -- more complex case} {knownBug} {
+    set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+# Tests reg-33.*: Checks for bug fixes
+
+test reg-33.1 {Bug 230589} {
+    regexp {[ ]*(^|[^%])%V} "*%V2" m s
+} 1
+
+test reg-33.2 {Bug 504785} {
+    regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
+} {bbcos_001_c01.q1la bbcos _001_c01 q1 l}
+
+test reg-33.3 {Bug 505048} {
+    regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
+} 1
+
+test reg-33.4 {Bug 505048} {
+    regexp {\A\s*([^b]*)b} ab
+} 1
+
+test reg-33.5 {Bug 505048} {
+    regexp {\A\s*[^b]*(b)} ab
+} 1
+
+test reg-33.6 {Bug 505048} {
+    regexp {\A(\s*)[^b]*(b)} ab
+} 1
+
+test reg-33.7 {Bug 505048} {
+    regexp {\A\s*[^b]*b} ab
+} 1
+
+test reg-33.8 {Bug 505048} {
+    regexp -inline {\A\s*[^b]*b} ab
+} ab
+
+test reg-33.9 {Bug 505048} {
+    regexp -indices -inline {\A\s*[^b]*b} ab
+} {{0 1}}
+
+test reg-33.10 {Bug 840258} {
+    regsub {(^|\n)+\.*b} \n.b {} tmp
+} 1
+
+test reg-33.11 {Bug 840258} {
+    regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
+            "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
+} 1
+
+# cleanup
+::tcltest::cleanupTests
+return
diff --git a/tests/regex/regex.cpp b/tests/regex/regex.cpp
new file mode 100644
index 0000000000..733e5aed69
--- /dev/null
+++ b/tests/regex/regex.cpp
@@ -0,0 +1,421 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        tests/regex/regex.cpp
+// Purpose:     Test the built-in regex lib and wxRegEx
+// Author:      Mike Wetherell
+// RCS-ID:      $Id$
+// Copyright:   (c) 2004 Mike Wetherell
+// Licence:     wxWidgets licence
+///////////////////////////////////////////////////////////////////////////////
+
+//
+// Notes:
+//
+// To run just one section, say wx_1, do this:
+//  test regex.wx_1
+//
+// To run all the regex tests:
+//  test regex
+// 
+// Some tests must be skipped since they use features which we do not make
+// available through wxRegEx. To see the list of tests that have been skipped
+// turn on verbose logging, e.g.:
+//  test --verbose regex
+// 
+// The tests here are for the builtin library, tests for wxRegEx in general
+// should go in another module.
+//
+// The tests are generated from Henry Spencer's reg.test; additional tests
+// can be added in wxreg.test. These test files are then turned into a C++
+// include file 'regex.inc' (included below) using a script 'regex.pl'.
+// 
+
+#if defined(__GNUG__) && !defined(__APPLE__)
+    #pragma implementation
+    #pragma interface
+#endif
+
+// For compilers that support precompilation, includes "wx/wx.h".
+#include "wx/wxprec.h"
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+// for all others, include the necessary headers
+#ifndef WX_PRECOMP
+    #include "wx/wx.h"
+#endif
+
+#include "wx/regex.h"
+#include "wx/cppunit.h"
+#include <iomanip>
+#include <stdexcept>
+
+using namespace std;
+using namespace CppUnit;
+
+// many of the tests are specific to the builtin regex lib, so they are only
+// attempted when using the builtin regex lib.
+//
+#ifdef wxHAS_REGEX_ADVANCED
+
+
+///////////////////////////////////////////////////////////////////////////////
+// The test case - an instance represents a single test
+
+class RegExTestCase : public TestCase
+{
+public:
+    // constructor - create a single testcase; the strings are UTF-8 encoded
+    RegExTestCase(
+        const string& name,
+        const char *mode,
+        const char *id,
+        const char *flags,
+        const char *pattern,
+        const char *data,
+        const vector<const char *>& expected);
+
+protected:
+    // run this testcase, once for each regex flavour selected by the flags
+    void runTest();
+
+private:
+    // workers
+    wxString Conv(const char *str);             // UTF-8 -> wxString
+    void parseFlags(const wxString& flags);     // decode the flag characters
+    void doTest(int flavor);                    // test one wxRE_* flavour
+    static size_t matchCount(const wxString& expr, int flags);
+    static wxString quote(const wxString& arg);
+    const wxChar *convError() const { return _T("<cannot convert>"); }
+
+    // assertions - adds some information about the test that failed
+    void fail(const wxString& msg) const;
+    void failIf(bool condition, const wxString& msg) const
+        { if (condition) fail(msg); }
+
+    // mode, id, flags, pattern, test data, expected results...
+    int m_mode;                 // first character of the mode string
+    wxString m_id;
+    wxString m_flags;
+    wxString m_pattern;
+    wxString m_data;
+    wxArrayString m_expected;
+
+    // the flags as decoded by parseFlags()
+    int m_compileFlags;         // passed to the wxRegEx constructor
+    int m_matchFlags;           // passed to wxRegEx::Matches
+    bool m_basic;               // test as wxRE_BASIC
+    bool m_extended;            // test as wxRE_EXTENDED
+    bool m_advanced;            // test as wxRE_ADVANCED
+};
+
+// constructor - throws Exception on failure
+// (conversion failures and unsupported flags are reported through fail())
+RegExTestCase::RegExTestCase(
+    const string& name,
+    const char *mode,
+    const char *id,
+    const char *flags,
+    const char *pattern,
+    const char *data,
+    const vector<const char *>& expected)
+  :
+    TestCase(name),
+    m_mode(mode[0]),
+    m_id(Conv(id)),
+    m_flags(Conv(flags)),
+    m_pattern(Conv(pattern)),
+    m_data(Conv(data)),
+    m_compileFlags(0),
+    m_matchFlags(0),
+    m_basic(false),
+    m_extended(false),
+    m_advanced(false)
+{
+    bool badconv = m_pattern == convError() || m_data == convError();
+    vector<const char *>::const_iterator it;
+
+    for (it = expected.begin(); it != expected.end(); ++it) {
+        m_expected.push_back(Conv(*it));
+        badconv = badconv || *m_expected.rbegin() == convError();
+    }
+
+    failIf(badconv, _T("cannot convert to default character encoding"));
+    
+    // the flags need further parsing...
+    parseFlags(m_flags);
+
+#ifndef wxHAS_REGEX_ADVANCED    // NOTE(review): never true inside this file
+    failIf(!m_basic && !m_extended, _T("advanced regexs not available"));
+#endif
+}
+
+// Convert a UTF-8 encoded C string to the internal encoding, giving
+// convError() when it does not survive a round trip through wxConvCurrent
+wxString RegExTestCase::Conv(const char *str)
+{
+    const wxWCharBuffer wide = wxConvUTF8.cMB2WC(str);
+    const wxWC2WXbuf native = wxConvCurrent->cWC2WX(wide);
+
+    if (native && wcscmp(wxConvCurrent->cWX2WC(native), wide) == 0)
+        return native;
+    else
+        return convError();
+}
+
+// Parse flags: decode each flag character into match flags, compile flags
+// and the regex flavours to test; an unsupported flag is reported by fail()
+void RegExTestCase::parseFlags(const wxString& flags)
+{
+    for (const wxChar *p = flags; *p; p++) {
+        switch (*p) {
+            // noop
+            case '-': break;
+
+            // we don't fully support these flags, but they don't stop us
+            // checking for success or failure of the match, so treat as noop
+            case 'A': case 'B': case 'E': case 'H':
+            case 'I': case 'L': case 'M': case 'N':
+            case 'P': case 'Q': case 'R': case 'S':
+            case 'T': case 'U': case '%':
+                break;
+
+            // match options
+            case '^': m_matchFlags |= wxRE_NOTBOL; break;
+            case '$': m_matchFlags |= wxRE_NOTEOL; break;
+#if wxUSE_UNICODE
+            case '*': break;    // accepted in Unicode builds only
+#endif
+            // compile options
+            case '&': m_advanced = m_basic = true; break;
+            case 'b': m_basic = true; break;
+            case 'e': m_extended = true; break;
+            case 'i': m_compileFlags |= wxRE_ICASE; break;
+            case 'o': m_compileFlags |= wxRE_NOSUB; break;
+            case 'n': m_compileFlags |= wxRE_NEWLINE; break;
+            case 't': if (strchr("ep", m_mode)) break; // else fall through...
+
+            // anything else we must skip the test
+            default:
+                fail(wxString::Format(
+                     _T("requires unsupported flag '%c'"), *p));
+        }
+    }
+}
+
+// Run the test once for every regex flavour selected by the flags; a test
+// that names neither basic nor extended is run as an advanced expression
+void RegExTestCase::runTest()
+{
+    if (m_basic)
+        doTest(wxRE_BASIC);
+    if (m_extended)
+        doTest(wxRE_EXTENDED);
+#ifdef wxHAS_REGEX_ADVANCED
+    if (m_advanced || !(m_basic || m_extended))
+        doTest(wxRE_ADVANCED);
+#endif
+}
+    
+// Try the test for a single flavour of expression (flavor is one of
+// wxRE_BASIC, wxRE_EXTENDED or wxRE_ADVANCED); any mismatch calls fail()
+void RegExTestCase::doTest(int flavor)
+{
+    wxRegEx re(m_pattern, m_compileFlags | flavor);
+
+    // 'e' - test that the pattern fails to compile
+    if (m_mode == 'e')
+        return failIf(re.IsValid(), _T("compile suceeded (should fail)"));
+    failIf(!re.IsValid(), _T("compile failed"));
+
+    bool matches = re.Matches(m_data, m_matchFlags);
+
+    // 'f' or 'p' - test that the pattern does not match
+    if (m_mode == 'f' || m_mode == 'p')
+        return failIf(matches, _T("match suceeded (should fail)"));
+
+    // otherwise 'm' or 'i' - test the pattern does match
+    failIf(!matches, _T("match failed"));
+
+    // Check that wxRegEx is going to allocate a large enough array for the
+    // results we are supposed to get
+    failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor),
+           _T("wxRegEx has not allocated a large enough array for the ")
+           _T("number of results expected"));
+
+    wxString result;
+    size_t start, len;
+
+    for (size_t i = 0; i < m_expected.size(); i++) {
+        failIf(!re.GetMatch(&start, &len, i), wxString::Format(
+                _T("wxRegEx::GetMatch failed for match %d"), (int)i));
+
+        // a start of INT_MAX or more is reported as "no match" below
+        const bool found = start < INT_MAX;
+
+        // m - check the match returns the strings given
+        if (m_mode == 'm') {
+            result = found ? wxString(m_data.substr(start, len)) : wxString();
+        }
+        // i - check the match returns the offsets given; size_t values are
+        // cast because %d expects an int and size_t may be wider than that
+        else if (m_mode == 'i') {
+            result = !found ? wxString(_T("-1 -1")) : wxString::Format(
+                _T("%d %d"), (int)start, (int)(start + len) - 1);
+        }
+
+        failIf(result != m_expected[i], wxString::Format(
+                _T("match(%d) == %s, expected == %s"), (int)i,
+                quote(result).c_str(), quote(m_expected[i]).c_str()));
+    }
+}
+
+// assertion - adds some information about the test that failed: the test's
+// own fields are put on a first line, followed by msg on a second line
+void RegExTestCase::fail(const wxString& msg) const
+{
+    wxString str;
+    wxArrayString::const_iterator it;
+
+    str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ")
+        << quote(m_pattern) << _T(" ") << quote(m_data);
+
+    for (it = m_expected.begin(); it != m_expected.end(); ++it)
+        str << _T(" ") << quote(*it);
+    
+    if (str.length() > 77)      // truncate to 74 characters plus "..."
+        str = str.substr(0, 74) + _T("...");
+
+    str << _T("\n ") << msg;
+
+    // no lossy convs so using utf8
+    CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
+}
+
+// quote a string so that it can be displayed (static); empty strings are
+// quoted too, so that they remain visible in the failure messages
+wxString RegExTestCase::quote(const wxString& arg)
+{
+    const wxChar *needEscape = _T("\a\b\t\n\v\f\r\"\\");
+    const wxChar *escapes = _T("abtnvfr\"\\");
+    wxString str;
+
+    for (size_t i = 0; i < arg.length(); i++) {
+        wxUChar ch = arg[i];
+        const wxChar *p = wxStrchr(needEscape, ch);
+        if (p)
+            str += wxString::Format(_T("\\%c"), escapes[p - needEscape]);
+        else if (wxIscntrl(ch))
+            str += wxString::Format(_T("\\%03o"), ch);
+        else
+            str += ch;
+    }
+
+    // bare only if non-empty, nothing was escaped and there are no spaces
+    const bool bare = arg.length() != 0 && str.length() == arg.length() &&
+                      str.find(' ') == wxString::npos;
+    return bare ? str : _T("\"") + str + _T("\"");
+}
+
+// Count the number of subexpressions (taken from wxRegExImpl::Compile)
+//
+size_t RegExTestCase::matchCount(const wxString& expr, int flags)
+{
+    // there is always one for the whole expression
+    size_t nMatches = 1;
+
+    // and some more for bracketed subexpressions
+    for ( const wxChar *cptr = expr; *cptr; cptr++ )
+    {
+        if ( *cptr == _T('\\') )
+        {
+            // skip the escaped character, taking care not to step past the
+            // terminating NUL if the expression ends in a lone backslash
+            if ( !*++cptr )
+                break;
+            // in basic RE syntax groups are inside \(...\)
+            if ( *cptr == _T('(') && (flags & wxRE_BASIC) )
+                nMatches++;
+        }
+        else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
+        {
+            // an unescaped '(' (escaped ones were consumed above) starts a
+            // group in the non-basic syntaxes
+            nMatches++;
+        }
+    }
+
+    return nMatches;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Test suite
+//
+// In a non-unicode build the regex is affected by the current locale, so
+// this derived TestSuite is used. It sets the locale in its run() method
+// for the duration of the regex tests.
+
+class RegExTestSuite : public TestSuite
+{
+public:
+    RegExTestSuite(string name);
+    void run(TestResult *result);
+    // the expected results form a variable length list ended by a null pointer
+    void add(const char *mode, const char *id, const char *flags,
+             const char *pattern, const char *data, const char *expected, ...);
+};
+
+// constructor, sets the locale so that it is set when the tests are added
+// (the empty name selects the locale configured in the environment)
+RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
+{
+    setlocale(LC_ALL, "");
+}
+
+// run the test suite, sets the locale again since it may have been changed
+// by another test since this suite was created
+//
+void RegExTestSuite::run(TestResult *result)
+{
+    setlocale(LC_ALL, "");
+    TestSuite::run(result);
+}
+
+// Add a testcase to the suite. The expected results are a list ended by a
+// null pointer; a test that cannot be constructed is logged and skipped.
+void RegExTestSuite::add(
+    const char *mode,
+    const char *id,
+    const char *flags,
+    const char *pattern,
+    const char *data,
+    const char *expected, ...)
+{
+    string name = getName() + "." + id;
+
+    vector<const char *> expected_results;
+    va_list ap;
+
+    for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
+        expected_results.push_back(expected);
+    va_end(ap);
+
+    try {
+        addTest(new RegExTestCase(
+            name, mode, id, flags, pattern, data, expected_results));
+    }
+    catch (Exception& e) {
+        // the texts may contain '%', so they must not be part of the format
+        wxLogInfo(_T("skipping: %s\n %s\n"),
+            wxString(name.c_str(), wxConvUTF8).c_str(),
+            wxString(e.what(), wxConvUTF8).c_str());
+    }
+}
+
+
+// Include the generated tests
+//
+#include "regex.inc"
+
+
+#endif // wxHAS_REGEX_ADVANCED
diff --git a/tests/regex/regex.inc b/tests/regex/regex.inc
new file mode 100644
index 0000000000..e53d36451a
--- /dev/null
+++ b/tests/regex/regex.inc
@@ -0,0 +1,1361 @@
+/*
+ * Test data for wxRegEx (UTF-8 encoded)
+ * 
+ * Generated Fri Mar 5 21:35:22 2004 by regex.pl from the following files:
+ * 
+ *   reg.test: Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *   wxreg.test: Copyright (c) 2004 Mike Wetherell.
+ * 
+ * Test types:
+ *  	e	compile error expected
+ *  	f	match failure expected
+ *  	m	successful match
+ *  	i	successful match with -indices (used in checking things like
+ *  		nonparticipating subexpressions)
+ *  	p	unsuccessful match with -indices (!!) (used in checking
+ *  		partial-match reporting)
+ * 
+ * Flag characters:
+ *  	-	no-op (placeholder)
+ *  	+	provide fake xy equivalence class and ch collating element
+ *  	%	force small state-set cache in matcher (to test cache replace)
+ *  	^	beginning of string is not beginning of line
+ *  	$	end of string is not end of line
+ *  	*	test is Unicode-specific, needs big character set
+ * 
+ *  	&	test as both ARE and BRE
+ *  	b	BRE
+ *  	e	ERE
+ *  	a	turn advanced-features bit on (error unless ERE already)
+ *  	q	literal string, no metacharacters at all
+ * 
+ *  	i	case-independent matching
+ *  	o	("opaque") no subexpression capture
+ *  	p	newlines are half-magic, excluded from . and [^ only
+ *  	w	newlines are half-magic, significant to ^ and $ only
+ *  	n	newlines are fully magic, both effects
+ *  	x	expanded RE syntax
+ *  	t	incomplete-match reporting
+ * 
+ *  	A	backslash-_a_lphanumeric seen
+ *  	B	ERE/ARE literal-_b_race heuristic used
+ *  	E	backslash (_e_scape) seen within []
+ *  	H	looka_h_ead constraint seen
+ *  	I	_i_mpossible to match
+ *  	L	_l_ocale-specific construct seen
+ *  	M	unportable (_m_achine-specific) construct seen
+ *  	N	RE can match empty (_n_ull) string
+ *  	P	non-_P_OSIX construct seen
+ *  	Q	{} _q_uantifier seen
+ *  	R	back _r_eference seen
+ *  	S	POSIX-un_s_pecified syntax seen
+ *  	T	prefers shortest (_t_iny)
+ *  	U	saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
+ */
+
+
+/*
+ * 1 basic sanity checks
+ */
+
+class regextest_1 : public RegExTestSuite
+{
+public:
+    regextest_1() : RegExTestSuite("regex.1") { }
+    static Test *suite();
+};
+
+Test *regextest_1::suite()
+{
+    RegExTestSuite *suite = new regextest_1;
+
+    suite->add("m", "1", "&", "abc", "abc", "abc", NULL);
+    suite->add("f", "2", "&", "abc", "def", NULL);
+    suite->add("m", "3", "&", "abc", "xyabxabce", "abc", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_1, "regex.1");
+
+
+/*
+ * 2 invalid option combinations
+ */
+
+class regextest_2 : public RegExTestSuite
+{
+public:
+    regextest_2() : RegExTestSuite("regex.2") { }
+    static Test *suite();
+};
+
+Test *regextest_2::suite()
+{
+    RegExTestSuite *suite = new regextest_2;
+
+    suite->add("e", "1", "qe", "a", "INVARG", NULL);
+    suite->add("e", "2", "qa", "a", "INVARG", NULL);
+    suite->add("e", "3", "qx", "a", "INVARG", NULL);
+    suite->add("e", "4", "qn", "a", "INVARG", NULL);
+    suite->add("e", "5", "ba", "a", "INVARG", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_2, "regex.2");
+
+
+/*
+ * 3 basic syntax
+ */
+
+class regextest_3 : public RegExTestSuite
+{
+public:
+    regextest_3() : RegExTestSuite("regex.3") { }
+    static Test *suite();
+};
+
+Test *regextest_3::suite()
+{
+    RegExTestSuite *suite = new regextest_3;
+
+    suite->add("i", "1", "&NS", "", "a", "0 -1", NULL);
+    suite->add("m", "2", "NS", "a|", "a", "a", NULL);
+    suite->add("m", "3", "-", "a|b", "a", "a", NULL);
+    suite->add("m", "4", "-", "a|b", "b", "b", NULL);
+    suite->add("m", "5", "NS", "a||b", "b", "b", NULL);
+    suite->add("m", "6", "&", "ab", "ab", "ab", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_3, "regex.3");
+
+
+/*
+ * 4 parentheses
+ */
+
+class regextest_4 : public RegExTestSuite
+{
+public:
+    regextest_4() : RegExTestSuite("regex.4") { }
+    static Test *suite();
+};
+
+Test *regextest_4::suite()
+{
+    RegExTestSuite *suite = new regextest_4;
+
+    suite->add("m", "1", "-", "(a)e", "ae", "ae", "a", NULL);
+    suite->add("m", "2", "o", "(a)e", "ae", NULL);
+    suite->add("m", "3", "b", "\\(a\\)b", "ab", "ab", "a", NULL);
+    suite->add("m", "4", "-", "a((b)c)", "abc", "abc", "bc", "b", NULL);
+    suite->add("m", "5", "-", "a(b)(c)", "abc", "abc", "b", "c", NULL);
+    suite->add("e", "6", "-", "a(b", "EPAREN", NULL);
+    suite->add("e", "7", "b", "a\\(b", "EPAREN", NULL);
+    suite->add("m", "8", "eU", "a)b", "a)b", "a)b", NULL);
+    suite->add("e", "9", "-", "a)b", "EPAREN", NULL);
+    suite->add("e", "10", "b", "a\\)b", "EPAREN", NULL);
+    suite->add("m", "11", "P", "a(?:b)c", "abc", "abc", NULL);
+    suite->add("e", "12", "e", "a(?:b)c", "BADRPT", NULL);
+    suite->add("i", "13", "S", "a()b", "ab", "0 1", "1 0", NULL);
+    suite->add("m", "14", "SP", "a(?:)b", "ab", "ab", NULL);
+    suite->add("i", "15", "S", "a(|b)c", "ac", "0 1", "1 0", NULL);
+    suite->add("m", "16", "S", "a(b|)c", "abc", "abc", "b", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_4, "regex.4");
+
+
+/*
+ * 5 simple one-char matching
+ */
+
+class regextest_5 : public RegExTestSuite
+{
+public:
+    regextest_5() : RegExTestSuite("regex.5") { }
+    static Test *suite();
+};
+
+Test *regextest_5::suite()
+{
+    RegExTestSuite *suite = new regextest_5;
+
+    suite->add("m", "1", "&", "a.b", "axb", "axb", NULL);
+    suite->add("f", "2", "&n", "a.b", "a\nb", NULL);
+    suite->add("m", "3", "&", "a[bc]d", "abd", "abd", NULL);
+    suite->add("m", "4", "&", "a[bc]d", "acd", "acd", NULL);
+    suite->add("f", "5", "&", "a[bc]d", "aed", NULL);
+    suite->add("f", "6", "&", "a[^bc]d", "abd", NULL);
+    suite->add("m", "7", "&", "a[^bc]d", "aed", "aed", NULL);
+    suite->add("f", "8", "&p", "a[^bc]d", "a\nd", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_5, "regex.5");
+
+
+/*
+ * 6 context-dependent syntax
+ */
+
+class regextest_6 : public RegExTestSuite
+{
+public:
+    regextest_6() : RegExTestSuite("regex.6") { }
+    static Test *suite();
+};
+
+Test *regextest_6::suite()
+{
+    RegExTestSuite *suite = new regextest_6;
+
+    suite->add("e", "1", "-", "*", "BADRPT", NULL);
+    suite->add("m", "2", "b", "*", "*", "*", NULL);
+    suite->add("m", "3", "b", "\\(*\\)", "*", "*", "*", NULL);
+    suite->add("e", "4", "-", "(*)", "BADRPT", NULL);
+    suite->add("m", "5", "b", "^*", "*", "*", NULL);
+    suite->add("e", "6", "-", "^*", "BADRPT", NULL);
+    suite->add("f", "7", "&", "^b", "^b", NULL);
+    suite->add("m", "8", "b", "x^", "x^", "x^", NULL);
+    suite->add("f", "9", "I", "x^", "x", NULL);
+    suite->add("m", "10", "n", "\n^", "x\nb", "\n", NULL);
+    suite->add("f", "11", "bS", "\\(^b\\)", "^b", NULL);
+    suite->add("m", "12", "-", "(^b)", "b", "b", "b", NULL);
+    suite->add("m", "13", "&", "x$", "x", "x", NULL);
+    suite->add("m", "14", "bS", "\\(x$\\)", "x", "x", "x", NULL);
+    suite->add("m", "15", "-", "(x$)", "x", "x", "x", NULL);
+    suite->add("m", "16", "b", "x$y", "x$y", "x$y", NULL);
+    suite->add("f", "17", "I", "x$y", "xy", NULL);
+    suite->add("m", "18", "n", "x$\n", "x\n", "x\n", NULL);
+    suite->add("e", "19", "-", "+", "BADRPT", NULL);
+    suite->add("e", "20", "-", "?", "BADRPT", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_6, "regex.6");
+
+
+/*
+ * 7 simple quantifiers
+ */
+
+class regextest_7 : public RegExTestSuite
+{
+public:
+    regextest_7() : RegExTestSuite("regex.7") { }
+    static Test *suite();
+};
+
+Test *regextest_7::suite()
+{
+    RegExTestSuite *suite = new regextest_7;
+
+    suite->add("m", "1", "&N", "a*", "aa", "aa", NULL);
+    suite->add("i", "2", "&N", "a*", "b", "0 -1", NULL);
+    suite->add("m", "3", "-", "a+", "aa", "aa", NULL);
+    suite->add("m", "4", "-", "a?b", "ab", "ab", NULL);
+    suite->add("m", "5", "-", "a?b", "b", "b", NULL);
+    suite->add("e", "6", "-", "**", "BADRPT", NULL);
+    suite->add("m", "7", "bN", "**", "***", "***", NULL);
+    suite->add("e", "8", "&", "a**", "BADRPT", NULL);
+    suite->add("e", "9", "&", "a**b", "BADRPT", NULL);
+    suite->add("e", "10", "&", "***", "BADRPT", NULL);
+    suite->add("e", "11", "-", "a++", "BADRPT", NULL);
+    suite->add("e", "12", "-", "a?+", "BADRPT", NULL);
+    suite->add("e", "13", "-", "a?*", "BADRPT", NULL);
+    suite->add("e", "14", "-", "a+*", "BADRPT", NULL);
+    suite->add("e", "15", "-", "a*+", "BADRPT", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_7, "regex.7");
+
+
+/*
+ * 8 braces
+ */
+
+class regextest_8 : public RegExTestSuite
+{
+public:
+    regextest_8() : RegExTestSuite("regex.8") { }
+    static Test *suite();
+};
+
+Test *regextest_8::suite()
+{
+    RegExTestSuite *suite = new regextest_8;
+
+    suite->add("m", "1", "NQ", "a{0,1}", "", "", NULL);
+    suite->add("m", "2", "NQ", "a{0,1}", "ac", "a", NULL);
+    suite->add("e", "3", "-", "a{1,0}", "BADBR", NULL);
+    suite->add("e", "4", "-", "a{1,2,3}", "BADBR", NULL);
+    suite->add("e", "5", "-", "a{257}", "BADBR", NULL);
+    suite->add("e", "6", "-", "a{1000}", "BADBR", NULL);
+    suite->add("e", "7", "-", "a{1", "EBRACE", NULL);
+    suite->add("e", "8", "-", "a{1n}", "BADBR", NULL);
+    suite->add("m", "9", "BS", "a{b", "a{b", "a{b", NULL);
+    suite->add("m", "10", "BS", "a{", "a{", "a{", NULL);
+    suite->add("m", "11", "bQ", "a\\{0,1\\}b", "cb", "b", NULL);
+    suite->add("e", "12", "b", "a\\{0,1", "EBRACE", NULL);
+    suite->add("e", "13", "-", "a{0,1\\", "BADBR", NULL);
+    suite->add("m", "14", "Q", "a{0}b", "ab", "b", NULL);
+    suite->add("m", "15", "Q", "a{0,0}b", "ab", "b", NULL);
+    suite->add("m", "16", "Q", "a{0,1}b", "ab", "ab", NULL);
+    suite->add("m", "17", "Q", "a{0,2}b", "b", "b", NULL);
+    suite->add("m", "18", "Q", "a{0,2}b", "aab", "aab", NULL);
+    suite->add("m", "19", "Q", "a{0,}b", "aab", "aab", NULL);
+    suite->add("m", "20", "Q", "a{1,1}b", "aab", "ab", NULL);
+    suite->add("m", "21", "Q", "a{1,3}b", "aaaab", "aaab", NULL);
+    suite->add("f", "22", "Q", "a{1,3}b", "b", NULL);
+    suite->add("m", "23", "Q", "a{1,}b", "aab", "aab", NULL);
+    suite->add("f", "24", "Q", "a{2,3}b", "ab", NULL);
+    suite->add("m", "25", "Q", "a{2,3}b", "aaaab", "aaab", NULL);
+    suite->add("f", "26", "Q", "a{2,}b", "ab", NULL);
+    suite->add("m", "27", "Q", "a{2,}b", "aaaab", "aaaab", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_8, "regex.8");
+
+
+/*
+ * 9 brackets
+ */
+
+class regextest_9 : public RegExTestSuite
+{
+public:
+    regextest_9() : RegExTestSuite("regex.9") { }
+    static Test *suite();
+};
+
+Test *regextest_9::suite()
+{
+    RegExTestSuite *suite = new regextest_9;
+
+    suite->add("m", "1", "&", "a[bc]", "ac", "ac", NULL);
+    suite->add("m", "2", "&", "a[-]", "a-", "a-", NULL);
+    suite->add("m", "3", "&", "a[[.-.]]", "a-", "a-", NULL);
+    suite->add("m", "4", "&L", "a[[.zero.]]", "a0", "a0", NULL);
+    suite->add("m", "5", "&LM", "a[[.zero.]-9]", "a2", "a2", NULL);
+    suite->add("m", "6", "&M", "a[0-[.9.]]", "a2", "a2", NULL);
+    suite->add("m", "7", "&+L", "a[[=x=]]", "ax", "ax", NULL);
+    suite->add("m", "8", "&+L", "a[[=x=]]", "ay", "ay", NULL);
+    suite->add("f", "9", "&+L", "a[[=x=]]", "az", NULL);
+    suite->add("e", "10", "&", "a[0-[=x=]]", "ERANGE", NULL);
+    suite->add("m", "11", "&L", "a[[:digit:]]", "a0", "a0", NULL);
+    suite->add("e", "12", "&", "a[[:woopsie:]]", "ECTYPE", NULL);
+    suite->add("f", "13", "&L", "a[[:digit:]]", "ab", NULL);
+    suite->add("e", "14", "&", "a[0-[:digit:]]", "ERANGE", NULL);
+    suite->add("m", "15", "&LP", "[[:<:]]a", "a", "a", NULL);
+    suite->add("m", "16", "&LP", "a[[:>:]]", "a", "a", NULL);
+    suite->add("e", "17", "&", "a[[..]]b", "ECOLLATE", NULL);
+    suite->add("e", "18", "&", "a[[==]]b", "ECOLLATE", NULL);
+    suite->add("e", "19", "&", "a[[::]]b", "ECTYPE", NULL);
+    suite->add("e", "20", "&", "a[[.a", "EBRACK", NULL);
+    suite->add("e", "21", "&", "a[[=a", "EBRACK", NULL);
+    suite->add("e", "22", "&", "a[[:a", "EBRACK", NULL);
+    suite->add("e", "23", "&", "a[", "EBRACK", NULL);
+    suite->add("e", "24", "&", "a[b", "EBRACK", NULL);
+    suite->add("e", "25", "&", "a[b-", "EBRACK", NULL);
+    suite->add("e", "26", "&", "a[b-c", "EBRACK", NULL);
+    suite->add("m", "27", "&M", "a[b-c]", "ab", "ab", NULL);
+    suite->add("m", "28", "&", "a[b-b]", "ab", "ab", NULL);
+    suite->add("m", "29", "&M", "a[1-2]", "a2", "a2", NULL);
+    suite->add("e", "30", "&", "a[c-b]", "ERANGE", NULL);
+    suite->add("e", "31", "&", "a[a-b-c]", "ERANGE", NULL);
+    suite->add("m", "32", "&M", "a[--?]b", "a?b", "a?b", NULL);
+    suite->add("m", "33", "&", "a[---]b", "a-b", "a-b", NULL);
+    suite->add("m", "34", "&", "a[]b]c", "a]c", "a]c", NULL);
+    suite->add("m", "35", "EP", "a[\\]]b", "a]b", "a]b", NULL);
+    suite->add("f", "36", "bE", "a[\\]]b", "a]b", NULL);
+    suite->add("m", "37", "bE", "a[\\]]b", "a\\]b", "a\\]b", NULL);
+    suite->add("m", "38", "eE", "a[\\]]b", "a\\]b", "a\\]b", NULL);
+    suite->add("m", "39", "EP", "a[\\\\]b", "a\\b", "a\\b", NULL);
+    suite->add("m", "40", "eE", "a[\\\\]b", "a\\b", "a\\b", NULL);
+    suite->add("m", "41", "bE", "a[\\\\]b", "a\\b", "a\\b", NULL);
+    suite->add("e", "42", "-", "a[\\Z]b", "EESCAPE", NULL);
+    suite->add("m", "43", "&", "a[[b]c", "a[c", "a[c", NULL);
+    suite->add("m", "44", "EMP*", "a[\\u00fe-\\u0507][\\u00ff-\\u0300]b", "aĂ˿b", "aĂ˿b", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_9, "regex.9");
+
+
+/*
+ * 10 anchors and newlines
+ */
+
+class regextest_10 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.10" suite
+    regextest_10() : RegExTestSuite("regex.10") {}
+};
+
+Test *regextest_10::suite()
+{
+    RegExTestSuite *cases = new regextest_10;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "&", "^a", "a", "a", NULL);
+    cases->add("f", "2", "&^", "^a", "a", NULL);
+    cases->add("i", "3", "&N", "^", "a", "0 -1", NULL);
+    cases->add("i", "4", "&", "a$", "aba", "2 2", NULL);
+    cases->add("f", "5", "&$", "a$", "a", NULL);
+    cases->add("i", "6", "&N", "$", "ab", "2 1", NULL);
+    cases->add("m", "7", "&n", "^a", "a", "a", NULL);
+    cases->add("m", "8", "&n", "^a", "b\na", "a", NULL);
+    cases->add("i", "9", "&w", "^a", "a\na", "0 0", NULL);
+    cases->add("i", "10", "&n^", "^a", "a\na", "2 2", NULL);
+    cases->add("m", "11", "&n", "a$", "a", "a", NULL);
+    cases->add("m", "12", "&n", "a$", "a\nb", "a", NULL);
+    cases->add("i", "13", "&n", "a$", "a\na", "0 0", NULL);
+    cases->add("i", "14", "N", "^^", "a", "0 -1", NULL);
+    cases->add("m", "15", "b", "^^", "^", "^", NULL);
+    cases->add("i", "16", "N", "$$", "a", "1 0", NULL);
+    cases->add("m", "17", "b", "$$", "$", "$", NULL);
+    cases->add("m", "18", "&N", "^$", "", "", NULL);
+    cases->add("f", "19", "&N", "^$", "a", NULL);
+    cases->add("i", "20", "&nN", "^$", "a\n\nb", "2 1", NULL);
+    cases->add("m", "21", "N", "$^", "", "", NULL);
+    cases->add("m", "22", "b", "$^", "$^", "$^", NULL);
+    cases->add("m", "23", "P", "\\Aa", "a", "a", NULL);
+    cases->add("m", "24", "^P", "\\Aa", "a", "a", NULL);
+    cases->add("f", "25", "^nP", "\\Aa", "b\na", NULL);
+    cases->add("m", "26", "P", "a\\Z", "a", "a", NULL);
+    cases->add("m", "27", "$P", "a\\Z", "a", "a", NULL);
+    cases->add("f", "28", "$nP", "a\\Z", "a\nb", NULL);
+    cases->add("e", "29", "-", "^*", "BADRPT", NULL);
+    cases->add("e", "30", "-", "$*", "BADRPT", NULL);
+    cases->add("e", "31", "-", "\\A*", "BADRPT", NULL);
+    cases->add("e", "32", "-", "\\Z*", "BADRPT", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_10, "regex.10");
+
+
+/*
+ * 11 boundary constraints
+ */
+
+class regextest_11 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.11" suite
+    regextest_11() : RegExTestSuite("regex.11") {}
+};
+
+Test *regextest_11::suite()
+{
+    RegExTestSuite *cases = new regextest_11;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "&LP", "[[:<:]]a", "a", "a", NULL);
+    cases->add("m", "2", "&LP", "[[:<:]]a", "-a", "a", NULL);
+    cases->add("f", "3", "&LP", "[[:<:]]a", "ba", NULL);
+    cases->add("m", "4", "&LP", "a[[:>:]]", "a", "a", NULL);
+    cases->add("m", "5", "&LP", "a[[:>:]]", "a-", "a", NULL);
+    cases->add("f", "6", "&LP", "a[[:>:]]", "ab", NULL);
+    cases->add("m", "7", "bLP", "\\<a", "a", "a", NULL);
+    cases->add("f", "8", "bLP", "\\<a", "ba", NULL);
+    cases->add("m", "9", "bLP", "a\\>", "a", "a", NULL);
+    cases->add("f", "10", "bLP", "a\\>", "ab", NULL);
+    cases->add("m", "11", "LP", "\\ya", "a", "a", NULL);
+    cases->add("f", "12", "LP", "\\ya", "ba", NULL);
+    cases->add("m", "13", "LP", "a\\y", "a", "a", NULL);
+    cases->add("f", "14", "LP", "a\\y", "ab", NULL);
+    cases->add("m", "15", "LP", "a\\Y", "ab", "a", NULL);
+    cases->add("f", "16", "LP", "a\\Y", "a-", NULL);
+    cases->add("f", "17", "LP", "a\\Y", "a", NULL);
+    cases->add("f", "18", "LP", "-\\Y", "-a", NULL);
+    cases->add("m", "19", "LP", "-\\Y", "-%", "-", NULL);
+    cases->add("f", "20", "LP", "\\Y-", "a-", NULL);
+    cases->add("e", "21", "-", "[[:<:]]*", "BADRPT", NULL);
+    cases->add("e", "22", "-", "[[:>:]]*", "BADRPT", NULL);
+    cases->add("e", "23", "b", "\\<*", "BADRPT", NULL);
+    cases->add("e", "24", "b", "\\>*", "BADRPT", NULL);
+    cases->add("e", "25", "-", "\\y*", "BADRPT", NULL);
+    cases->add("e", "26", "-", "\\Y*", "BADRPT", NULL);
+    cases->add("m", "27", "LP", "\\ma", "a", "a", NULL);
+    cases->add("f", "28", "LP", "\\ma", "ba", NULL);
+    cases->add("m", "29", "LP", "a\\M", "a", "a", NULL);
+    cases->add("f", "30", "LP", "a\\M", "ab", NULL);
+    cases->add("f", "31", "ILP", "\\Ma", "a", NULL);
+    cases->add("f", "32", "ILP", "a\\m", "a", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_11, "regex.11");
+
+
+/*
+ * 12 character classes
+ */
+
+class regextest_12 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.12" suite
+    regextest_12() : RegExTestSuite("regex.12") {}
+};
+
+Test *regextest_12::suite()
+{
+    RegExTestSuite *cases = new regextest_12;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "LP", "a\\db", "a0b", "a0b", NULL);
+    cases->add("f", "2", "LP", "a\\db", "axb", NULL);
+    cases->add("f", "3", "LP", "a\\Db", "a0b", NULL);
+    cases->add("m", "4", "LP", "a\\Db", "axb", "axb", NULL);
+    cases->add("m", "5", "LP", "a\\sb", "a b", "a b", NULL);
+    cases->add("m", "6", "LP", "a\\sb", "a\tb", "a\tb", NULL);
+    cases->add("m", "7", "LP", "a\\sb", "a\nb", "a\nb", NULL);
+    cases->add("f", "8", "LP", "a\\sb", "axb", NULL);
+    cases->add("m", "9", "LP", "a\\Sb", "axb", "axb", NULL);
+    cases->add("f", "10", "LP", "a\\Sb", "a b", NULL);
+    cases->add("m", "11", "LP", "a\\wb", "axb", "axb", NULL);
+    cases->add("f", "12", "LP", "a\\wb", "a-b", NULL);
+    cases->add("f", "13", "LP", "a\\Wb", "axb", NULL);
+    cases->add("m", "14", "LP", "a\\Wb", "a-b", "a-b", NULL);
+    cases->add("m", "15", "LP", "\\y\\w+z\\y", "adze-guz", "guz", NULL);
+    cases->add("m", "16", "LPE", "a[\\d]b", "a1b", "a1b", NULL);
+    cases->add("m", "17", "LPE", "a[\\s]b", "a b", "a b", NULL);
+    cases->add("m", "18", "LPE", "a[\\w]b", "axb", "axb", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_12, "regex.12");
+
+
+/*
+ * 13 escapes
+ */
+
+class regextest_13 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.13" suite
+    regextest_13() : RegExTestSuite("regex.13") {}
+};
+
+Test *regextest_13::suite()
+{
+    RegExTestSuite *cases = new regextest_13;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("e", "1", "&", "a\\", "EESCAPE", NULL);
+    cases->add("m", "2", "-", "a\\<b", "a<b", "a<b", NULL);
+    cases->add("m", "3", "e", "a\\<b", "a<b", "a<b", NULL);
+    cases->add("m", "4", "bAS", "a\\wb", "awb", "awb", NULL);
+    cases->add("m", "5", "eAS", "a\\wb", "awb", "awb", NULL);
+    cases->add("m", "6", "PL", "a\\ab", "a\ab", "a\ab", NULL);
+    cases->add("m", "7", "P", "a\\bb", "a\bb", "a\bb", NULL);
+    cases->add("m", "8", "P", "a\\Bb", "a\\b", "a\\b", NULL);
+    cases->add("m", "9", "MP", "a\\chb", "a\bb", "a\bb", NULL);
+    cases->add("m", "10", "MP", "a\\cHb", "a\bb", "a\bb", NULL);
+    cases->add("m", "11", "LMP", "a\\e", "a\033", "a\033", NULL);
+    cases->add("m", "12", "P", "a\\fb", "a\fb", "a\fb", NULL);
+    cases->add("m", "13", "P", "a\\nb", "a\nb", "a\nb", NULL);
+    cases->add("m", "14", "P", "a\\rb", "a\rb", "a\rb", NULL);
+    cases->add("m", "15", "P", "a\\tb", "a\tb", "a\tb", NULL);
+    cases->add("m", "16", "P", "a\\u0008x", "a\bx", "a\bx", NULL);
+    cases->add("e", "17", "-", "a\\u008x", "EESCAPE", NULL);
+    cases->add("m", "18", "P", "a\\u00088x", "a\b8x", "a\b8x", NULL);
+    cases->add("m", "19", "P", "a\\U00000008x", "a\bx", "a\bx", NULL);
+    cases->add("e", "20", "-", "a\\U0000008x", "EESCAPE", NULL);
+    cases->add("m", "21", "P", "a\\vb", "a\vb", "a\vb", NULL);
+    cases->add("m", "22", "MP", "a\\x08x", "a\bx", "a\bx", NULL);
+    cases->add("e", "23", "-", "a\\xq", "EESCAPE", NULL);
+    cases->add("m", "24", "MP", "a\\x0008x", "a\bx", "a\bx", NULL);
+    cases->add("e", "25", "-", "a\\z", "EESCAPE", NULL);
+    cases->add("m", "26", "MP", "a\\010b", "a\bb", "a\bb", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_13, "regex.13");
+
+
+/*
+ * 14 back references
+ */
+
+class regextest_14 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.14" suite
+    regextest_14() : RegExTestSuite("regex.14") {}
+};
+
+Test *regextest_14::suite()
+{
+    RegExTestSuite *cases = new regextest_14;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "RP", "a(b*)c\\1", "abbcbb", "abbcbb", "bb", NULL);
+    cases->add("m", "2", "RP", "a(b*)c\\1", "ac", "ac", "", NULL);
+    cases->add("f", "3", "RP", "a(b*)c\\1", "abbcb", NULL);
+    cases->add("m", "4", "RP", "a(b*)\\1", "abbcbb", "abb", "b", NULL);
+    cases->add("m", "5", "RP", "a(b|bb)\\1", "abbcbb", "abb", "b", NULL);
+    cases->add("m", "6", "RP", "a([bc])\\1", "abb", "abb", "b", NULL);
+    cases->add("f", "7", "RP", "a([bc])\\1", "abc", NULL);
+    cases->add("m", "8", "RP", "a([bc])\\1", "abcabb", "abb", "b", NULL);
+    cases->add("f", "9", "RP", "a([bc])*\\1", "abc", NULL);
+    cases->add("f", "10", "RP", "a([bc])\\1", "abB", NULL);
+    cases->add("m", "11", "iRP", "a([bc])\\1", "abB", "abB", "b", NULL);
+    cases->add("m", "12", "RP", "a([bc])\\1+", "abbb", "abbb", "b", NULL);
+    cases->add("m", "13", "QRP", "a([bc])\\1{3,4}", "abbbb", "abbbb", "b", NULL);
+    cases->add("f", "14", "QRP", "a([bc])\\1{3,4}", "abbb", NULL);
+    cases->add("m", "15", "RP", "a([bc])\\1*", "abbb", "abbb", "b", NULL);
+    cases->add("m", "16", "RP", "a([bc])\\1*", "ab", "ab", "b", NULL);
+    cases->add("m", "17", "RP", "a([bc])(\\1*)", "ab", "ab", "b", "", NULL);
+    cases->add("e", "18", "-", "a((b)\\1)", "ESUBREG", NULL);
+    cases->add("e", "19", "-", "a(b)c\\2", "ESUBREG", NULL);
+    cases->add("m", "20", "bR", "a\\(b*\\)c\\1", "abbcbb", "abbcbb", "bb", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_14, "regex.14");
+
+
+/*
+ * 15 octal escapes vs back references
+ */
+
+class regextest_15 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.15" suite
+    regextest_15() : RegExTestSuite("regex.15") {}
+};
+
+Test *regextest_15::suite()
+{
+    RegExTestSuite *cases = new regextest_15;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "MP", "a\\010b", "a\bb", "a\bb", NULL);
+    cases->add("m", "2", "MP", "a\\0070b", "a\a0b", "a\a0b", NULL);
+    cases->add("m", "3", "MP", "a\\07b", "a\ab", "a\ab", NULL);
+    cases->add("m", "4", "MP", "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c", "abbbbbbbbbb\ac", "abbbbbbbbbb\ac", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL);
+    cases->add("e", "5", "-", "a\\7b", "ESUBREG", NULL);
+    cases->add("m", "6", "MP", "a\\10b", "a\bb", "a\bb", NULL);
+    cases->add("m", "7", "MP", "a\\101b", "aAb", "aAb", NULL);
+    cases->add("m", "8", "RP", "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\10c", "abbbbbbbbbbbc", "abbbbbbbbbbbc", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL);
+    cases->add("e", "9", "-", "a((((((((((b\\10))))))))))c", "ESUBREG", NULL);
+    cases->add("m", "10", "MP", "a\\12b", "a\nb", "a\nb", NULL);
+    cases->add("e", "11", "b", "a\\12b", "ESUBREG", NULL);
+    cases->add("m", "12", "eAS", "a\\12b", "a12b", "a12b", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_15, "regex.15");
+
+
+/*
+ * 16 expanded syntax
+ */
+
+class regextest_16 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.16" suite
+    regextest_16() : RegExTestSuite("regex.16") {}
+};
+
+Test *regextest_16::suite()
+{
+    RegExTestSuite *cases = new regextest_16;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "xP", "a b c", "abc", "abc", NULL);
+    cases->add("m", "2", "xP", "a b #oops\nc\td", "abcd", "abcd", NULL);
+    cases->add("m", "3", "x", "a\\ b\\\tc", "a b\tc", "a b\tc", NULL);
+    cases->add("m", "4", "xP", "a b\\#c", "ab#c", "ab#c", NULL);
+    cases->add("m", "5", "xP", "a b[c d]e", "ab e", "ab e", NULL);
+    cases->add("m", "6", "xP", "a b[c#d]e", "ab#e", "ab#e", NULL);
+    cases->add("m", "7", "xP", "a b[c#d]e", "abde", "abde", NULL);
+    cases->add("m", "8", "xSPB", "ab{ d", "ab{d", "ab{d", NULL);
+    cases->add("m", "9", "xPQ", "ab{ 1 , 2 }c", "abc", "abc", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_16, "regex.16");
+
+
+/*
+ * 17 misc syntax
+ */
+
+class regextest_17 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.17" suite
+    regextest_17() : RegExTestSuite("regex.17") {}
+};
+
+Test *regextest_17::suite()
+{
+    RegExTestSuite *cases = new regextest_17;
+    // The single add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "P", "a(?#comment)b", "ab", "ab", NULL);
+    // The row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_17, "regex.17");
+
+
+/*
+ * 18 unmatchable REs
+ */
+
+class regextest_18 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.18" suite
+    regextest_18() : RegExTestSuite("regex.18") {}
+};
+
+Test *regextest_18::suite()
+{
+    RegExTestSuite *cases = new regextest_18;
+    // The single add() call passes one NULL-terminated row of string fields.
+    cases->add("f", "1", "I", "a^b", "ab", NULL);
+    // The row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_18, "regex.18");
+
+
+/*
+ * 19 case independence
+ */
+
+class regextest_19 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.19" suite
+    regextest_19() : RegExTestSuite("regex.19") {}
+};
+
+Test *regextest_19::suite()
+{
+    RegExTestSuite *cases = new regextest_19;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "&i", "ab", "Ab", "Ab", NULL);
+    cases->add("m", "2", "&i", "a[bc]", "aC", "aC", NULL);
+    cases->add("f", "3", "&i", "a[^bc]", "aB", NULL);
+    cases->add("m", "4", "&iM", "a[b-d]", "aC", "aC", NULL);
+    cases->add("f", "5", "&iM", "a[^b-d]", "aC", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_19, "regex.19");
+
+
+/*
+ * 20 directors and embedded options
+ */
+
+class regextest_20 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.20" suite
+    regextest_20() : RegExTestSuite("regex.20") {}
+};
+
+Test *regextest_20::suite()
+{
+    RegExTestSuite *cases = new regextest_20;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("e", "1", "&", "***?", "BADPAT", NULL);
+    cases->add("m", "2", "q", "***?", "***?", "***?", NULL);
+    cases->add("m", "3", "&P", "***=a*b", "a*b", "a*b", NULL);
+    cases->add("m", "4", "q", "***=a*b", "***=a*b", "***=a*b", NULL);
+    cases->add("m", "5", "bLP", "***:\\w+", "ab", "ab", NULL);
+    cases->add("m", "6", "eLP", "***:\\w+", "ab", "ab", NULL);
+    cases->add("e", "7", "&", "***:***=a*b", "BADRPT", NULL);
+    cases->add("m", "8", "&P", "***:(?b)a+b", "a+b", "a+b", NULL);
+    cases->add("m", "9", "P", "(?b)a+b", "a+b", "a+b", NULL);
+    cases->add("e", "10", "e", "(?b)\\w+", "BADRPT", NULL);
+    cases->add("m", "11", "bAS", "(?b)\\w+", "(?b)w+", "(?b)w+", NULL);
+    cases->add("m", "12", "iP", "(?c)a", "a", "a", NULL);
+    cases->add("f", "13", "iP", "(?c)a", "A", NULL);
+    cases->add("m", "14", "APS", "(?e)\\W+", "WW", "WW", NULL);
+    cases->add("m", "15", "P", "(?i)a+", "Aa", "Aa", NULL);
+    cases->add("f", "16", "P", "(?m)a.b", "a\nb", NULL);
+    cases->add("m", "17", "P", "(?m)^b", "a\nb", "b", NULL);
+    cases->add("f", "18", "P", "(?n)a.b", "a\nb", NULL);
+    cases->add("m", "19", "P", "(?n)^b", "a\nb", "b", NULL);
+    cases->add("f", "20", "P", "(?p)a.b", "a\nb", NULL);
+    cases->add("f", "21", "P", "(?p)^b", "a\nb", NULL);
+    cases->add("m", "22", "P", "(?q)a+b", "a+b", "a+b", NULL);
+    cases->add("m", "23", "nP", "(?s)a.b", "a\nb", "a\nb", NULL);
+    cases->add("m", "24", "xP", "(?t)a b", "a b", "a b", NULL);
+    cases->add("m", "25", "P", "(?w)a.b", "a\nb", "a\nb", NULL);
+    cases->add("m", "26", "P", "(?w)^b", "a\nb", "b", NULL);
+    cases->add("m", "27", "P", "(?x)a b", "ab", "ab", NULL);
+    cases->add("e", "28", "-", "(?z)ab", "BADOPT", NULL);
+    cases->add("m", "29", "P", "(?ici)a+", "Aa", "Aa", NULL);
+    cases->add("e", "30", "P", "(?i)(?q)a+", "BADRPT", NULL);
+    cases->add("m", "31", "P", "(?q)(?i)a+", "(?i)a+", "(?i)a+", NULL);
+    cases->add("m", "32", "P", "(?qe)a+", "a", "a", NULL);
+    cases->add("m", "33", "xP", "(?q)a b", "a b", "a b", NULL);
+    cases->add("m", "34", "P", "(?qx)a b", "a b", "a b", NULL);
+    cases->add("m", "35", "P", "(?qi)ab", "Ab", "Ab", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_20, "regex.20");
+
+
+/*
+ * 21 capturing
+ */
+
+class regextest_21 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.21" suite
+    regextest_21() : RegExTestSuite("regex.21") {}
+};
+
+Test *regextest_21::suite()
+{
+    RegExTestSuite *cases = new regextest_21;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "-", "a(b)c", "abc", "abc", "b", NULL);
+    cases->add("m", "2", "P", "a(?:b)c", "xabc", "abc", NULL);
+    cases->add("m", "3", "-", "a((b))c", "xabcy", "abc", "b", "b", NULL);
+    cases->add("m", "4", "P", "a(?:(b))c", "abcy", "abc", "b", NULL);
+    cases->add("m", "5", "P", "a((?:b))c", "abc", "abc", "b", NULL);
+    cases->add("m", "6", "P", "a(?:(?:b))c", "abc", "abc", NULL);
+    cases->add("i", "7", "Q", "a(b){0}c", "ac", "0 1", "-1 -1", NULL);
+    cases->add("m", "8", "-", "a(b)c(d)e", "abcde", "abcde", "b", "d", NULL);
+    cases->add("m", "9", "-", "(b)c(d)e", "bcde", "bcde", "b", "d", NULL);
+    cases->add("m", "10", "-", "a(b)(d)e", "abde", "abde", "b", "d", NULL);
+    cases->add("m", "11", "-", "a(b)c(d)", "abcd", "abcd", "b", "d", NULL);
+    cases->add("m", "12", "-", "(ab)(cd)", "xabcdy", "abcd", "ab", "cd", NULL);
+    cases->add("m", "13", "-", "a(b)?c", "xabcy", "abc", "b", NULL);
+    cases->add("i", "14", "-", "a(b)?c", "xacy", "1 2", "-1 -1", NULL);
+    cases->add("m", "15", "-", "a(b)?c(d)?e", "xabcdey", "abcde", "b", "d", NULL);
+    cases->add("i", "16", "-", "a(b)?c(d)?e", "xacdey", "1 4", "-1 -1", "3 3", NULL);
+    cases->add("i", "17", "-", "a(b)?c(d)?e", "xabcey", "1 4", "2 2", "-1 -1", NULL);
+    cases->add("i", "18", "-", "a(b)?c(d)?e", "xacey", "1 3", "-1 -1", "-1 -1", NULL);
+    cases->add("m", "19", "-", "a(b)*c", "xabcy", "abc", "b", NULL);
+    cases->add("i", "20", "-", "a(b)*c", "xabbbcy", "1 5", "4 4", NULL);
+    cases->add("i", "21", "-", "a(b)*c", "xacy", "1 2", "-1 -1", NULL);
+    cases->add("m", "22", "-", "a(b*)c", "xabbbcy", "abbbc", "bbb", NULL);
+    cases->add("m", "23", "-", "a(b*)c", "xacy", "ac", "", NULL);
+    cases->add("f", "24", "-", "a(b)+c", "xacy", NULL);
+    cases->add("m", "25", "-", "a(b)+c", "xabcy", "abc", "b", NULL);
+    cases->add("i", "26", "-", "a(b)+c", "xabbbcy", "1 5", "4 4", NULL);
+    cases->add("m", "27", "-", "a(b+)c", "xabbbcy", "abbbc", "bbb", NULL);
+    cases->add("i", "28", "Q", "a(b){2,3}c", "xabbbcy", "1 5", "4 4", NULL);
+    cases->add("i", "29", "Q", "a(b){2,3}c", "xabbcy", "1 4", "3 3", NULL);
+    cases->add("f", "30", "Q", "a(b){2,3}c", "xabcy", NULL);
+    cases->add("m", "31", "LP", "\\y(\\w+)\\y", "-- abc-", "abc", "abc", NULL);
+    cases->add("m", "32", "-", "a((b|c)d+)+", "abacdbd", "acdbd", "bd", "b", NULL);
+    cases->add("m", "33", "N", "(.*).*", "abc", "abc", "abc", NULL);
+    cases->add("m", "34", "N", "(a*)*", "bc", "", "", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_21, "regex.21");
+
+
+/*
+ * 22 multicharacter collating elements
+ */
+
+class regextest_22 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.22" suite
+    regextest_22() : RegExTestSuite("regex.22") {}
+};
+
+Test *regextest_22::suite()
+{
+    RegExTestSuite *cases = new regextest_22;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "&+L", "a[c]e", "ace", "ace", NULL);
+    cases->add("f", "2", "&+IL", "a[c]h", "ach", NULL);
+    cases->add("m", "3", "&+L", "a[[.ch.]]", "ach", "ach", NULL);
+    cases->add("f", "4", "&+L", "a[[.ch.]]", "ace", NULL);
+    cases->add("m", "5", "&+L", "a[c[.ch.]]", "ac", "ac", NULL);
+    cases->add("m", "6", "&+L", "a[c[.ch.]]", "ace", "ac", NULL);
+    cases->add("m", "7", "&+L", "a[c[.ch.]]", "ache", "ach", NULL);
+    cases->add("f", "8", "&+L", "a[^c]e", "ace", NULL);
+    cases->add("m", "9", "&+L", "a[^c]e", "abe", "abe", NULL);
+    cases->add("m", "10", "&+L", "a[^c]e", "ache", "ache", NULL);
+    cases->add("f", "11", "&+L", "a[^[.ch.]]", "ach", NULL);
+    cases->add("m", "12", "&+L", "a[^[.ch.]]", "ace", "ac", NULL);
+    cases->add("m", "13", "&+L", "a[^[.ch.]]", "ac", "ac", NULL);
+    cases->add("m", "14", "&+L", "a[^[.ch.]]", "abe", "ab", NULL);
+    cases->add("f", "15", "&+L", "a[^c[.ch.]]", "ach", NULL);
+    cases->add("f", "16", "&+L", "a[^c[.ch.]]", "ace", NULL);
+    cases->add("f", "17", "&+L", "a[^c[.ch.]]", "ac", NULL);
+    cases->add("m", "18", "&+L", "a[^c[.ch.]]", "abe", "ab", NULL);
+    cases->add("m", "19", "&+L", "a[^b]", "ac", "ac", NULL);
+    cases->add("m", "20", "&+L", "a[^b]", "ace", "ac", NULL);
+    cases->add("m", "21", "&+L", "a[^b]", "ach", "ach", NULL);
+    cases->add("f", "22", "&+L", "a[^b]", "abe", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_22, "regex.22");
+
+
+/*
+ * 23 lookahead constraints
+ */
+
+class regextest_23 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.23" suite
+    regextest_23() : RegExTestSuite("regex.23") {}
+};
+
+Test *regextest_23::suite()
+{
+    RegExTestSuite *cases = new regextest_23;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "HP", "a(?=b)b*", "ab", "ab", NULL);
+    cases->add("f", "2", "HP", "a(?=b)b*", "a", NULL);
+    cases->add("m", "3", "HP", "a(?=b)b*(?=c)c*", "abc", "abc", NULL);
+    cases->add("f", "4", "HP", "a(?=b)b*(?=c)c*", "ab", NULL);
+    cases->add("f", "5", "HP", "a(?!b)b*", "ab", NULL);
+    cases->add("m", "6", "HP", "a(?!b)b*", "a", "a", NULL);
+    cases->add("m", "7", "HP", "(?=b)b", "b", "b", NULL);
+    cases->add("f", "8", "HP", "(?=b)b", "a", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_23, "regex.23");
+
+
+/*
+ * 24 non-greedy quantifiers
+ */
+
+class regextest_24 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.24" suite
+    regextest_24() : RegExTestSuite("regex.24") {}
+};
+
+Test *regextest_24::suite()
+{
+    RegExTestSuite *cases = new regextest_24;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "PT", "ab+?", "abb", "ab", NULL);
+    cases->add("m", "2", "PT", "ab+?c", "abbc", "abbc", NULL);
+    cases->add("m", "3", "PT", "ab*?", "abb", "a", NULL);
+    cases->add("m", "4", "PT", "ab*?c", "abbc", "abbc", NULL);
+    cases->add("m", "5", "PT", "ab??", "ab", "a", NULL);
+    cases->add("m", "6", "PT", "ab??c", "abc", "abc", NULL);
+    cases->add("m", "7", "PQT", "ab{2,4}?", "abbbb", "abb", NULL);
+    cases->add("m", "8", "PQT", "ab{2,4}?c", "abbbbc", "abbbbc", NULL);
+    cases->add("m", "9", "-", "3z*", "123zzzz456", "3zzzz", NULL);
+    cases->add("m", "10", "PT", "3z*?", "123zzzz456", "3", NULL);
+    cases->add("m", "11", "-", "z*4", "123zzzz456", "zzzz4", NULL);
+    cases->add("m", "12", "PT", "z*?4", "123zzzz456", "zzzz4", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_24, "regex.24");
+
+
+/*
+ * 25 mixed quantifiers
+ */
+
+class regextest_25 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.25" suite
+    regextest_25() : RegExTestSuite("regex.25") {}
+};
+
+Test *regextest_25::suite()
+{
+    RegExTestSuite *cases = new regextest_25;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "PNT", "^(.*?)(a*)$", "xyza", "xyza", "xyz", "a", NULL);
+    cases->add("m", "2", "PNT", "^(.*?)(a*)$", "xyzaa", "xyzaa", "xyz", "aa", NULL);
+    cases->add("m", "3", "PNT", "^(.*?)(a*)$", "xyz", "xyz", "xyz", "", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_25, "regex.25");
+
+
+/*
+ * 26 tricky cases
+ */
+
+class regextest_26 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.26" suite
+    regextest_26() : RegExTestSuite("regex.26") {}
+};
+
+Test *regextest_26::suite()
+{
+    RegExTestSuite *cases = new regextest_26;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "-", "(week|wee)(night|knights)", "weeknights", "weeknights", "wee", "knights", NULL);
+    cases->add("m", "2", "RP", "a(bc*).*\\1", "abccbccb", "abccbccb", "b", NULL);
+    cases->add("m", "3", "-", "a(b.[bc]*)+", "abcbd", "abcbd", "bd", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_26, "regex.26");
+
+
+/*
+ * 27 implementation misc.
+ */
+
+class regextest_27 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.27" suite
+    regextest_27() : RegExTestSuite("regex.27") {}
+};
+
+Test *regextest_27::suite()
+{
+    RegExTestSuite *cases = new regextest_27;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "P", "a(?:b|b)c", "abc", "abc", NULL);
+    cases->add("m", "2", "&", "[ab][ab][ab]", "aba", "aba", NULL);
+    cases->add("m", "3", "&", "[ab][ab][ab][ab][ab][ab][ab]", "abababa", "abababa", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_27, "regex.27");
+
+
+/*
+ * 28 boundary busters etc.
+ */
+
+class regextest_28 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.28" suite
+    regextest_28() : RegExTestSuite("regex.28") {}
+};
+
+Test *regextest_28::suite()
+{
+    RegExTestSuite *cases = new regextest_28;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "&", "abcdefghijkl", "abcdefghijkl", "abcdefghijkl", NULL);
+    cases->add("m", "2", "P", "a(?:b|c|d|e|f|g|h|i|j|k|l|m)n", "agn", "agn", NULL);
+    cases->add("m", "3", "-", "a(((((((((((((b)))))))))))))c", "abc", "abc", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL);
+    cases->add("m", "4", "Q", "ab{1,100}c", "abbc", "abbc", NULL);
+    cases->add("m", "5", "Q", "ab{1,100}c", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", NULL);
+    cases->add("m", "6", "Q", "ab{1,100}c", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", NULL);
+    cases->add("m", "7", "LP", "\\w+abcdefgh", "xyzabcdefgh", "xyzabcdefgh", NULL);
+    cases->add("m", "8", "%LP", "\\w+abcdefgh", "xyzabcdefgh", "xyzabcdefgh", NULL);
+    cases->add("m", "9", "%LP", "\\w+abcdefghijklmnopqrst", "xyzabcdefghijklmnopqrst", "xyzabcdefghijklmnopqrst", NULL);
+    cases->add("i", "10", "%LP", "\\w+(abcdefgh)?", "xyz", "0 2", "-1 -1", NULL);
+    cases->add("i", "11", "%LP", "\\w+(abcdefgh)?", "xyzabcdefg", "0 9", "-1 -1", NULL);
+    cases->add("i", "12", "%LP", "\\w+(abcdefghijklmnopqrst)?", "xyzabcdefghijklmnopqrs", "0 21", "-1 -1", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_28, "regex.28");
+
+
+/*
+ * 29 incomplete matches
+ */
+
+class regextest_29 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.29" suite
+    regextest_29() : RegExTestSuite("regex.29") {}
+};
+
+Test *regextest_29::suite()
+{
+    RegExTestSuite *cases = new regextest_29;
+    // Each add() call passes one NULL-terminated row; ids skip from "8" to "10".
+    cases->add("p", "1", "t", "def", "abc", "3 2", "", NULL);
+    cases->add("p", "2", "t", "bcd", "abc", "1 2", "", NULL);
+    cases->add("p", "3", "t", "abc", "abab", "0 3", "", NULL);
+    cases->add("p", "4", "t", "abc", "abdab", "3 4", "", NULL);
+    cases->add("i", "5", "t", "abc", "abc", "0 2", "0 2", NULL);
+    cases->add("i", "6", "t", "abc", "xyabc", "2 4", "2 4", NULL);
+    cases->add("p", "7", "t", "abc+", "xyab", "2 3", "", NULL);
+    cases->add("i", "8", "t", "abc+", "xyabc", "2 4", "2 4", NULL);
+    cases->add("i", "10", "t", "abc+", "xyabcdd", "2 4", "7 6", NULL);
+    cases->add("p", "11", "tPT", "abc+?", "xyab", "2 3", "", NULL);
+    cases->add("i", "12", "tPT", "abc+?", "xyabc", "2 4", "5 4", NULL);
+    cases->add("i", "13", "tPT", "abc+?", "xyabcc", "2 4", "6 5", NULL);
+    cases->add("i", "14", "tPT", "abc+?", "xyabcd", "2 4", "6 5", NULL);
+    cases->add("i", "15", "tPT", "abc+?", "xyabcdd", "2 4", "7 6", NULL);
+    cases->add("i", "16", "t", "abcd|bc", "xyabc", "3 4", "2 4", NULL);
+    cases->add("p", "17", "tn", ".*k", "xx\nyyy", "3 5", "", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_29, "regex.29");
+
+
+/*
+ * 30 misc. oddities and old bugs
+ */
+
+class regextest_30 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.30" suite
+    regextest_30() : RegExTestSuite("regex.30") {}
+};
+
+Test *regextest_30::suite()
+{
+    RegExTestSuite *cases = new regextest_30;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("e", "1", "&", "***", "BADRPT", NULL);
+    cases->add("m", "2", "N", "a?b*", "abb", "abb", NULL);
+    cases->add("m", "3", "N", "a?b*", "bb", "bb", NULL);
+    cases->add("m", "4", "&", "a*b", "aab", "aab", NULL);
+    cases->add("m", "5", "&", "^a*b", "aaaab", "aaaab", NULL);
+    cases->add("m", "6", "&M", "[0-6][1-2][0-3][0-6][1-6][0-6]", "010010", "010010", NULL);
+    cases->add("m", "7", "s", "abc", "abcd", "abc", NULL);
+    cases->add("f", "8", "s", "abc", "xabcd", NULL);
+    cases->add("m", "9", "HLP", "(?n)^(?![t#])\\S+", "tk\n\n#\n#\nit0", "it0", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_30, "regex.30");
+
+
+/*
+ * extra_1 checks for bug fixes
+ */
+
+class regextest_extra_1 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.extra_1" suite
+    regextest_extra_1() : RegExTestSuite("regex.extra_1") {}
+};
+
+Test *regextest_extra_1::suite()
+{
+    RegExTestSuite *cases = new regextest_extra_1;
+    // Rows are labelled by bug number; several rows share the "Bug 505048" label.
+    cases->add("m", "Bug 230589", "-", "[ ]*(^|[^%])%V", "*%V2", NULL);
+    cases->add("m", "Bug 504785", "-", "([^_.]*)([^.]*)\\.(..)(.).*", "bbcos_001_c01.q1la", "bbcos_001_c01.q1la", "bbcos", "_001_c01", "q1", "l", NULL);
+    cases->add("m", "Bug 505048", "-", "\\A\\s*[^<]*\\s*<([^>]+)>", "a<a>", NULL);
+    cases->add("m", "Bug 505048", "-", "\\A\\s*([^b]*)b", "ab", NULL);
+    cases->add("m", "Bug 505048", "-", "\\A\\s*[^b]*(b)", "ab", NULL);
+    cases->add("m", "Bug 505048", "-", "\\A(\\s*)[^b]*(b)", "ab", NULL);
+    cases->add("m", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", NULL);
+    cases->add("m", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", "ab", NULL);
+    cases->add("i", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", "0 1", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_extra_1, "regex.extra_1");
+
+
+/*
+ * wx_1 character classification: ascii
+ */
+
+class regextest_wx_1 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.wx_1" suite
+    regextest_wx_1() : RegExTestSuite("regex.wx_1") {}
+};
+
+Test *regextest_wx_1::suite()
+{
+    RegExTestSuite *cases = new regextest_wx_1;
+    // Each add() call passes one NULL-terminated row of string fields.
+    cases->add("m", "1", "&", "[^[:alnum:]]", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    cases->add("m", "2", "&", "[[:alnum:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X", "X", NULL);
+    cases->add("m", "3", "&", "[^[:alpha:]]", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    cases->add("m", "4", "&", "[[:alpha:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X", "X", NULL);
+    cases->add("m", "5", "&", "[^[:cntrl:]]", "\a\b\t\n\v\f\r!", "!", NULL);
+    cases->add("m", "6", "&", "[[:cntrl:]]", " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL);
+    cases->add("m", "7", "&", "[^[:digit:]]", "0123456789!", "!", NULL);
+    cases->add("m", "8", "&", "[[:digit:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0", "0", NULL);
+    cases->add("m", "9", "&", "[^[:graph:]]", "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL);
+    cases->add("m", "10", "&", "[[:graph:]]", "\a\b\t\n\v\f\r !", "!", NULL);
+    cases->add("m", "11", "&", "[^[:lower:]]", "abcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    cases->add("m", "12", "&", "[[:lower:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x", "x", NULL);
+    cases->add("m", "13", "&", "[^[:print:]]", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n", "\n", NULL);
+    cases->add("m", "14", "&", "[[:print:]]", "\a\b\n\v\f\rX", "X", NULL);
+    cases->add("m", "15", "&", "[^[:punct:]]", "!\"#%&'()*,-./:;?@[\\]_{}X", "X", NULL);
+    cases->add("m", "16", "&", "[[:punct:]]", "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    cases->add("m", "17", "&", "[^[:space:]]", "\t\n\v\f\r X", "X", NULL);
+    cases->add("m", "18", "&", "[[:space:]]", "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL);
+    cases->add("m", "19", "&", "[^[:upper:]]", "ABCDEFGHIJKLMNOPQRSTUVWXYZ!", "!", NULL);
+    cases->add("m", "20", "&", "[[:upper:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X", "X", NULL);
+    cases->add("m", "21", "&", "[^[:xdigit:]]", "0123456789ABCDEFabcdef!", "!", NULL);
+    cases->add("m", "22", "&", "[[:xdigit:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a", "a", NULL);
+    cases->add("i", "23", "&i", "AbCdEfGhIjKlMnOpQrStUvWxYz", "aBcDeFgHiJkLmNoPqRsTuVwXyZ", "0 25", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_1, "regex.wx_1");
+
+
+/*
+ * wx_2 character classification: western european
+ */
+
+class regextest_wx_2 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.wx_2" suite
+    regextest_wx_2() : RegExTestSuite("regex.wx_2") {}
+};
+
+Test *regextest_wx_2::suite()
+{
+    RegExTestSuite *cases = new regextest_wx_2;
+    // Each add() call passes one NULL-terminated row; literals hold raw non-ASCII text.
+    cases->add("m", "1", "&", "[^[:alpha:]]", "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!", "!", NULL);
+    cases->add("m", "2", "&", "[[:alpha:]]", " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷X", "X", NULL);
+    cases->add("m", "3", "&", "[^[:lower:]]", "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!", "!", NULL);
+    cases->add("m", "4", "&", "[[:lower:]]", " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ÷x", "x", NULL);
+    cases->add("m", "5", "&", "[^[:upper:]]", "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ!", "!", NULL);
+    cases->add("m", "6", "&", "[[:upper:]]", " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿX", "X", NULL);
+    cases->add("i", "7", "&i*", "ÀáÂãÄåÆçÈéÊëÌíÎïÐñÒóÔõÖøÙúÛüÝþ", "àÁâÃäÅæÇèÉêËìÍîÏðÑòÓôÕöØùÚûÜýÞ", "0 29", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_2, "regex.wx_2");
+
+
+/*
+ * wx_3 character classification: cyrillic
+ */
+
+class regextest_wx_3 : public RegExTestSuite
+{
+public:
+    static Test *suite();   // builds and returns the populated "regex.wx_3" suite
+    regextest_wx_3() : RegExTestSuite("regex.wx_3") {}
+};
+
+Test *regextest_wx_3::suite()
+{
+    RegExTestSuite *cases = new regextest_wx_3;
+    // Each add() call passes one NULL-terminated row; literals hold raw non-ASCII text.
+    cases->add("m", "1", "&", "[^[:alpha:]]", "ёЁюабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!", "!", NULL);
+    cases->add("m", "2", "&", "[^[:lower:]]", "ёюабцдефгхийклмнопярстужвьызшэщчъ!", "!", NULL);
+    cases->add("m", "3", "&", "[[:lower:]]", "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪx", "x", NULL);
+    cases->add("m", "4", "&", "[^[:upper:]]", "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!", "!", NULL);
+    cases->add("m", "5", "&", "[[:upper:]]", "ёюабцдефгхийклмнопярстужвьызшэщчъX", "X", NULL);
+    cases->add("i", "6", "&i*", "ЁюАбЦдЕфГхИйКлМнОпЯрСтУжВьЫзШэЩчЪ", "ёЮаБцДеФгХиЙкЛмНоПяРсТуЖвЬыЗшЭщЧъ", "0 32", NULL);
+    // Every row has been added; return the suite.
+    return cases;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_3, "regex.wx_3");
+
+
+/*
+ * A suite containing all the above suites
+ */
+
+class regextest : public TestSuite
+{
+public:
+    static Test *suite();   // builds and returns the aggregate "regex" suite
+    regextest() : TestSuite("regex") {}
+};
+
+Test *regextest::suite()
+{
+    TestSuite *all = new regextest;
+    // Append the suite built by each section's factory, in section order.
+    all->addTest(regextest_1::suite());
+    all->addTest(regextest_2::suite());
+    all->addTest(regextest_3::suite());
+    all->addTest(regextest_4::suite());
+    all->addTest(regextest_5::suite());
+    all->addTest(regextest_6::suite());
+    all->addTest(regextest_7::suite());
+    all->addTest(regextest_8::suite());
+    all->addTest(regextest_9::suite());
+    all->addTest(regextest_10::suite());
+    all->addTest(regextest_11::suite());
+    all->addTest(regextest_12::suite());
+    all->addTest(regextest_13::suite());
+    all->addTest(regextest_14::suite());
+    all->addTest(regextest_15::suite());
+    all->addTest(regextest_16::suite());
+    all->addTest(regextest_17::suite());
+    all->addTest(regextest_18::suite());
+    all->addTest(regextest_19::suite());
+    all->addTest(regextest_20::suite());
+    all->addTest(regextest_21::suite());
+    all->addTest(regextest_22::suite());
+    all->addTest(regextest_23::suite());
+    all->addTest(regextest_24::suite());
+    all->addTest(regextest_25::suite());
+    all->addTest(regextest_26::suite());
+    all->addTest(regextest_27::suite());
+    all->addTest(regextest_28::suite());
+    all->addTest(regextest_29::suite());
+    all->addTest(regextest_30::suite());
+    all->addTest(regextest_extra_1::suite());
+    all->addTest(regextest_wx_1::suite());
+    all->addTest(regextest_wx_2::suite());
+    all->addTest(regextest_wx_3::suite());
+    // Every section suite has been appended; return the aggregate.
+    return all;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
+CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
diff --git a/tests/regex/regex.pl b/tests/regex/regex.pl
new file mode 100755
index 0000000000..af0cfe8a78
--- /dev/null
+++ b/tests/regex/regex.pl
@@ -0,0 +1,437 @@
+#!/usr/bin/env perl
+#############################################################################
+# Name:        regex.pl
+# Purpose:     Generate test code for wxRegEx from 'reg.test'
+# Author:      Mike Wetherell
+# RCS-ID:      $Id$
+# Copyright:   (c) Mike Wetherell
+# Licence:     wxWidgets licence
+#############################################################################
+
+#
+# Notes:
+#   See './regex.pl -h' for usage
+#
+#   Output at the moment is C++ using the cppunit testing framework. The
+#   language/framework specifics are separated, with the following 5
+#   subs as an interface: 'begin_output', 'begin_section', 'write_test',
+#   'end_section' and 'end_output'. So for a different language/framework,
+#   implement 5 new similar subs.
+#
+#   I've avoided using 'use encoding "UTF-8"', since this wasn't available
+#   in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
+#   earlier than perl 5.6.0 aren't going to work.
+
+use strict;
+use warnings;   # not '-w' on the #! line: env would be handed "perl -w" as one word
+use File::Basename;
+#use encoding "UTF-8";  # enable in the future when perl 5.6.x is just a memory
+
+# if 0 output is wide characters, if 1 output is utf8 encoded
+my $utf = 1;
+
+# Quote a parameter as a C++ string literal (C++ helper). Escapes the text
+# in $_ in place and returns it in double quotes (L-prefixed if $utf is 0).
+sub quotecxx {
+    my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
+                "\n" => "n", "\r" => "r", "\t" => "t",
+                "\013" => "v", '"' => '"', "\\" => "\\" );
+
+    # working around lack of 'use encoding' (presumably: treat $_ as UTF-8)
+    $_ = pack "U0C*", unpack "C*", $_;
+    use utf8;
+    # escape control chars, '"', '\' and everything from 0x7f upwards
+    s/[\000-\037"\\\177-\x{ffff}]/
+        if ($esc{$&}) {
+            "\\$esc{$&}";
+        } elsif (ord($&) > 0x9f) {
+            if ($utf) {
+                $&;
+            } else {
+                sprintf "\\u%04x", ord($&);
+            }
+        } else {
+            sprintf "\\%03o", ord($&);
+        }
+    /ge;    # mnemonic if in %esc; above 0x9f: kept ($utf) or \uXXXX; else octal
+
+    # working around lack of 'use encoding' (presumably: back to plain bytes)
+    no utf8;
+    $_ = pack "C*", unpack "C*", $_;
+
+    return ($utf ? '"' : 'L"') . $_ . '"'       # narrow "..." or wide L"..."
+}
+
+# Start writing the output code (C++ interface): prints the opening comment
+# block built from $from and, when it is non-empty, $instructions.
+sub begin_output {
+    my ($from, $instructions) = @_;
+
+    # embed it in the comment: each line gets ' * ', replacing 3 leading spaces
+    $from = "\n$from";
+    $from =~ s/^(?:   )?/ * /mg;
+
+    # $instructions contains information about the flags etc.
+    if ($instructions) {
+        $instructions = "\n$instructions";
+        $instructions =~ s/^(?:   )?/ * /mg;
+    }
+
+    my $u = $utf ? " (UTF-8 encoded)" : "";     # noted in the banner title
+
+    print <<EOT;
+/*
+ * Test data for wxRegEx$u
+$from$instructions */
+
+EOT
+}
+
+my @classes;
+
+# Start a new section (C++ interface): records the section in @classes and
+# prints the class declaration plus the opening of its suite() function.
+sub begin_section {
+    my ($id, $title) = @_;
+    my $class = "regextest_$id";
+    $class =~ s/\W/_/g;             # non-word chars in the id become '_'
+    push @classes, [$id, $class];   # read back by end_section and end_output
+
+    print <<EOT;
+
+/*
+ * $id $title
+ */
+
+class $class : public RegExTestSuite
+{
+public:
+    $class() : RegExTestSuite("regex.$id") { }
+    static Test *suite();
+};
+
+Test *$class\::suite()
+{
+    RegExTestSuite *suite = new $class;
+
+EOT
+}
+
+# Emit one suite->add(...) call (C++ interface): every argument is run
+# through quotecxx and the argument list is terminated with NULL
+sub write_test {
+    my @quoted = @_;
+    for (@quoted) { $_ = quotecxx() }       # quotecxx works on $_
+    print "    suite->add(", join(', ', @quoted), ", NULL);\n";
+}
+
+# End a section (C++ interface): closes the suite() body and registers the
+# class of the most recently begun section under the name "regex.<id>".
+sub end_section {
+    my ($id, $class) = @{$classes[$#classes]};  # last entry pushed
+
+    print <<EOT;
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
+
+EOT
+}
+
+# Finish off the output (C++ interface): prints the umbrella 'regextest' suite,
+# which adds every class recorded in @classes, followed by its registration.
+sub end_output {
+    print <<EOT;
+
+/*
+ * A suite containing all the above suites
+ */
+
+class regextest : public TestSuite
+{
+public:
+    regextest() : TestSuite("regex") { }
+    static Test *suite();
+};
+
+Test *regextest::suite()
+{
+    TestSuite *suite = new regextest;
+
+EOT
+    print "    suite->addTest(".$_->[1]."::suite());\n" for @classes;
+
+    print <<EOT;
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
+CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
+EOT
+}
+
+# Parse a tcl string into a list of words. Handles curly quoting (nestable)
+# and double quoting; the words are returned with quoting and escapes removed.
+sub parsetcl {
+    my ($curly, $quote);
+    # recursively defined expression that can parse balanced braces
+    # warning: uses experimental features of perl, see perlre(1)
+    $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
+    $quote = qr/"(?:\\.|[^"\\])*"/;     # '\' always pairs with the next char
+    my @tokens = shift =~ /($curly|$quote|\S+)/g;
+
+    # now remove braces/quotes and unescape any escapes
+    for (@tokens) {
+        if (s/^\{(.*)\}$/$1/) {
+            # for curly quoting, only unescape \{ and \}
+            s/\\([{}])/$1/g;
+        } else {
+            s/^"(.*)"$/$1/;
+
+            # unescape any escapes: octal, \x hex (low byte only), \u hex,
+            my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
+                        "n" => "\n", "r" => "\r", "t" => "\t",
+                        "v" => "\013" );
+            my $x = qr/[[:xdigit:]]/;
+            # the mnemonics in %esc; any other escaped char stands for itself
+            s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
+                if ($1 =~ m{^([0-7]+)}) {
+                    chr(oct($1));
+                } elsif ($1 =~ m{^x($x+)}) {
+                    pack("C0U", hex($1) & 0xff);
+                } elsif ($1 =~ m{^u($x+)}) {
+                    pack("C0U", hex($1));
+                } elsif ($esc{$1}) {
+                    $esc{$1};
+                } else {
+                    $1;
+                }
+            /ge;
+        }
+    }
+
+    return @tokens;
+}
+
+# helpers which keep track of whether begin_section has been called, so that
+# end_section can be called when appropriate
+#
+my @doing = ("0", "");
+my $in_section = 0;
+
+sub handle_doing {
+    # close the section in progress (if any), then remember the new heading
+    end_section() if $in_section;
+    ($in_section, @doing) = (0, @_);
+}
+
+sub handle_test {
+    # a section is only opened once its first test turns up
+    unless ($in_section) { begin_section(@doing); $in_section = 1 }
+    write_test(@_);
+}
+
+sub handle_end {
+    # finish the section still open (if any), then write the file trailer
+    if ($in_section) { end_section(); $in_section = 0 }
+    end_output();
+}
+
+# 'main' - start by parsing the command line options. Option letters may be
+# bundled (-uo FILE) and the -o name may be attached to the option (-oFILE).
+my $badoption = !@ARGV;
+my $utfdefault = $utf;
+my $outputname;
+
+for (my $i = 0; $i < @ARGV; ) {
+    if ($ARGV[$i] !~ m{^-.}) {
+        $i++;
+        next;
+    }
+
+    if ($ARGV[$i] eq '--') {
+        splice @ARGV, $i, 1;
+        last;
+    }
+
+    # split at the FIRST 'o' so that a name containing an 'o' stays in one piece
+    if ($ARGV[$i] =~ s{^-(.*?)o(.*)$}{-$1}i) {      # -o : output file
+        $outputname = $2 || splice @ARGV, $i + 1, 1;    # attached or next arg
+        $badoption = 1 unless defined $outputname;      # '-o' without a name
+    }
+    for (split //, substr($ARGV[$i], 1)) {
+        if (/u/i) {                                 # -u : utf-8 output
+            $utf = 1;
+        } elsif (/w/i) {                            # -w : wide char output
+            $utf = 0;
+        } else {
+            $badoption = 1;
+        }
+    }
+    splice @ARGV, $i, 1;    # drop the option; $i now indexes the next argument
+}
+
+# Display help if no arguments were given or an unrecognised option letter
+# (which includes -h) was seen, then exit with status 0
+if ($badoption) {
+    my $prog = basename $0;
+    my ($w, $u) = (" (default)", "          ");     # tag for -w, padding for -u
+    ($w, $u) = ($u, $w) if $utfdefault;     # tag -u when UTF-8 is the default
+    
+    print <<EOT;
+Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
+Generate test code for wxRegEx from 'reg.test'
+Example: $prog -o regex.inc reg.test wxreg.test 
+
+ -w$w   Output will be wide characters.
+ -u$u   Output will be UTF-8 encoded.
+
+Input files should be in UTF-8. If no input files are specified input is
+read from stdin. If no output file is specified output is written to stdout.
+See the comments in reg.test (in src/regex) for details of the input file
+format.
+EOT
+    exit 0;
+}
+
+# Open the output file (3-arg open, so the name is never parsed as a mode)
+open STDOUT, '>', $outputname or die "$0: cannot write '$outputname': $!\n"
+    if $outputname;
+
+# Read in the files and initially parse just the comments for copyright
+# information and instructions on the tests
+#
+my @input;                  # slurped input files stripped of comments
+my $files = "";             # copyright info from the input comments
+my $instructions = "";      # test instructions from the input comments
+
+do {
+    my $inputname = @ARGV ? basename($ARGV[0]) : undef;  # no "my ... if"
+
+    # slurp input (<> yields undef once no readable input is left)
+    undef $/;
+    my $in = <>;
+    $in = "" unless defined $in;
+    # remove escaped newlines
+    $in =~ s/(?<!\\)\\\n//g;
+
+    # record the copyrights of the input files
+    for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
+        s/[\s:]+/ /g;
+        $files .= "  ";
+        $files .= $inputname . ": " if $inputname && $inputname ne '-';
+        $files .= "$_\n";
+    }
+
+    # Parse the comments for instructions on the tests, which look like this:
+    #    i    successful match with -indices (used in checking things like
+    #         nonparticipating subexpressions)
+    if (!$instructions) {
+        my $sp = qr{\t|   +};                   # tab or three or more spaces
+        my @instructions = $in =~
+            /\n(
+                (?:
+                    \#$sp\S?$sp\S[^\n]+\n       # instruction line
+                    (?:\#$sp$sp\S[^\n]+\n)*     # continuation lines (if any)
+                )+
+            )/gx;
+
+        if (@instructions) {
+            $instructions[0] = "Test types:\n$instructions[0]";
+            if (@instructions > 1) {
+                $instructions[1] = "Flag characters:\n$instructions[1]";
+            }
+            $instructions = join "\n", @instructions;
+            $instructions =~ s/^#([^\t]?)/ $1/mg;
+        }
+    }
+
+    # @input is the input of all files (stripped of comments)
+    $in =~ s/^#.*$//mg;
+    push @input, $in;
+
+} while $ARGV[0];
+
+# Build the banner text: generator name and timestamp, then either the list
+# of input files or a bare copyright notice (both gathered into $files above)
+my $from = join " ", "Generated", scalar(localtime), "by", basename($0);
+$from =~ s/[\s]+/ /g;
+# a ':' can only come from a "filename: " prefix, colons were stripped earlier
+my $origin = !$files          ? ""
+           : $files =~ /:/    ? " from the following files:"
+           :                    " from work with the following copyright:";
+$from .= $origin;
+# word-wrap: chunks of at most 76 characters, each ended by whitespace or
+# by the end of the string
+$from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g);
+$from .= "\n$files" if $files;
+
+# Now start to print the code: the banner first, then one pass over each
+# input file (main tests followed by that file's 'extra' tests)
+begin_output $from, $instructions;
+
+# numbers for 'extra' sections (one such section per file that has extras)
+my $extra = 1;
+
+for (@input)
+{
+    # Print the main tests
+    #
+    # Test lines look like this:
+    # m  3  b       {\(a\)b}        ab      ab      a
+    #
+    # Also looks for heading lines, e.g.:
+    # doing 4 "parentheses"
+    # (parsetcl turns that heading into the two words 4 and parentheses)
+    for (split "\n") {
+        if (/^doing\s+(\S+)\s+(\S.*)/) {
+            handle_doing parsetcl "$1 $2";
+        } elsif (/^[efimp]\s/) {
+            handle_test parsetcl $_;
+        }
+    }
+
+    # Extra tests
+    #
+    # The expression below matches something like this:
+    #   test reg-33.8 {Bug 505048} {
+    #       regexp -inline {\A\s*[^b]*b} ab
+    #   } ab
+    #
+    # The three subexpressions then return these parts:
+    #   $extras[$i]     = '{Bug 505048}',
+    #   $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
+    #   $extras[$i + 2] = 'ab'
+    # so @extras is a flat list holding three entries per matched test
+    my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n       # line 1
+                  \s*regexp\s+([^\n]+)\n                # line 2
+                  \}\s*(\S[^\n]*)/gx;                   # line 3
+
+    handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
+
+    for (my $i = 0; $i < @extras; $i += 3) {
+        my $id = $extras[$i];
+
+        # further parse the middle line into options and the rest (i.e. $args)
+        my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
+
+        my @args = parsetcl $args;
+        $#args = 1;     # only want the first two
+
+        # now handle the options: -indices gives an 'i' test; otherwise 'm',
+        my $test    = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
+        my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
+        # or 'f' when the expected result is a false value such as 0
+        # get them all in the right order and print; parsetcl reads only its
+        unshift @args, $test, parsetcl($id), '-';
+        push @args, parsetcl(parsetcl($results)) if $results;
+        handle_test @args;  # first argument, hence the nested call above
+    }
+}
+
+# finish: close the last section and write the umbrella suite
+#
+handle_end;
diff --git a/tests/regex/wxreg.test b/tests/regex/wxreg.test
new file mode 100644
index 0000000000..3ae0f23551
--- /dev/null
+++ b/tests/regex/wxreg.test
@@ -0,0 +1,71 @@
+#############################################################################
+# Name:        wxreg.test
+# Purpose:     Additional tests for the regex lib and wxRegEx
+# Author:      Mike Wetherell
+# RCS-ID:      $Id$
+# Copyright:   (c) 2004 Mike Wetherell.
+# Licence:     wxWidgets licence
+#############################################################################
+
+#
+# The layout of this file is the same as src/regex/reg.test. See the comments
+# in that file for full details. The encoding used in here is UTF-8.
+#
+# These tests test the character classifications over the ascii range pretty
+# thoroughly, since hopefully these will be similar for all platforms and
+# locales where wxWidgets runs.
+#
+# Also does some tests involving western european and cyrillic characters.
+# In Unicode mode, all these tests should succeed, which verifies that the
+# classifications aren't limited to a single 8-bit character set.
+#
+# In non-unicode mode, if the test can't be translated into the character
+# encoding of the current locale, the test will be skipped. So either may
+# succeed or be skipped.
+#
+
+doing wx_1 "character classification: ascii"
+m   1   &   {[^[:alnum:]]}  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
+m   2   &   {[[:alnum:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X" "X"
+m   3   &   {[^[:alpha:]]}  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
+m   4   &   {[[:alpha:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X" "X"
+m   5   &   {[^[:cntrl:]]}  "\a\b\t\n\v\f\r!" "!"
+m   6   &   {[[:cntrl:]]}   " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
+m   7   &   {[^[:digit:]]}  "0123456789!" "!"
+m   8   &   {[[:digit:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0" "0"
+m   9   &   {[^[:graph:]]}  "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
+m   10  &   {[[:graph:]]}   "\a\b\t\n\v\f\r !" "!"
+m   11  &   {[^[:lower:]]}  "abcdefghijklmnopqrstuvwxyz!" "!"
+m   12  &   {[[:lower:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x" "x"
+m   13  &   {[^[:print:]]}  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" "\n"
+m   14  &   {[[:print:]]}   "\a\b\n\v\f\rX" "X"
+m   15  &   {[^[:punct:]]}  "!\"#%&'()*,-./:;?@[\\]_{}X" "X"
+m   16  &   {[[:punct:]]}   "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
+m   17  &   {[^[:space:]]}  "\t\n\v\f\r X" "X"
+m   18  &   {[[:space:]]}   "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
+m   19  &   {[^[:upper:]]}  "ABCDEFGHIJKLMNOPQRSTUVWXYZ!" "!"
+m   20  &   {[[:upper:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X" "X"
+m   21  &   {[^[:xdigit:]]} "0123456789ABCDEFabcdef!" "!"
+m   22  &   {[[:xdigit:]]}  "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a" "a"
+i   23  &i  "AbCdEfGhIjKlMnOpQrStUvWxYz" "aBcDeFgHiJkLmNoPqRsTuVwXyZ" "0 25"
+
+doing wx_2 "character classification: western european"
+m   1   &   {[^[:alpha:]]}  "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
+m   2   &   {[[:alpha:]]}   " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷X" "X"
+m   3   &   {[^[:lower:]]}  "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
+m   4   &   {[[:lower:]]}   " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ÷x" "x"
+m   5   &   {[^[:upper:]]}  "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ!" "!"
+m   6   &   {[[:upper:]]}   " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿX" "X"
+i   7   &i* "ÀáÂãÄåÆçÈéÊëÌíÎïÐñÒóÔõÖøÙúÛüÝþ" "àÁâÃäÅæÇèÉêËìÍîÏðÑòÓôÕöØùÚûÜýÞ" "0 29"
+
+doing wx_3 "character classification: cyrillic"
+m   1   &   {[^[:alpha:]]}  "ёЁюабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
+m   2   &   {[^[:lower:]]}  "ёюабцдефгхийклмнопярстужвьызшэщчъ!" "!"
+m   3   &   {[[:lower:]]}   "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪx" "x"
+m   4   &   {[^[:upper:]]}  "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
+m   5   &   {[[:upper:]]}   "ёюабцдефгхийклмнопярстужвьызшэщчъX" "X"
+i   6   &i* "ЁюАбЦдЕфГхИйКлМнОпЯрСтУжВьЫзШэЩчЪ" "ёЮаБцДеФгХиЙкЛмНоПяРсТуЖвЬыЗшЭщЧъ" "0 32"
+
+#doing bugs "known bugs"
+#m  1    -   {(\w+).*?(\d\d:\d\d)} "from 10:30 until 12:00" "from" "10:30"
+
diff --git a/tests/test.bkl b/tests/test.bkl
index 910296b8a7..2fd21e6d09 100644
--- a/tests/test.bkl
+++ b/tests/test.bkl
@@ -10,6 +10,7 @@
         <sources>
             test.cpp
             mbconv/main.cpp
+            regex/regex.cpp
         </sources>
         <wx-lib>base</wx-lib>
     </exe>
diff --git a/tests/test.dsp b/tests/test.dsp
index 4feb363e4e..54b7fb6087 100644
--- a/tests/test.dsp
+++ b/tests/test.dsp
@@ -439,6 +439,10 @@ SOURCE=.\mbconv\main.cpp
 # End Source File
 # Begin Source File
 
+SOURCE=.\regex\regex.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\test.cpp
 # End Source File
 # End Group
-- 
2.47.2