added regex test suite

author Václav Slavík <vslavik@fastmail.fm>

Fri, 5 Mar 2004 23:14:23 +0000 (23:14 +0000)

committer Václav Slavík <vslavik@fastmail.fm>

Fri, 5 Mar 2004 23:14:23 +0000 (23:14 +0000)
author Václav Slavík <vslavik@fastmail.fm>
Fri, 5 Mar 2004 23:14:23 +0000 (23:14 +0000)
committer Václav Slavík <vslavik@fastmail.fm>
Fri, 5 Mar 2004 23:14:23 +0000 (23:14 +0000)
diff --git a/tests/Makefile.in b/tests/Makefile.in

index 6f272dd47a010e5960f6993d23953dad92c816d8..ac19e587e7a664b231603fda0ebfa5e65412705d 100644 (file)
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -37,7 +37,8 @@ TEST_CXXFLAGS = -D__WX$(TOOLKIT)__ $(__WXUNIV_DEFINE_p) -I$(srcdir) \
         $(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
  TEST_OBJECTS =  \
         test_test.o \
-       test_main.o
+       test_main.o \
+       test_regex.o
  
  ### Conditionally set variables: ###
  
@@ -106,6 +107,9 @@ test_test.o: $(srcdir)/test.cpp
  test_main.o: $(srcdir)/mbconv/main.cpp
         $(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
  
+test_regex.o: $(srcdir)/regex/regex.cpp
+       $(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
+
  
  # Include dependency info, if present:
  @IF_GNU_MAKE@-include .deps/*.d
diff --git a/tests/makefile.bcc b/tests/makefile.bcc

index a968cacdf8bc13e3cc24e97918a79f0143c88eec..749a04131135e587653acade862641c29ac4748a 100644 (file)
--- a/tests/makefile.bcc
+++ b/tests/makefile.bcc
@@ -31,7 +31,8 @@ TEST_CXXFLAGS = $(__RUNTIME_LIBS_6) -I$(BCCDIR)\include $(__DEBUGINFO) \
         $(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
  TEST_OBJECTS =  \
         $(OBJS)\test_test.obj \
-       $(OBJS)\test_main.obj
+       $(OBJS)\test_main.obj \
+       $(OBJS)\test_regex.obj
  
  ### Conditionally set variables: ###
  
@@ -156,3 +157,6 @@ $(OBJS)\test_test.obj: .\test.cpp
  
  $(OBJS)\test_main.obj: .\mbconv\main.cpp
         $(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
+
+$(OBJS)\test_regex.obj: .\regex\regex.cpp
+       $(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
diff --git a/tests/makefile.gcc b/tests/makefile.gcc

index 5aeeb39f942bd94a46c476ab39b9c3ac77b151bb..2fe6eb76fe1b88239e037fe2261f1350797257f7 100644 (file)
--- a/tests/makefile.gcc
+++ b/tests/makefile.gcc
@@ -22,7 +22,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO) $(__OPTIMIZEFLAG_2) $(GCCFLAGS) -DHAVE_W32API_H \
         $(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS)
  TEST_OBJECTS =  \
         $(OBJS)\test_test.o \
-       $(OBJS)\test_main.o
+       $(OBJS)\test_main.o \
+       $(OBJS)\test_regex.o
  
  ### Conditionally set variables: ###
  
@@ -151,4 +152,7 @@ $(OBJS)\test_test.o: ./test.cpp
  $(OBJS)\test_main.o: ./mbconv/main.cpp
         $(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
  
+$(OBJS)\test_regex.o: ./regex/regex.cpp
+       $(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
+
  .PHONY: all clean
diff --git a/tests/makefile.vc b/tests/makefile.vc

index 59f35e494a21d55cbfc739e004faa7846e65ef53..ee92075fb5751e7f892c8d147f3f3480954ea278 100644 (file)
--- a/tests/makefile.vc
+++ b/tests/makefile.vc
@@ -24,7 +24,8 @@ TEST_CXXFLAGS = /M$(__RUNTIME_LIBS_7)$(__DEBUGRUNTIME_3) /DWIN32 \
         $(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS)
  TEST_OBJECTS =  \
         $(OBJS)\test_test.obj \
-       $(OBJS)\test_main.obj
+       $(OBJS)\test_main.obj \
+       $(OBJS)\test_regex.obj
  
  ### Conditionally set variables: ###
  
@@ -212,3 +213,6 @@ $(OBJS)\test_test.obj: .\test.cpp
  
  $(OBJS)\test_main.obj: .\mbconv\main.cpp
         $(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
+
+$(OBJS)\test_regex.obj: .\regex\regex.cpp
+       $(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
diff --git a/tests/makefile.wat b/tests/makefile.wat

index 9df4df112a50db0864efd661be7f4b8c612dc73c..53e2b0c516e3c3d2b70a638274cf448e901c0451 100644 (file)
--- a/tests/makefile.wat
+++ b/tests/makefile.wat
@@ -172,7 +172,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO_0) $(__OPTIMIZEFLAG_2) -bm $(__RUNTIME_LIBS_5) &
         $(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS)
  TEST_OBJECTS =  &
         $(OBJS)\test_test.obj &
-       $(OBJS)\test_main.obj
+       $(OBJS)\test_main.obj &
+       $(OBJS)\test_regex.obj
  
  
  all : $(OBJS)
@@ -206,3 +207,6 @@ $(OBJS)\test_test.obj :  .AUTODEPEND .\test.cpp
  
  $(OBJS)\test_main.obj :  .AUTODEPEND .\mbconv\main.cpp
         $(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
+
+$(OBJS)\test_regex.obj :  .AUTODEPEND .\regex\regex.cpp
+       $(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
diff --git a/tests/regex/reg.test b/tests/regex/reg.test

new file mode 100644 (file)

index 0000000..8bfffad
--- /dev/null
+++ b/tests/regex/reg.test
@@ -0,0 +1,1135 @@
+# reg.test --
+#
+# This file contains a collection of tests for one or more of the Tcl
+# built-in commands.  Sourcing this file into Tcl runs the tests and
+# generates output for errors.  No output means no errors were found.
+# (Don't panic if you are seeing this as part of the reg distribution
+# and aren't using Tcl -- reg's own regression tester also knows how
+# to read this file, ignoring the Tcl-isms.)
+#
+# Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+#
+# RCS: @(#) $Id$
+
+# Load tcltest and import its commands, unless ::tcltest is already a
+# child of the current namespace.
+if {[lsearch [namespace children] ::tcltest] < 0} {
+    package require tcltest 2
+    namespace import -force ::tcltest::*
+}
+
+# Tests tagged with the testregexp constraint are skipped when the
+# testregexp command doesn't exist.  The localeRegexp constraint is
+# always false, so tests tagged with it are always skipped.
+
+::tcltest::testConstraint testregexp \
+       [expr {[llength [info commands testregexp]] > 0}]
+::tcltest::testConstraint localeRegexp 0
+
+# This file uses some custom procedures, defined below, for regexp regression
+# testing.  The name of the procedure indicates the general nature of the
+# test:
+#      e       compile error expected
+#      f       match failure expected
+#      m       successful match
+#      i       successful match with -indices (used in checking things like
+#              nonparticipating subexpressions)
+#      p       unsuccessful match with -indices (!!) (used in checking
+#              partial-match reporting)
+# There is also "doing" which sets up title and major test number for each
+# block of tests.
+
+# The first 3 arguments are constant:  a minor number (which often gets
+# a letter or two suffixed to it internally), some flags, and the RE itself.
+# For e, the remaining argument is the name of the compile error expected,
+# less the leading "REG_".  For the rest, the next argument is the string
+# to try the match against.  Remaining arguments are the substring expected
+# to be matched, and any substrings expected to be matched by subexpressions.
+# (For f, these arguments are optional, and if present are ignored except
+# that they indicate how many subexpressions should be present in the RE.)
+# It is an error for the number of subexpression arguments to be wrong.
+# Cases involving nonparticipating subexpressions, checking where empty
+# substrings are located, etc. should be done using i and p.
+
+# The flag characters are complex and a bit eclectic.  Generally speaking, 
+# lowercase letters are compile options, uppercase are expected re_info
+# bits, and nonalphabetics are match options, controls for how the test is 
+# run, or testing options.  The one small surprise is that AREs are the
+# default, and you must explicitly request lesser flavors of RE.  The flags
+# are as follows.  It is admitted that some are not very mnemonic.
+# There are some others which are purely debugging tools and are not
+# useful in this file.
+#
+#      -       no-op (placeholder)
+#      +       provide fake xy equivalence class and ch collating element
+#      %       force small state-set cache in matcher (to test cache replace)
+#      ^       beginning of string is not beginning of line
+#      $       end of string is not end of line
+#      *       test is Unicode-specific, needs big character set
+#
+#      &       test as both ARE and BRE
+#      b       BRE
+#      e       ERE
+#      a       turn advanced-features bit on (error unless ERE already)
+#      q       literal string, no metacharacters at all
+#
+#      i       case-independent matching
+#      o       ("opaque") no subexpression capture
+#      p       newlines are half-magic, excluded from . and [^ only
+#      w       newlines are half-magic, significant to ^ and $ only
+#      n       newlines are fully magic, both effects
+#      x       expanded RE syntax
+#      t       incomplete-match reporting
+#
+#      A       backslash-_a_lphanumeric seen
+#      B       ERE/ARE literal-_b_race heuristic used
+#      E       backslash (_e_scape) seen within []
+#      H       looka_h_ead constraint seen
+#      I       _i_mpossible to match
+#      L       _l_ocale-specific construct seen
+#      M       unportable (_m_achine-specific) construct seen
+#      N       RE can match empty (_n_ull) string
+#      P       non-_P_OSIX construct seen
+#      Q       {} _q_uantifier seen
+#      R       back _r_eference seen
+#      S       POSIX-un_s_pecified syntax seen
+#      T       prefers shortest (_t_iny)
+#      U       saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
+
+# The one area we can't easily test is memory-allocation failures (which
+# are hard to provoke on command).  Embedded NULs also are not tested at
+# the moment, but this is a historical accident which should be fixed.
+
+
+
+# test procedures and related
+
+# Names of the testregexp options built by the procedures below:
+# -$ask is used for the compile-only checks, and -$xflags carries the
+# flag characters that have no dedicated switch (see proc flags).
+set ask "about"
+set xflags "xflags"
+set testbypassed 0
+
+# re_info abbreviation mapping table
+array set infonames {
+       A       REG_UBSALNUM
+       B       REG_UBRACES
+       E       REG_UBBS
+       H       REG_ULOOKAHEAD
+       I       REG_UIMPOSSIBLE
+       L       REG_ULOCALE
+       M       REG_UUNPORT
+       N       REG_UEMPTYMATCH
+       P       REG_UNONPOSIX
+       Q       REG_UBOUNDS
+       R       REG_UBACKREF
+       S       REG_UUNSPEC
+       T       REG_USHORTEST
+       U       REG_UPBOTCH
+}
+set infonameorder "RHQBAUEPSMLNIT"     ;# must match bit order, lsb first
+
+# Start a new block of tests: report any tests bypassed in the previous
+# block, then derive the test-name prefix and the description from the
+# major test number and the block title.
+proc doing {major desc} {
+       global prefix description testbypassed
+
+       if {$testbypassed != 0} {
+               set where "$prefix, `$description'"
+               puts stdout "!!! bypassed $testbypassed tests in $where"
+       }
+
+       set prefix "reg-$major"
+       set description "reg $desc"
+       set testbypassed 0
+}
+
+# Build the dotted test number from a test id list (internal).
+proc tno {testid} {
+       set dotted [join $testid "."]
+       return $dotted
+}
+
+# Build the test description (internal).  Any elements of the test id
+# after the first are modifiers and are shown in parentheses in front of
+# the current block description.
+proc desc {testid} {
+       global description
+
+       if {[llength $testid] <= 1} {
+               return $description
+       }
+       set modifiers [lreplace $testid 0 0]
+       return "($modifiers) $description"
+}
+
+# Translate a flags string into the trailing arguments for testregexp
+# (internal).  Flag characters with a dedicated switch become that switch,
+# "-" is a placeholder and is dropped, and all remaining characters are
+# collected, in order, into a single -$xflags argument.
+proc flags {fl} {
+       global xflags
+
+       array set switchfor {
+               i -nocase
+               x -expanded
+               n -line
+               p -linestop
+               w -lineanchor
+       }
+       set opts [list]
+       set leftover ""
+       foreach ch [split $fl ""] {
+               if {[info exists switchfor($ch)]} {
+                       lappend opts $switchfor($ch)
+               } elseif {[string compare $ch "-"] != 0} {
+                       append leftover $ch
+               }
+       }
+       if {[string length $leftover] > 0} {
+               lappend opts -$xflags $leftover
+       }
+       return $opts
+}
+
+# Build the list of expected re_info names from a flags string (internal).
+# The names come out in the order given by infonameorder, regardless of
+# the order of the characters in the flags string.
+proc infoflags {fl} {
+       global infonames infonameorder
+
+       set present [split $fl ""]
+       set names [list]
+       foreach abbrev [split $infonameorder ""] {
+               if {[lsearch -exact $present $abbrev] != -1} {
+                       lappend names $infonames($abbrev)
+               }
+       }
+       return $names
+}
+
+# compilation error expected
+# e testno flags re errname
+#      testid  minor test number (a list; ARE/BRE modifiers get appended)
+#      flags   flag characters, as described at the top of this file
+#      re      the regular expression to compile
+#      err     name of the expected compile error, less the leading "REG_"
+proc e {testid flags re err} {
+       global prefix ask errorCode
+
+       # Tcl locale stuff doesn't do the ch/xy test fakery yet
+       if {[string first "+" $flags] >= 0} {
+           # This will register as a skipped test
+           test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
+           return
+       }
+
+       # if &, strip it from the flags and test as both ARE and BRE
+       set amp [string first "&" $flags]
+       if {$amp >= 0} {
+               # expressions are braced so they are substituted only once
+               set f [string range $flags 0 [expr {$amp - 1}]]
+               append f [string range $flags [expr {$amp + 1}] end]
+               e [linsert $testid end ARE] ${f} $re $err
+               e [linsert $testid end BRE] ${f}b $re $err
+               return
+       }
+
+       # the test passes when the compile attempt fails (catch returns 1)
+       # and the second element of errorCode is the expected REG_ name
+       set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
+       set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
+       test $prefix.[tno $testid] [desc $testid] \
+               {testregexp} $run [list 1 REG_$err]
+}
+
+# match failure expected
+# f testno flags re target ?mat submat ...?
+# Registers two tests: a "compile" test that checks the information
+# reported for the RE, and an "execute" test that expects the match
+# attempt against target to return 0.  The optional trailing arguments
+# are used only to count the subexpressions the RE should have.
+proc f {testid flags re target args} {
+       global prefix description ask
+
+       # Tcl locale stuff doesn't do the ch/xy test fakery yet
+       if {[string first "+" $flags] >= 0} {
+           # This will register as a skipped test
+           test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
+           return
+       }
+
+       # if &, strip it from the flags and test as both ARE and BRE
+       set amp [string first "&" $flags]
+       if {$amp >= 0} {
+               # expressions are braced so they are substituted only once
+               set f [string range $flags 0 [expr {$amp - 1}]]
+               append f [string range $flags [expr {$amp + 1}] end]
+               eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
+                                                               $target]
+               eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
+                                                               $target]
+               return
+       }
+
+       set f [flags $flags]
+       set infoflags [infoflags $flags]
+       set ccmd [concat [list testregexp -$ask] $f [list $re]]
+       set nsub [expr {[llength $args] - 1}]
+       if {$nsub == -1} {
+               # didn't tell us number of subexps, so drop the first element
+               # of the compile result and compare only the info flags
+               set ccmd "lreplace \[$ccmd\] 0 0"
+               set info [list $infoflags]
+       } else {
+               set info [list $nsub $infoflags]
+       }
+       lappend testid "compile"
+       test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
+
+       set testid [lreplace $testid end end "execute"]
+       set ecmd [concat [list testregexp] $f [list $re $target]]
+       test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
+}
+
+# match expected, internal routine that does the work
+# parameters are like those of the "real" routines (m, i, p), preceded by
+#  "opts", a possibly-empty list of switches for the regexp match attempt
+# The ! flag is used to indicate expected match failure (for REG_EXPECT,
+#  which wants argument testing even in the event of failure).
+# Registers a "compile" test (subexpression count and info flags) and an
+#  "execute" test (return code followed by the match variables).
+proc matchexpected {opts testid flags re target args} {
+       global prefix description ask regBug
+
+       # inside knownBug, every test is registered as a knownBug test
+       if {[info exists regBug] && $regBug} {
+           # This will register as a skipped test
+           test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
+           return
+       }
+
+       # Tcl locale stuff doesn't do the ch/xy test fakery yet
+       if {[string first "+" $flags] >= 0} {
+           # This will register as a skipped test
+           test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
+           return
+       }
+
+       # if &, strip it from the flags and test as both ARE and BRE
+       set amp [string first "&" $flags]
+       if {$amp >= 0} {
+               # expressions are braced so they are substituted only once
+               set f [string range $flags 0 [expr {$amp - 1}]]
+               append f [string range $flags [expr {$amp + 1}] end]
+               eval [concat [list matchexpected $opts \
+                       [linsert $testid end ARE] ${f} $re $target] $args]
+               eval [concat [list matchexpected $opts \
+                       [linsert $testid end BRE] ${f}b $re $target] $args]
+               return
+       }
+
+       set f [flags $flags]
+       set infoflags [infoflags $flags]
+       set ccmd [concat [list testregexp -$ask] $f [list $re]]
+       set ecmd [concat [list testregexp] $opts $f [list $re $target]]
+
+       # one match variable per expected result: "match" for the whole
+       # match, then sub1, sub2, ... for the subexpressions
+       set nsub [expr {[llength $args] - 1}]
+       set names [list]
+       set refs ""
+       for {set i 0} {$i <= $nsub} {incr i} {
+               if {$i == 0} {
+                       set name match
+               } else {
+                       set name sub$i
+               }
+               lappend names $name
+               append refs " \$$name"
+               set $name ""
+       }
+       if {[string first "o" $flags] >= 0} {   ;# REG_NOSUB kludge
+               set nsub 0              ;# unsigned value cannot be -1
+       }
+       if {[string first "t" $flags] >= 0} {   ;# REG_EXPECT
+               incr nsub -1            ;# the extra does not count
+       }
+       set ecmd [concat $ecmd $names]
+       set erun "list \[$ecmd\] $refs"
+       set retcode [list 1]
+       if {[string first "!" $flags] >= 0} {
+               set retcode [list 0]
+       }
+       set result [concat $retcode $args]
+
+       set info [list $nsub $infoflags]
+       lappend testid "compile"
+       test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
+       set testid [lreplace $testid end end "execute"]
+       test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
+}
+
+# match expected (no missing, empty, or ambiguous submatches)
+# m testno flags re target mat submat ...
+# Hands its arguments to matchexpected with an empty switch list.
+proc m {args} {
+       eval [linsert $args 0 matchexpected [list]]
+}
+
+# match expected (full fanciness)
+# i testno flags re target mat submat ...
+# Hands its arguments to matchexpected with the -indices switch.
+proc i {args} {
+       eval [linsert $args 0 matchexpected "-indices"]
+}
+
+# partial match expected
+# p testno flags re target mat "" ...
+# Quirk:  number of ""s must be one more than number of subREs.
+# Prefixes the flags with ! (expected match failure) and hands everything
+# to matchexpected with the -indices switch.
+proc p {args} {
+       set marked "![lindex $args 1]"          ;# add ! flag
+       set forwarded [lreplace $args 1 1 $marked]
+       eval [linsert $forwarded 0 matchexpected "-indices"]
+}
+
+# test is a knownBug
+# Runs the given test command at global level with ::regBug set, which
+# makes matchexpected (and so m, i and p) register its tests as skipped
+# knownBug tests.  ::regBug is cleared again even when the command raises
+# an error, so a failure here cannot silently bypass all later tests; the
+# error is then re-raised to the caller.  Returns 0 on success.
+proc knownBug {args} {
+    global errorInfo errorCode
+
+    set ::regBug 1
+    set code [catch {uplevel #0 $args} msg]
+    set ::regBug 0
+    if {$code == 1} {
+        # re-raise with the original trace and error code
+        return -code error -errorinfo $errorInfo -errorcode $errorCode $msg
+    }
+    return 0
+}
+
+
+
+# the tests themselves
+
+
+
+# support functions and preliminary misc.
+# This is sensitive to changes in message wording, but we really have to
+# test the code->message expansion at least once.
+test reg-0.1 "regexp error reporting" {
+       list [catch {regexp (*) ign} msg] $msg
+} {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
+
+
+
+doing 1 "basic sanity checks"
+m  1   &       abc             abc     abc
+f  2   &       abc             def
+m  3   &       abc             xyabxabce       abc
+
+
+
+doing 2 "invalid option combinations"
+e  1   qe      a               INVARG
+e  2   qa      a               INVARG
+e  3   qx      a               INVARG
+e  4   qn      a               INVARG
+e  5   ba      a               INVARG
+
+
+
+doing 3 "basic syntax"
+i  1   &NS     ""              a       {0 -1}
+m  2   NS      a|              a       a
+m  3   -       a|b             a       a
+m  4   -       a|b             b       b
+m  5   NS      a||b            b       b
+m  6   &       ab              ab      ab
+
+
+
+doing 4 "parentheses"
+m  1   -       (a)e            ae      ae      a
+m  2   o       (a)e            ae
+m  3   b       {\(a\)b}        ab      ab      a
+m  4   -       a((b)c)         abc     abc     bc      b
+m  5   -       a(b)(c)         abc     abc     b       c
+e  6   -       a(b             EPAREN
+e  7   b       {a\(b}          EPAREN
+# sigh, we blew it on the specs here... someday this will be fixed in POSIX,
+#  but meanwhile, it's fixed in AREs
+m  8   eU      a)b             a)b     a)b
+e  9   -       a)b             EPAREN
+e 10   b       {a\)b}          EPAREN
+m 11   P       a(?:b)c         abc     abc
+e 12   e       a(?:b)c         BADRPT
+i 13   S       a()b            ab      {0 1}   {1 0}
+m 14   SP      a(?:)b          ab      ab
+i 15   S       a(|b)c          ac      {0 1}   {1 0}
+m 16   S       a(b|)c          abc     abc     b
+
+
+
+doing 5 "simple one-char matching"
+# general case of brackets done later
+m  1   &       a.b             axb     axb
+f  2   &n      "a.b"           "a\nb"
+m  3   &       {a[bc]d}        abd     abd
+m  4   &       {a[bc]d}        acd     acd
+f  5   &       {a[bc]d}        aed
+f  6   &       {a[^bc]d}       abd
+m  7   &       {a[^bc]d}       aed     aed
+f  8   &p      "a\[^bc]d"      "a\nd"
+
+
+
+doing 6 "context-dependent syntax"
+# plus odds and ends
+e  1   -       *               BADRPT
+m  2   b       *               *       *
+m  3   b       {\(*\)}         *       *       *
+e  4   -       (*)             BADRPT
+m  5   b       ^*              *       *
+e  6   -       ^*              BADRPT
+f  7   &       ^b              ^b
+m  8   b       x^              x^      x^
+f  9   I       x^              x
+m 10   n       "\n^"           "x\nb"  "\n"
+f 11   bS      {\(^b\)}        ^b
+m 12   -       (^b)            b       b       b
+m 13   &       {x$}            x       x
+m 14   bS      {\(x$\)}        x       x       x
+m 15   -       {(x$)}          x       x       x
+m 16   b       {x$y}           "x\$y"  "x\$y"
+f 17   I       {x$y}           xy
+m 18   n       "x\$\n"         "x\n"   "x\n"
+e 19   -       +               BADRPT
+e 20   -       ?               BADRPT
+
+
+
+doing 7 "simple quantifiers"
+m  1   &N      a*              aa      aa
+i  2   &N      a*              b       {0 -1}
+m  3   -       a+              aa      aa
+m  4   -       a?b             ab      ab
+m  5   -       a?b             b       b
+e  6   -       **              BADRPT
+m  7   bN      **              ***     ***
+e  8   &       a**             BADRPT
+e  9   &       a**b            BADRPT
+e 10   &       ***             BADRPT
+e 11   -       a++             BADRPT
+e 12   -       a?+             BADRPT
+e 13   -       a?*             BADRPT
+e 14   -       a+*             BADRPT
+e 15   -       a*+             BADRPT
+
+
+
+doing 8 "braces"
+m  1   NQ      "a{0,1}"        ""      ""
+m  2   NQ      "a{0,1}"        ac      a
+e  3   -       "a{1,0}"        BADBR
+e  4   -       "a{1,2,3}"      BADBR
+e  5   -       "a{257}"        BADBR
+e  6   -       "a{1000}"       BADBR
+e  7   -       "a{1"           EBRACE
+e  8   -       "a{1n}"         BADBR
+m  9   BS      "a{b"           "a\{b"  "a\{b"
+m 10   BS      "a{"            "a\{"   "a\{"
+m 11   bQ      "a\\{0,1\\}b"   cb      b
+e 12   b       "a\\{0,1"       EBRACE
+e 13   -       "a{0,1\\"       BADBR
+m 14   Q       "a{0}b"         ab      b
+m 15   Q       "a{0,0}b"       ab      b
+m 16   Q       "a{0,1}b"       ab      ab
+m 17   Q       "a{0,2}b"       b       b
+m 18   Q       "a{0,2}b"       aab     aab
+m 19   Q       "a{0,}b"        aab     aab
+m 20   Q       "a{1,1}b"       aab     ab
+m 21   Q       "a{1,3}b"       aaaab   aaab
+f 22   Q       "a{1,3}b"       b
+m 23   Q       "a{1,}b"        aab     aab
+f 24   Q       "a{2,3}b"       ab
+m 25   Q       "a{2,3}b"       aaaab   aaab
+f 26   Q       "a{2,}b"        ab
+m 27   Q       "a{2,}b"        aaaab   aaaab
+
+
+
+doing 9 "brackets"
+m  1   &       {a[bc]}         ac      ac
+m  2   &       {a[-]}          a-      a-
+m  3   &       {a[[.-.]]}      a-      a-
+m  4   &L      {a[[.zero.]]}   a0      a0
+m  5   &LM     {a[[.zero.]-9]} a2      a2
+m  6   &M      {a[0-[.9.]]}    a2      a2
+m  7   &+L     {a[[=x=]]}      ax      ax
+m  8   &+L     {a[[=x=]]}      ay      ay
+f  9   &+L     {a[[=x=]]}      az
+e 10   &       {a[0-[=x=]]}    ERANGE
+m 11   &L      {a[[:digit:]]}  a0      a0
+e 12   &       {a[[:woopsie:]]}        ECTYPE
+f 13   &L      {a[[:digit:]]}  ab
+e 14   &       {a[0-[:digit:]]}        ERANGE
+m 15   &LP     {[[:<:]]a}      a       a
+m 16   &LP     {a[[:>:]]}      a       a
+e 17   &       {a[[..]]b}      ECOLLATE
+e 18   &       {a[[==]]b}      ECOLLATE
+e 19   &       {a[[::]]b}      ECTYPE
+e 20   &       {a[[.a}         EBRACK
+e 21   &       {a[[=a}         EBRACK
+e 22   &       {a[[:a}         EBRACK
+e 23   &       {a[}            EBRACK
+e 24   &       {a[b}           EBRACK
+e 25   &       {a[b-}          EBRACK
+e 26   &       {a[b-c}         EBRACK
+m 27   &M      {a[b-c]}        ab      ab
+m 28   &       {a[b-b]}        ab      ab
+m 29   &M      {a[1-2]}        a2      a2
+e 30   &       {a[c-b]}        ERANGE
+e 31   &       {a[a-b-c]}      ERANGE
+m 32   &M      {a[--?]b}       a?b     a?b
+m 33   &       {a[---]b}       a-b     a-b
+m 34   &       {a[]b]c}        a]c     a]c
+m 35   EP      {a[\]]b}        a]b     a]b
+f 36   bE      {a[\]]b}        a]b
+m 37   bE      {a[\]]b}        "a\\]b" "a\\]b"
+m 38   eE      {a[\]]b}        "a\\]b" "a\\]b"
+m 39   EP      {a[\\]b}        "a\\b"  "a\\b"
+m 40   eE      {a[\\]b}        "a\\b"  "a\\b"
+m 41   bE      {a[\\]b}        "a\\b"  "a\\b"
+e 42   -       {a[\Z]b}        EESCAPE
+m 43   &       {a[[b]c}        "a\[c"  "a\[c"
+m 44   EMP*    {a[\u00fe-\u0507][\u00ff-\u0300]b} \
+                       "a\u0102\u02ffb"        "a\u0102\u02ffb"
+
+
+
+doing 10 "anchors and newlines"
+m  1   &       ^a              a       a
+f  2   &^      ^a              a
+i  3   &N      ^               a       {0 -1}
+i  4   &       {a$}            aba     {2 2}
+f  5   {&$}    {a$}            a
+i  6   &N      {$}             ab      {2 1}
+m  7   &n      ^a              a       a
+m  8   &n      "^a"            "b\na"  "a"
+i  9   &w      "^a"            "a\na"  {0 0}
+i 10   &n^     "^a"            "a\na"  {2 2}
+m 11   &n      {a$}            a       a
+m 12   &n      "a\$"           "a\nb"  "a"
+i 13   &n      "a\$"           "a\na"  {0 0}
+i 14   N       ^^              a       {0 -1}
+m 15   b       ^^              ^       ^
+i 16   N       {$$}            a       {1 0}
+m 17   b       {$$}            "\$"    "\$"
+m 18   &N      {^$}            ""      ""
+f 19   &N      {^$}            a
+i 20   &nN     "^\$"           "a\n\nb"        {2 1}
+m 21   N       {$^}            ""      ""
+m 22   b       {$^}            "\$^"   "\$^"
+m 23   P       {\Aa}           a       a
+m 24   ^P      {\Aa}           a       a
+f 25   ^nP     {\Aa}           "b\na"
+m 26   P       {a\Z}           a       a
+m 27   {$P}    {a\Z}           a       a
+f 28   {$nP}   {a\Z}           "a\nb"
+e 29   -       ^*              BADRPT
+e 30   -       {$*}            BADRPT
+e 31   -       {\A*}           BADRPT
+e 32   -       {\Z*}           BADRPT
+
+
+
+doing 11 "boundary constraints"
+m  1   &LP     {[[:<:]]a}      a       a
+m  2   &LP     {[[:<:]]a}      -a      a
+f  3   &LP     {[[:<:]]a}      ba
+m  4   &LP     {a[[:>:]]}      a       a
+m  5   &LP     {a[[:>:]]}      a-      a
+f  6   &LP     {a[[:>:]]}      ab
+m  7   bLP     {\<a}           a       a
+f  8   bLP     {\<a}           ba
+m  9   bLP     {a\>}           a       a
+f 10   bLP     {a\>}           ab
+m 11   LP      {\ya}           a       a
+f 12   LP      {\ya}           ba
+m 13   LP      {a\y}           a       a
+f 14   LP      {a\y}           ab
+m 15   LP      {a\Y}           ab      a
+f 16   LP      {a\Y}           a-
+f 17   LP      {a\Y}           a
+f 18   LP      {-\Y}           -a
+m 19   LP      {-\Y}           -%      -
+f 20   LP      {\Y-}           a-
+e 21   -       {[[:<:]]*}      BADRPT
+e 22   -       {[[:>:]]*}      BADRPT
+e 23   b       {\<*}           BADRPT
+e 24   b       {\>*}           BADRPT
+e 25   -       {\y*}           BADRPT
+e 26   -       {\Y*}           BADRPT
+m 27   LP      {\ma}           a       a
+f 28   LP      {\ma}           ba
+m 29   LP      {a\M}           a       a
+f 30   LP      {a\M}           ab
+f 31   ILP     {\Ma}           a
+f 32   ILP     {a\m}           a
+
+
+
+doing 12 "character classes"
+m  1   LP      {a\db}          a0b     a0b
+f  2   LP      {a\db}          axb
+f  3   LP      {a\Db}          a0b
+m  4   LP      {a\Db}          axb     axb
+m  5   LP      "a\\sb"         "a b"   "a b"
+m  6   LP      "a\\sb"         "a\tb"  "a\tb"
+m  7   LP      "a\\sb"         "a\nb"  "a\nb"
+f  8   LP      {a\sb}          axb
+m  9   LP      {a\Sb}          axb     axb
+f 10   LP      "a\\Sb"         "a b"
+m 11   LP      {a\wb}          axb     axb
+f 12   LP      {a\wb}          a-b
+f 13   LP      {a\Wb}          axb
+m 14   LP      {a\Wb}          a-b     a-b
+m 15   LP      {\y\w+z\y}      adze-guz        guz
+m 16   LPE     {a[\d]b}        a1b     a1b
+m 17   LPE     "a\[\\s]b"      "a b"   "a b"
+m 18   LPE     {a[\w]b}        axb     axb
+
+
+
+doing 13 "escapes"
+e  1   &       "a\\"           EESCAPE
+m  2   -       {a\<b}          a<b     a<b
+m  3   e       {a\<b}          a<b     a<b
+m  4   bAS     {a\wb}          awb     awb
+m  5   eAS     {a\wb}          awb     awb
+m  6   PL      "a\\ab"         "a\007b"        "a\007b"
+m  7   P       "a\\bb"         "a\bb"  "a\bb"
+m  8   P       {a\Bb}          "a\\b"  "a\\b"
+m  9   MP      "a\\chb"        "a\bb"  "a\bb"
+m 10   MP      "a\\cHb"        "a\bb"  "a\bb"
+m 11   LMP     "a\\e"          "a\033" "a\033"
+m 12   P       "a\\fb"         "a\fb"  "a\fb"
+m 13   P       "a\\nb"         "a\nb"  "a\nb"
+m 14   P       "a\\rb"         "a\rb"  "a\rb"
+m 15   P       "a\\tb"         "a\tb"  "a\tb"
+m 16   P       "a\\u0008x"     "a\bx"  "a\bx"
+e 17   -       {a\u008x}       EESCAPE
+m 18   P       "a\\u00088x"    "a\b8x" "a\b8x"
+m 19   P       "a\\U00000008x" "a\bx"  "a\bx"
+e 20   -       {a\U0000008x}   EESCAPE
+m 21   P       "a\\vb"         "a\vb"  "a\vb"
+m 22   MP      "a\\x08x"       "a\bx"  "a\bx"
+e 23   -       {a\xq}          EESCAPE
+m 24   MP      "a\\x0008x"     "a\bx"  "a\bx"
+e 25   -       {a\z}           EESCAPE
+m 26   MP      "a\\010b"       "a\bb"  "a\bb"
+
+
+
+doing 14 "back references"
+# ugh
+m  1   RP      {a(b*)c\1}      abbcbb  abbcbb  bb
+m  2   RP      {a(b*)c\1}      ac      ac      ""
+f  3   RP      {a(b*)c\1}      abbcb
+m  4   RP      {a(b*)\1}       abbcbb  abb     b
+m  5   RP      {a(b|bb)\1}     abbcbb  abb     b
+m  6   RP      {a([bc])\1}     abb     abb     b
+f  7   RP      {a([bc])\1}     abc
+m  8   RP      {a([bc])\1}     abcabb  abb     b
+f  9   RP      {a([bc])*\1}    abc
+f 10   RP      {a([bc])\1}     abB
+m 11   iRP     {a([bc])\1}     abB     abB     b
+m 12   RP      {a([bc])\1+}    abbb    abbb    b
+m 13   QRP     "a(\[bc])\\1{3,4}"      abbbb   abbbb   b
+f 14   QRP     "a(\[bc])\\1{3,4}"      abbb
+m 15   RP      {a([bc])\1*}    abbb    abbb    b
+m 16   RP      {a([bc])\1*}    ab      ab      b
+m 17   RP      {a([bc])(\1*)}  ab      ab      b       ""
+e 18   -       {a((b)\1)}      ESUBREG
+e 19   -       {a(b)c\2}       ESUBREG
+m 20   bR      {a\(b*\)c\1}    abbcbb  abbcbb  bb
+
+
+
+doing 15 "octal escapes vs back references"
+# initial zero is always octal
+m  1   MP      "a\\010b"       "a\bb"  "a\bb"
+m  2   MP      "a\\0070b"      "a\0070b"       "a\0070b"
+m  3   MP      "a\\07b"        "a\007b"        "a\007b"
+m  4   MP      "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c"  "abbbbbbbbbb\007c" \
+       "abbbbbbbbbb\007c"      "b"     "b"     "b"     "b"     "b"     "b" \
+       "b"     "b"     "b"     "b"
+# a single digit is always a backref
+e  5   -       {a\7b}          ESUBREG
+# otherwise it's a backref only if within range (barf!)
+m  6   MP      "a\\10b"        "a\bb"  "a\bb"
+m  7   MP      {a\101b}        aAb     aAb
+m  8   RP      {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c}   abbbbbbbbbbbc \
+       abbbbbbbbbbbc   b       b       b       b       b       b       b \
+       b       b       b
+# but we're fussy about border cases -- guys who want octal should use the zero
+e  9   -       {a((((((((((b\10))))))))))c}    ESUBREG
+# BREs don't have octal, EREs don't have backrefs
+m 10   MP      "a\\12b"        "a\nb"  "a\nb"
+e 11   b       {a\12b}         ESUBREG
+m 12   eAS     {a\12b}         a12b    a12b
+
+
+
+doing 16 "expanded syntax"
+m  1   xP      "a b c"         "abc"   "abc"
+m  2   xP      "a b #oops\nc\td"       "abcd"  "abcd"
+m  3   x       "a\\ b\\\tc"    "a b\tc"        "a b\tc"
+m  4   xP      "a b\\#c"       "ab#c"  "ab#c"
+m  5   xP      "a b\[c d]e"    "ab e"  "ab e"
+m  6   xP      "a b\[c#d]e"    "ab#e"  "ab#e"
+m  7   xP      "a b\[c#d]e"    "abde"  "abde"
+m  8   xSPB    "ab{ d"         "ab\{d" "ab\{d"
+m  9   xPQ     "ab{ 1 , 2 }c"  "abc"   "abc"
+
+
+
+doing 17 "misc syntax"
+m  1   P       a(?#comment)b   ab      ab
+
+
+
+doing 18 "unmatchable REs"
+f  1   I       a^b             ab
+
+
+
+doing 19 "case independence"
+m  1   &i      ab              Ab      Ab
+m  2   &i      {a[bc]}         aC      aC
+f  3   &i      {a[^bc]}        aB
+m  4   &iM     {a[b-d]}        aC      aC
+f  5   &iM     {a[^b-d]}       aC
+
+
+
+doing 20 "directors and embedded options"
+e  1   &       ***?            BADPAT
+m  2   q       ***?            ***?    ***?
+m  3   &P      ***=a*b         a*b     a*b
+m  4   q       ***=a*b         ***=a*b ***=a*b
+m  5   bLP     {***:\w+}       ab      ab
+m  6   eLP     {***:\w+}       ab      ab
+e  7   &       ***:***=a*b     BADRPT
+m  8   &P      ***:(?b)a+b     a+b     a+b
+m  9   P       (?b)a+b         a+b     a+b
+e 10   e       {(?b)\w+}       BADRPT
+m 11   bAS     {(?b)\w+}       (?b)w+  (?b)w+
+m 12   iP      (?c)a           a       a
+f 13   iP      (?c)a           A
+m 14   APS     {(?e)\W+}       WW      WW
+m 15   P       (?i)a+          Aa      Aa
+f 16   P       "(?m)a.b"       "a\nb"
+m 17   P       "(?m)^b"        "a\nb"  "b"
+f 18   P       "(?n)a.b"       "a\nb"
+m 19   P       "(?n)^b"        "a\nb"  "b"
+f 20   P       "(?p)a.b"       "a\nb"
+f 21   P       "(?p)^b"        "a\nb"
+m 22   P       (?q)a+b         a+b     a+b
+m 23   nP      "(?s)a.b"       "a\nb"  "a\nb"
+m 24   xP      "(?t)a b"       "a b"   "a b"
+m 25   P       "(?w)a.b"       "a\nb"  "a\nb"
+m 26   P       "(?w)^b"        "a\nb"  "b"
+m 27   P       "(?x)a b"       "ab"    "ab"
+e 28   -       (?z)ab          BADOPT
+m 29   P       (?ici)a+        Aa      Aa
+e 30   P       (?i)(?q)a+      BADRPT
+m 31   P       (?q)(?i)a+      (?i)a+  (?i)a+
+m 32   P       (?qe)a+         a       a
+m 33   xP      "(?q)a b"       "a b"   "a b"
+m 34   P       "(?qx)a b"      "a b"   "a b"
+m 35   P       (?qi)ab         Ab      Ab
+
+
+
+doing 21 "capturing"
+m  1   -       a(b)c           abc     abc     b
+m  2   P       a(?:b)c         xabc    abc
+m  3   -       a((b))c         xabcy   abc     b       b
+m  4   P       a(?:(b))c       abcy    abc     b
+m  5   P       a((?:b))c       abc     abc     b
+m  6   P       a(?:(?:b))c     abc     abc
+i  7   Q       "a(b){0}c"      ac      {0 1}   {-1 -1}
+m  8   -       a(b)c(d)e       abcde   abcde   b       d
+m  9   -       (b)c(d)e        bcde    bcde    b       d
+m 10   -       a(b)(d)e        abde    abde    b       d
+m 11   -       a(b)c(d)        abcd    abcd    b       d
+m 12   -       (ab)(cd)        xabcdy  abcd    ab      cd
+m 13   -       a(b)?c          xabcy   abc     b
+i 14   -       a(b)?c          xacy    {1 2}   {-1 -1}
+m 15   -       a(b)?c(d)?e     xabcdey abcde   b       d
+i 16   -       a(b)?c(d)?e     xacdey  {1 4}   {-1 -1} {3 3}
+i 17   -       a(b)?c(d)?e     xabcey  {1 4}   {2 2}   {-1 -1}
+i 18   -       a(b)?c(d)?e     xacey   {1 3}   {-1 -1} {-1 -1}
+m 19   -       a(b)*c          xabcy   abc     b
+i 20   -       a(b)*c          xabbbcy {1 5}   {4 4}
+i 21   -       a(b)*c          xacy    {1 2}   {-1 -1}
+m 22   -       a(b*)c          xabbbcy abbbc   bbb
+m 23   -       a(b*)c          xacy    ac      ""
+f 24   -       a(b)+c          xacy
+m 25   -       a(b)+c          xabcy   abc     b
+i 26   -       a(b)+c          xabbbcy {1 5}   {4 4}
+m 27   -       a(b+)c          xabbbcy abbbc   bbb
+i 28   Q       "a(b){2,3}c"    xabbbcy {1 5}   {4 4}
+i 29   Q       "a(b){2,3}c"    xabbcy  {1 4}   {3 3}
+f 30   Q       "a(b){2,3}c"    xabcy
+m 31   LP      "\\y(\\w+)\\y"  "-- abc-"       "abc"   "abc"
+m 32   -       a((b|c)d+)+     abacdbd acdbd   bd      b
+m 33   N       (.*).*          abc     abc     abc
+m 34   N       (a*)*           bc      ""      ""
+
+
+
+doing 22 "multicharacter collating elements"
+# again ugh
+m  1   &+L     {a[c]e}         ace     ace
+f  2   &+IL    {a[c]h}         ach
+m  3   &+L     {a[[.ch.]]}     ach     ach
+f  4   &+L     {a[[.ch.]]}     ace
+m  5   &+L     {a[c[.ch.]]}    ac      ac
+m  6   &+L     {a[c[.ch.]]}    ace     ac
+m  7   &+L     {a[c[.ch.]]}    ache    ach
+f  8   &+L     {a[^c]e}        ace
+m  9   &+L     {a[^c]e}        abe     abe
+m 10   &+L     {a[^c]e}        ache    ache
+f 11   &+L     {a[^[.ch.]]}    ach
+m 12   &+L     {a[^[.ch.]]}    ace     ac
+m 13   &+L     {a[^[.ch.]]}    ac      ac
+m 14   &+L     {a[^[.ch.]]}    abe     ab
+f 15   &+L     {a[^c[.ch.]]}   ach
+f 16   &+L     {a[^c[.ch.]]}   ace
+f 17   &+L     {a[^c[.ch.]]}   ac
+m 18   &+L     {a[^c[.ch.]]}   abe     ab
+m 19   &+L     {a[^b]}         ac      ac
+m 20   &+L     {a[^b]}         ace     ac
+m 21   &+L     {a[^b]}         ach     ach
+f 22   &+L     {a[^b]}         abe
+
+
+
+doing 23 "lookahead constraints"
+m  1   HP      a(?=b)b*        ab      ab
+f  2   HP      a(?=b)b*        a
+m  3   HP      a(?=b)b*(?=c)c* abc     abc
+f  4   HP      a(?=b)b*(?=c)c* ab
+f  5   HP      a(?!b)b*        ab
+m  6   HP      a(?!b)b*        a       a
+m  7   HP      (?=b)b          b       b
+f  8   HP      (?=b)b          a
+
+
+
+doing 24 "non-greedy quantifiers"
+m  1   PT      ab+?            abb     ab
+m  2   PT      ab+?c           abbc    abbc
+m  3   PT      ab*?            abb     a
+m  4   PT      ab*?c           abbc    abbc
+m  5   PT      ab??            ab      a
+m  6   PT      ab??c           abc     abc
+m  7   PQT     "ab{2,4}?"      abbbb   abb
+m  8   PQT     "ab{2,4}?c"     abbbbc  abbbbc
+m  9   -       3z*             123zzzz456      3zzzz
+m 10   PT      3z*?            123zzzz456      3
+m 11   -       z*4             123zzzz456      zzzz4
+m 12   PT      z*?4            123zzzz456      zzzz4
+
+
+
+doing 25 "mixed quantifiers"
+# this is very incomplete as yet
+# should include |
+m  1   PNT     {^(.*?)(a*)$}   xyza    xyza    xyz     a
+m  2   PNT     {^(.*?)(a*)$}   xyzaa   xyzaa   xyz     aa
+m  3   PNT     {^(.*?)(a*)$}   xyz     xyz     xyz     ""
+
+
+
+doing 26 "tricky cases"
+# attempts to trick the matcher into accepting a short match
+m  1   -       (week|wee)(night|knights)       weeknights      weeknights \
+       wee     knights
+m  2   RP      {a(bc*).*\1}    abccbccb        abccbccb        b
+m  3   -       {a(b.[bc]*)+}   abcbd   abcbd   bd
+
+
+
+doing 27 "implementation misc."
+# duplicate arcs are suppressed
+m  1   P       a(?:b|b)c       abc     abc
+# make color/subcolor relationship go back and forth
+m  2   &       {[ab][ab][ab]}  aba     aba
+m  3   &       {[ab][ab][ab][ab][ab][ab][ab]}  abababa abababa
+
+
+
+doing 28 "boundary busters etc."
+# color-descriptor allocation changes at 10
+m  1   &       abcdefghijkl    abcdefghijkl    abcdefghijkl
+# so does arc allocation
+m  2   P       a(?:b|c|d|e|f|g|h|i|j|k|l|m)n   agn     agn
+# subexpression tracking also at 10
+m  3   -       a(((((((((((((b)))))))))))))c   abc     abc     b       b       b       b       b       b       b       b       b       b       b       b       b
+# state-set handling changes slightly at unsigned size (might be 64...)
+# (also stresses arc allocation)
+m  4   Q       "ab{1,100}c"    abbc    abbc
+m  5   Q       "ab{1,100}c"    abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
+       abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
+m  6   Q       "ab{1,100}c" \
+       abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
+       abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
+# force small cache and bust it, several ways
+m  7   LP      {\w+abcdefgh}   xyzabcdefgh     xyzabcdefgh
+m  8   %LP     {\w+abcdefgh}   xyzabcdefgh     xyzabcdefgh
+m  9   %LP     {\w+abcdefghijklmnopqrst}       xyzabcdefghijklmnopqrst \
+       xyzabcdefghijklmnopqrst
+i 10   %LP     {\w+(abcdefgh)?}        xyz     {0 2}   {-1 -1}
+i 11   %LP     {\w+(abcdefgh)?}        xyzabcdefg      {0 9}   {-1 -1}
+i 12   %LP     {\w+(abcdefghijklmnopqrst)?}    xyzabcdefghijklmnopqrs \
+       {0 21}  {-1 -1}
+
+
+
+doing 29 "incomplete matches"
+p  1   t       def             abc     {3 2}   ""
+p  2   t       bcd             abc     {1 2}   ""
+p  3   t       abc             abab    {0 3}   ""
+p  4   t       abc             abdab   {3 4}   ""
+i  5   t       abc             abc     {0 2}   {0 2}
+i  6   t       abc             xyabc   {2 4}   {2 4}
+p  7   t       abc+            xyab    {2 3}   ""
+i  8   t       abc+            xyabc   {2 4}   {2 4}
+knownBug i  9  t       abc+            xyabcd  {2 4}   {6 5}
+i  10  t       abc+            xyabcdd {2 4}   {7 6}
+p  11  tPT     abc+?           xyab    {2 3}   ""
+# the retain numbers in these two may look wrong, but they aren't
+i  12  tPT     abc+?           xyabc   {2 4}   {5 4}
+i  13  tPT     abc+?           xyabcc  {2 4}   {6 5}
+i  14  tPT     abc+?           xyabcd  {2 4}   {6 5}
+i  15  tPT     abc+?           xyabcdd {2 4}   {7 6}
+i  16  t       abcd|bc         xyabc   {3 4}   {2 4}
+p  17  tn      .*k             "xx\nyyy"       {3 5}   ""
+
+
+doing 30 "misc. oddities and old bugs"
+e  1   &       ***             BADRPT
+m  2   N       a?b*            abb     abb
+m  3   N       a?b*            bb      bb
+m  4   &       a*b             aab     aab
+m  5   &       ^a*b            aaaab   aaaab
+m  6   &M      {[0-6][1-2][0-3][0-6][1-6][0-6]}        010010  010010
+# temporary REG_BOSONLY kludge
+m  7   s       abc             abcd    abc
+f  8   s       abc             xabcd
+# back to normal stuff
+m  9   HLP     {(?n)^(?![t#])\S+}      "tk\n\n#\n#\nit0"       it0
+
+
+# flush any leftover complaints
+doing 0 "flush"
+
+# Tests resulting from bugs reported by users
+test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
+    set str {2:::DebugWin32}
+    set re {([[:xdigit:]])([[:space:]]*)}
+    # result list: regexp return value, whole match, 1st group, 2nd group
+    list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
+    # Expected: only the leading "2" matches and the [[:space:]]* group is
+    # empty, since "2" is followed by ":" rather than by white space.
+    # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
+} {1 2 2 {}}
+
+# Tests reg-32.*: "canmatch" reporting.  Every test runs testregexp with
+# the extra flag "c" on the 7-character line "asd asd" and expects a
+# two-element result: the return value of testregexp (0 in all of these
+# tests) followed by the value stored in resvar.
+# NOTE(review): resvar presumably holds the offset from which a match could
+# still succeed if more text were appended (7 == only past the end of the
+# line, -1 == never) -- confirm against the testregexp implementation.
+# Tests carrying the {knownBug} constraint document known failures.
+test reg-32.1 {canmatch functionality -- at end} {
+    set pat {blah}
+    set line "asd asd"
+    # nothing in the line can start "blah"; a match is only possible after
+    # the end of the string
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 7}
+
+test reg-32.2 {canmatch functionality -- at end} {
+    set pat {s%$}
+    set line "asd asd"
+    # can only match after the end of the string
+    set res [testregexp -xflags -- c $pat $line resvar] 
+    lappend res $resvar
+} {0 7}
+
+test reg-32.3 {canmatch functionality -- not last char} {
+    set pat {[^d]%$}
+    set line "asd asd"
+    # can only match after the end of the string
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 7}
+
+test reg-32.3.1 {canmatch functionality -- no match} {
+    set pat {\Zx}
+    set line "asd asd"
+    # can never match: \Z only matches at the end of the string, so no "x"
+    # can follow it, whatever text is appended
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 -1}
+
+test reg-32.4 {canmatch functionality -- last char} {knownBug} {
+    set pat {.x}
+    set line "asd asd"
+    # can match the last char, if followed by x
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.4.1 {canmatch functionality -- last char} {knownBug} {
+    set pat {.x$}
+    set line "asd asd"
+    # can match the last char, if followed by x
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.5 {canmatch functionality -- last char} {knownBug} {
+    set pat {.[^d]x$}
+    set line "asd asd"
+    # can match the last char, if followed by not-d and x.
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.6 {canmatch functionality -- last char} {knownBug} {
+    set pat {[^a]%[^\r\n]*$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.7 {canmatch functionality -- last char} {knownBug} {
+    set pat {[^a]%$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.8 {canmatch functionality -- last char} {knownBug} {
+    set pat {[^x]%$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+test reg-32.9 {canmatch functionality -- more complex case} {knownBug} {
+    set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
+    set line "asd asd"
+    # can match at the final d, if '%' follows
+    set res [testregexp -xflags -- c $pat $line resvar]
+    lappend res $resvar
+} {0 6}
+
+# Tests reg-33.*: Checks for bug fixes
+#
+# Each test is labelled with a bug number (presumably the bug-tracker id of
+# the report it guards against -- confirm).  Tests ending in "1" only check
+# the command's return value; the -inline variants check the matched text
+# (or, with -indices, the matched index ranges) instead.
+
+test reg-33.1 {Bug 230589} {
+    regexp {[ ]*(^|[^%])%V} "*%V2" m s
+} 1
+
+test reg-33.2 {Bug 504785} {
+    # expected list: whole match followed by the four captured groups
+    regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
+} {bbcos_001_c01.q1la bbcos _001_c01 q1 l}
+
+test reg-33.3 {Bug 505048} {
+    regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
+} 1
+
+# reg-33.4 to reg-33.9 run variations of one pattern (capture groups moved
+# or removed, -inline, -indices) against the same input "ab"
+test reg-33.4 {Bug 505048} {
+    regexp {\A\s*([^b]*)b} ab
+} 1
+
+test reg-33.5 {Bug 505048} {
+    regexp {\A\s*[^b]*(b)} ab
+} 1
+
+test reg-33.6 {Bug 505048} {
+    regexp {\A(\s*)[^b]*(b)} ab
+} 1
+
+test reg-33.7 {Bug 505048} {
+    regexp {\A\s*[^b]*b} ab
+} 1
+
+test reg-33.8 {Bug 505048} {
+    regexp -inline {\A\s*[^b]*b} ab
+} ab
+
+test reg-33.9 {Bug 505048} {
+    regexp -indices -inline {\A\s*[^b]*b} ab
+} {{0 1}}
+
+# the two regsub tests store the substituted string in tmp and check only
+# regsub's return value
+test reg-33.10 {Bug 840258} {
+    regsub {(^|\n)+\.*b} \n.b {} tmp
+} 1
+
+test reg-33.11 {Bug 840258} {
+    regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
+            "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
+} 1
+
+# cleanup
+::tcltest::cleanupTests
+return
diff --git a/tests/regex/regex.cpp b/tests/regex/regex.cpp

new file mode 100644 (file)

index 0000000..733e5ae
--- /dev/null
+++ b/tests/regex/regex.cpp
@@ -0,0 +1,421 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        tests/regex/regex.cpp
+// Purpose:     Test the built-in regex lib and wxRegEx
+// Author:      Mike Wetherell
+// RCS-ID:      $Id$
+// Copyright:   (c) 2004 Mike Wetherell
+// Licence:     wxWidgets licence
+///////////////////////////////////////////////////////////////////////////////
+
+//
+// Notes:
+//
+// To run just one section, say wx_1, do this:
+//  test regex.wx_1
+//
+// To run all the regex tests:
+//  test regex
+// 
+// Some tests must be skipped since they use features which we do not make
+// available through wxRegEx. To see the list of tests that have been skipped
+// turn on verbose logging, e.g.:
+//  test --verbose regex
+// 
+// The tests here are for the builtin library, tests for wxRegEx in general
+// should go in another module.
+//
+// The tests are generated from Henry Spencer's reg.test; additional tests
+// can be added in wxreg.test. These test files are then turned into a C++
+// include file 'regex.inc' (included below) using a script 'regex.pl'.
+// 
+
+#if defined(__GNUG__) && !defined(__APPLE__)
+    #pragma implementation
+    #pragma interface
+#endif
+
+// For compilers that support precompilation, includes "wx/wx.h".
+#include "wx/wxprec.h"
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+// for all others, include the necessary headers
+#ifndef WX_PRECOMP
+    #include "wx/wx.h"
+#endif
+
+#include "wx/regex.h"
+#include "wx/cppunit.h"
+#include <iomanip>
+#include <stdexcept>
+
+using namespace std;
+using namespace CppUnit;
+
+// many of the tests are specific to the builtin regex lib, so only attempt
+// to run them when using the builtin regex lib.
+//
+#ifdef wxHAS_REGEX_ADVANCED
+
+
+///////////////////////////////////////////////////////////////////////////////
+// The test case - an instance represents a single test
+
+// A single regex test: one pattern, one data string and the results the
+// match is expected to produce. Construction converts and validates the
+// arguments (reporting problems through fail()), runTest() performs the
+// actual compile/match checks.
+class RegExTestCase : public TestCase
+{
+public:
+    // constructor - create a single testcase
+    //
+    // name      name passed on to the TestCase base class
+    // mode      test type, only its first character is kept (see m_mode)
+    // id        test identifier, only used in failure messages
+    // flags     flag characters, decoded by parseFlags()
+    // pattern   the regular expression to compile
+    // data      the string to match the pattern against
+    // expected  the expected results: whole match first, then sub-matches
+    //
+    // All the strings are converted from UTF-8 with Conv().
+    RegExTestCase(
+        const string& name,
+        const char *mode,
+        const char *id,
+        const char *flags,
+        const char *pattern,
+        const char *data,
+        const vector<const char *>& expected);
+
+protected:
+    // run this testcase
+    void runTest();
+
+private:
+    // workers
+
+    // convert a UTF-8 string, returns convError() if it cannot be converted
+    wxString Conv(const char *str);
+    // decode the flag characters into the m_xxx members below
+    void parseFlags(const wxString& flags);
+    // compile and match using one flavor (wxRE_BASIC, wxRE_EXTENDED, ...)
+    void doTest(int flavor);
+    // count the whole match plus the bracketed subexpressions of expr
+    static size_t matchCount(const wxString& expr, int flags);
+    // make a string displayable: escape special chars, quote if needed
+    static wxString quote(const wxString& arg);
+    // marker value returned by Conv() when a conversion fails
+    const wxChar *convError() const { return _T("<cannot convert>"); }
+
+    // assertions - adds some information about the test that failed
+    void fail(const wxString& msg) const;
+    void failIf(bool condition, const wxString& msg) const
+        { if (condition) fail(msg); }
+
+    // mode, id, flags, pattern, test data, expected results...
+    int m_mode;                 // first character of the mode string
+    wxString m_id;
+    wxString m_flags;
+    wxString m_pattern;
+    wxString m_data;
+    wxArrayString m_expected;
+
+    // the flags decoded by parseFlags()
+    int m_compileFlags;         // wxRE_xxx flags used when compiling
+    int m_matchFlags;           // wxRE_xxx flags used when matching
+    bool m_basic;               // run the test with wxRE_BASIC
+    bool m_extended;            // run the test with wxRE_EXTENDED
+    bool m_advanced;            // run the test with wxRE_ADVANCED
+};
+
+// constructor - throws Exception on failure
+//
+RegExTestCase::RegExTestCase(
+    const string& name,
+    const char *mode,
+    const char *id,
+    const char *flags,
+    const char *pattern,
+    const char *data,
+    const vector<const char *>& expected)
+  :
+    TestCase(name),
+    m_mode(mode[0]),
+    m_id(Conv(id)),
+    m_flags(Conv(flags)),
+    m_pattern(Conv(pattern)),
+    m_data(Conv(data)),
+    m_compileFlags(0),
+    m_matchFlags(0),
+    m_basic(false),
+    m_extended(false),
+    m_advanced(false)
+{
+    bool badconv = m_pattern == convError() || m_data == convError();
+    vector<const char *>::const_iterator it;
+
+    for (it = expected.begin(); it != expected.end(); ++it) {
+        m_expected.push_back(Conv(*it));
+        badconv = badconv || *m_expected.rbegin() == convError();
+    }
+
+    failIf(badconv, _T("cannot convert to default character encoding"));
+    
+    // the flags need further parsing...
+    parseFlags(m_flags);
+
+#ifndef wxHAS_REGEX_ADVANCED
+    failIf(!m_basic && !m_extended, _T("advanced regexs not available"));
+#endif
+}
+
+// convert a string from UTF8 to the internal encoding
+//
+// The result is converted back again and compared with the wide character
+// original; unless the two agree the convError() marker is returned instead
+// of the converted text.
+//
+wxString RegExTestCase::Conv(const char *str)
+{
+    const wxWCharBuffer wide = wxConvUTF8.cMB2WC(str);
+    const wxWC2WXbuf native = wxConvCurrent->cWC2WX(wide);
+
+    // accept the conversion only if it survives the round trip
+    if (native && wcscmp(wxConvCurrent->cWX2WC(native), wide) == 0)
+        return native;
+
+    return convError();
+}
+
+// Parse flags
+//
+void RegExTestCase::parseFlags(const wxString& flags)
+{
+    for (const wxChar *p = flags; *p; p++) {
+        switch (*p) {
+            // noop
+            case '-': break;
+
+            // we don't fully support these flags, but they don't stop us
+            // checking for success of failure of the match, so treat as noop
+            case 'A': case 'B': case 'E': case 'H':
+            case 'I': case 'L': case 'M': case 'N':
+            case 'P': case 'Q': case 'R': case 'S':
+            case 'T': case 'U': case '%':
+                break;
+
+            // match options
+            case '^': m_matchFlags |= wxRE_NOTBOL; break;
+            case '$': m_matchFlags |= wxRE_NOTEOL; break;
+#if wxUSE_UNICODE
+            case '*': break;
+#endif
+            // compile options
+            case '&': m_advanced = m_basic = true; break;
+            case 'b': m_basic = true; break;
+            case 'e': m_extended = true; break;
+            case 'i': m_compileFlags |= wxRE_ICASE; break;
+            case 'o': m_compileFlags |= wxRE_NOSUB; break;
+            case 'n': m_compileFlags |= wxRE_NEWLINE; break;
+            case 't': if (strchr("ep", m_mode)) break; // else fall through...
+
+            // anything else we must skip the test
+            default:
+                fail(wxString::Format(
+                     _T("requires unsupported flag '%c'"), *p));
+        }
+    }
+}
+
+// Try test for all flavours of expression specified
+//
+// The basic and extended flavours are run when their flags were given; the
+// advanced flavour is run when it was asked for or when neither of the
+// other two was.
+//
+void RegExTestCase::runTest()
+{
+    if (m_basic)
+        doTest(wxRE_BASIC);
+
+    if (m_extended)
+        doTest(wxRE_EXTENDED);
+
+#ifdef wxHAS_REGEX_ADVANCED
+    const bool noFlavorGiven = !(m_basic || m_extended);
+
+    if (m_advanced || noFlavorGiven)
+        doTest(wxRE_ADVANCED);
+#endif
+}
+    
+// Try the test for a single flavour of expression
+//
+void RegExTestCase::doTest(int flavor)
+{
+    wxRegEx re(m_pattern, m_compileFlags | flavor);
+
+    // 'e' - test that the pattern fails to compile
+    if (m_mode == 'e')
+        return failIf(re.IsValid(), _T("compile suceeded (should fail)"));
+    failIf(!re.IsValid(), _T("compile failed"));
+
+    bool matches = re.Matches(m_data, m_matchFlags);
+
+    // 'f' or 'p' - test that the pattern does not match
+    if (m_mode == 'f' || m_mode == 'p')
+        return failIf(matches, _T("match suceeded (should fail)"));
+
+    // otherwise 'm' or 'i' - test the pattern does match
+    failIf(!matches, _T("match failed"));
+
+    // Check that wxRegEx is going to allocate a large enough array for the
+    // results we are supposed to get
+    failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor),
+           _T("wxRegEx has not allocated a large enough array for the ")
+           _T("number of results expected"));
+
+    wxString result;
+    size_t start, len;
+
+    for (size_t i = 0; i < m_expected.size(); i++) {
+        failIf(!re.GetMatch(&start, &len, i), wxString::Format(
+                _T("wxRegEx::GetMatch failed for match %d"), i));
+
+        // m - check the match returns the strings given
+        if (m_mode == 'm')
+            if (start < INT_MAX)
+                result = m_data.substr(start, len);
+            else
+                result = _T("");
+
+        // i - check the match returns the offsets given
+        else if (m_mode == 'i')
+            if (start < INT_MAX)
+                result = wxString::Format(_T("%d %d"), start, start + len - 1);
+            else
+                result = _T("-1 -1");
+
+        failIf(result != m_expected[i], wxString::Format(
+                _T("match(%d) == %s, expected == %s"), i,
+                quote(result).c_str(), quote(m_expected[i]).c_str()));
+    }
+}
+
+// assertion - adds some information about the test that failed
+//
+void RegExTestCase::fail(const wxString& msg) const
+{
+    wxString str;
+    wxArrayString::const_iterator it;
+
+    str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ")
+        << quote(m_pattern) << _T(" ") << quote(m_data);
+
+    for (it = m_expected.begin(); it != m_expected.end(); ++it)
+        str << _T(" ") << quote(*it);
+    
+    if (str.length() > 77)
+        str = str.substr(0, 74) + _T("...");
+
+    str << _T("\n ") << msg;
+
+    // no lossy convs so using utf8
+    CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
+}
+
+// quote a string so that it can be displayed (static)
+//
+wxString RegExTestCase::quote(const wxString& arg)
+{
+    const wxChar *needEscape = _T("\a\b\t\n\v\f\r\"\\");
+    const wxChar *escapes = _T("abtnvfr\"\\");
+    wxString str;
+
+    for (size_t i = 0; i < arg.length(); i++) {
+        wxUChar ch = arg[i];
+        const wxChar *p = wxStrchr(needEscape, ch);
+        
+        if (p)
+            str += wxString::Format(_T("\\%c"), escapes[p - needEscape]);
+        else if (wxIscntrl(ch))
+            str += wxString::Format(_T("\\%03o"), ch);
+        else
+            str += ch;
+    }
+
+    return str.length() == arg.length() && str.find(' ') == wxString::npos ?
+        str : _T("\"") + str + _T("\"");
+}
+
+// Count the number of subexpressions (taken from wxRegExImpl::Compile)
+//
+size_t RegExTestCase::matchCount(const wxString& expr, int flags)
+{
+    // there is always one for the whole expression
+    size_t nMatches = 1;
+
+    // and some more for bracketed subexperessions
+    for ( const wxChar *cptr = expr; *cptr; cptr++ )
+    {
+        if ( *cptr == _T('\\') )
+        {
+            // in basic RE syntax groups are inside \(...\)
+            if ( *++cptr == _T('(') && (flags & wxRE_BASIC) )
+            {
+                nMatches++;
+            }
+        }
+        else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
+        {
+            // we know that the previous character is not an unquoted
+            // backslash because it would have been eaten above, so we
+            // have a bar '(' and this indicates a group start for the
+            // extended syntax
+            nMatches++;
+        }
+    }
+
+    return nMatches;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Test suite
+//
+// In a non-unicode build the regex is affected by the current locale, so
+// this derived TestSuite is used. It sets the locale in its constructor and
+// again in its run() method before running the regex tests (the previous
+// locale is not restored afterwards).
+
+// Suite holding the regex tests: sets the locale from the environment and
+// offers add() as a compact way of creating the individual test cases.
+class RegExTestSuite : public TestSuite
+{
+public:
+    // create an empty suite called name
+    RegExTestSuite(string name);
+    // set the locale, then run all the tests in the suite
+    void run(TestResult *result);
+    // create a RegExTestCase from the arguments and add it to the suite;
+    // 'expected' starts a variable length list of expected results which
+    // must be terminated by a NULL pointer
+    void add(const char *mode, const char *id, const char *flags,
+             const char *pattern, const char *data, const char *expected, ...);
+};
+
+// constructor, sets the locale so that it is set when the tests are added
+//
+// The empty locale name selects the locale specified by the environment.
+//
+RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
+{
+    setlocale(LC_ALL, "");
+}
+
+// run the test suite, sets the locale again since it may have been changed
+// by another test since this suite was created
+//
+// Note that the locale in effect before the call is not restored.
+//
+void RegExTestSuite::run(TestResult *result)
+{
+    setlocale(LC_ALL, "");
+    TestSuite::run(result);
+}
+
+// Add a testcase to the suite
+//
+void RegExTestSuite::add(
+    const char *mode,
+    const char *id,
+    const char *flags,
+    const char *pattern,
+    const char *data,
+    const char *expected, ...)
+{
+    string name = getName() + "." + id;
+
+    vector<const char *> expected_results;
+    va_list ap;
+
+    for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
+        expected_results.push_back(expected);
+
+    va_end(ap);
+        
+    try {
+        addTest(new RegExTestCase(
+            name, mode, id, flags, pattern, data, expected_results));
+    }
+    catch (Exception& e) {
+        wxLogInfo(wxString::Format(_T("skipping: %s\n %s\n"),
+            wxString(name.c_str(), wxConvUTF8).c_str(),
+            wxString(e.what(), wxConvUTF8).c_str()));
+    }
+}
+
+
+// Include the generated tests
+//
+#include "regex.inc"
+
+
+#endif // wxHAS_REGEX_ADVANCED
diff --git a/tests/regex/regex.inc b/tests/regex/regex.inc

new file mode 100644 (file)

index 0000000..e53d364
--- /dev/null
+++ b/tests/regex/regex.inc
@@ -0,0 +1,1361 @@
+/*
+ * Test data for wxRegEx (UTF-8 encoded)
+ * 
+ * Generated Fri Mar 5 21:35:22 2004 by regex.pl from the following files:
+ * 
+ *   reg.test: Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *   wxreg.test: Copyright (c) 2004 Mike Wetherell.
+ * 
+ * Test types:
+ *     e       compile error expected
+ *     f       match failure expected
+ *     m       successful match
+ *     i       successful match with -indices (used in checking things like
+ *             nonparticipating subexpressions)
+ *     p       unsuccessful match with -indices (!!) (used in checking
+ *             partial-match reporting)
+ * 
+ * Flag characters:
+ *     -       no-op (placeholder)
+ *     +       provide fake xy equivalence class and ch collating element
+ *     %       force small state-set cache in matcher (to test cache replace)
+ *     ^       beginning of string is not beginning of line
+ *     $       end of string is not end of line
+ *     *       test is Unicode-specific, needs big character set
+ * 
+ *     &       test as both ARE and BRE
+ *     b       BRE
+ *     e       ERE
+ *     a       turn advanced-features bit on (error unless ERE already)
+ *     q       literal string, no metacharacters at all
+ * 
+ *     i       case-independent matching
+ *     o       ("opaque") no subexpression capture
+ *     p       newlines are half-magic, excluded from . and [^ only
+ *     w       newlines are half-magic, significant to ^ and $ only
+ *     n       newlines are fully magic, both effects
+ *     x       expanded RE syntax
+ *     t       incomplete-match reporting
+ * 
+ *     A       backslash-_a_lphanumeric seen
+ *     B       ERE/ARE literal-_b_race heuristic used
+ *     E       backslash (_e_scape) seen within []
+ *     H       looka_h_ead constraint seen
+ *     I       _i_mpossible to match
+ *     L       _l_ocale-specific construct seen
+ *     M       unportable (_m_achine-specific) construct seen
+ *     N       RE can match empty (_n_ull) string
+ *     P       non-_P_OSIX construct seen
+ *     Q       {} _q_uantifier seen
+ *     R       back _r_eference seen
+ *     S       POSIX-un_s_pecified syntax seen
+ *     T       prefers shortest (_t_iny)
+ *     U       saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
+ */
+
+
+/*
+ * 1 basic sanity checks
+ */
+
+// Suite "regex.1": the class only names the suite, suite() fills it.
+class regextest_1 : public RegExTestSuite
+{
+public:
+    regextest_1() : RegExTestSuite("regex.1") { }
+    static Test *suite();
+};
+
+// Creates the suite, adds its test cases and returns it.
+Test *regextest_1::suite()
+{
+    RegExTestSuite *suite = new regextest_1;
+
+    // add() reads its variable arguments as const char *, so end each list
+    // with a null pointer of that exact type rather than an untyped NULL.
+    const char *const sentinel = NULL;
+
+    suite->add("m", "1", "&", "abc", "abc", "abc", sentinel);
+    suite->add("f", "2", "&", "abc", "def", sentinel);
+    suite->add("m", "3", "&", "abc", "xyabxabce", "abc", sentinel);
+
+    return suite;
+}
+
+// Registers the suite in the CppUnit registry named "regex.1".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_1, "regex.1");
+
+
+/*
+ * 2 invalid option combinations
+ */
+
+class regextest_2 : public RegExTestSuite
+{
+public:
+    regextest_2() : RegExTestSuite("regex.2") { }
+    static Test *suite();
+};
+
+Test *regextest_2::suite()
+{
+    RegExTestSuite *suite = new regextest_2;
+
+    suite->add("e", "1", "qe", "a", "INVARG", NULL);
+    suite->add("e", "2", "qa", "a", "INVARG", NULL);
+    suite->add("e", "3", "qx", "a", "INVARG", NULL);
+    suite->add("e", "4", "qn", "a", "INVARG", NULL);
+    suite->add("e", "5", "ba", "a", "INVARG", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_2, "regex.2");
+
+
+/*
+ * 3 basic syntax
+ */
+
+class regextest_3 : public RegExTestSuite
+{
+public:
+    regextest_3() : RegExTestSuite("regex.3") { }
+    static Test *suite();
+};
+
+Test *regextest_3::suite()
+{
+    RegExTestSuite *suite = new regextest_3;
+
+    suite->add("i", "1", "&NS", "", "a", "0 -1", NULL);
+    suite->add("m", "2", "NS", "a|", "a", "a", NULL);
+    suite->add("m", "3", "-", "a|b", "a", "a", NULL);
+    suite->add("m", "4", "-", "a|b", "b", "b", NULL);
+    suite->add("m", "5", "NS", "a||b", "b", "b", NULL);
+    suite->add("m", "6", "&", "ab", "ab", "ab", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_3, "regex.3");
+
+
+/*
+ * 4 parentheses
+ */
+
+class regextest_4 : public RegExTestSuite
+{
+public:
+    regextest_4() : RegExTestSuite("regex.4") { }
+    static Test *suite();
+};
+
+Test *regextest_4::suite()
+{
+    RegExTestSuite *suite = new regextest_4;
+
+    suite->add("m", "1", "-", "(a)e", "ae", "ae", "a", NULL);
+    suite->add("m", "2", "o", "(a)e", "ae", NULL);
+    suite->add("m", "3", "b", "\\(a\\)b", "ab", "ab", "a", NULL);
+    suite->add("m", "4", "-", "a((b)c)", "abc", "abc", "bc", "b", NULL);
+    suite->add("m", "5", "-", "a(b)(c)", "abc", "abc", "b", "c", NULL);
+    suite->add("e", "6", "-", "a(b", "EPAREN", NULL);
+    suite->add("e", "7", "b", "a\\(b", "EPAREN", NULL);
+    suite->add("m", "8", "eU", "a)b", "a)b", "a)b", NULL);
+    suite->add("e", "9", "-", "a)b", "EPAREN", NULL);
+    suite->add("e", "10", "b", "a\\)b", "EPAREN", NULL);
+    suite->add("m", "11", "P", "a(?:b)c", "abc", "abc", NULL);
+    suite->add("e", "12", "e", "a(?:b)c", "BADRPT", NULL);
+    suite->add("i", "13", "S", "a()b", "ab", "0 1", "1 0", NULL);
+    suite->add("m", "14", "SP", "a(?:)b", "ab", "ab", NULL);
+    suite->add("i", "15", "S", "a(|b)c", "ac", "0 1", "1 0", NULL);
+    suite->add("m", "16", "S", "a(b|)c", "abc", "abc", "b", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_4, "regex.4");
+
+
+/*
+ * 5 simple one-char matching
+ */
+
+class regextest_5 : public RegExTestSuite
+{
+public:
+    regextest_5() : RegExTestSuite("regex.5") { }
+    static Test *suite();
+};
+
+Test *regextest_5::suite()
+{
+    RegExTestSuite *suite = new regextest_5;
+
+    suite->add("m", "1", "&", "a.b", "axb", "axb", NULL);
+    suite->add("f", "2", "&n", "a.b", "a\nb", NULL);
+    suite->add("m", "3", "&", "a[bc]d", "abd", "abd", NULL);
+    suite->add("m", "4", "&", "a[bc]d", "acd", "acd", NULL);
+    suite->add("f", "5", "&", "a[bc]d", "aed", NULL);
+    suite->add("f", "6", "&", "a[^bc]d", "abd", NULL);
+    suite->add("m", "7", "&", "a[^bc]d", "aed", "aed", NULL);
+    suite->add("f", "8", "&p", "a[^bc]d", "a\nd", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_5, "regex.5");
+
+
+/*
+ * 6 context-dependent syntax
+ */
+
+class regextest_6 : public RegExTestSuite
+{
+public:
+    regextest_6() : RegExTestSuite("regex.6") { }
+    static Test *suite();
+};
+
+Test *regextest_6::suite()
+{
+    RegExTestSuite *suite = new regextest_6;
+
+    suite->add("e", "1", "-", "*", "BADRPT", NULL);
+    suite->add("m", "2", "b", "*", "*", "*", NULL);
+    suite->add("m", "3", "b", "\\(*\\)", "*", "*", "*", NULL);
+    suite->add("e", "4", "-", "(*)", "BADRPT", NULL);
+    suite->add("m", "5", "b", "^*", "*", "*", NULL);
+    suite->add("e", "6", "-", "^*", "BADRPT", NULL);
+    suite->add("f", "7", "&", "^b", "^b", NULL);
+    suite->add("m", "8", "b", "x^", "x^", "x^", NULL);
+    suite->add("f", "9", "I", "x^", "x", NULL);
+    suite->add("m", "10", "n", "\n^", "x\nb", "\n", NULL);
+    suite->add("f", "11", "bS", "\\(^b\\)", "^b", NULL);
+    suite->add("m", "12", "-", "(^b)", "b", "b", "b", NULL);
+    suite->add("m", "13", "&", "x$", "x", "x", NULL);
+    suite->add("m", "14", "bS", "\\(x$\\)", "x", "x", "x", NULL);
+    suite->add("m", "15", "-", "(x$)", "x", "x", "x", NULL);
+    suite->add("m", "16", "b", "x$y", "x$y", "x$y", NULL);
+    suite->add("f", "17", "I", "x$y", "xy", NULL);
+    suite->add("m", "18", "n", "x$\n", "x\n", "x\n", NULL);
+    suite->add("e", "19", "-", "+", "BADRPT", NULL);
+    suite->add("e", "20", "-", "?", "BADRPT", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_6, "regex.6");
+
+
+/*
+ * 7 simple quantifiers
+ */
+
+class regextest_7 : public RegExTestSuite
+{
+public:
+    regextest_7() : RegExTestSuite("regex.7") { }
+    static Test *suite();
+};
+
+Test *regextest_7::suite()
+{
+    RegExTestSuite *suite = new regextest_7;
+
+    suite->add("m", "1", "&N", "a*", "aa", "aa", NULL);
+    suite->add("i", "2", "&N", "a*", "b", "0 -1", NULL);
+    suite->add("m", "3", "-", "a+", "aa", "aa", NULL);
+    suite->add("m", "4", "-", "a?b", "ab", "ab", NULL);
+    suite->add("m", "5", "-", "a?b", "b", "b", NULL);
+    suite->add("e", "6", "-", "**", "BADRPT", NULL);
+    suite->add("m", "7", "bN", "**", "***", "***", NULL);
+    suite->add("e", "8", "&", "a**", "BADRPT", NULL);
+    suite->add("e", "9", "&", "a**b", "BADRPT", NULL);
+    suite->add("e", "10", "&", "***", "BADRPT", NULL);
+    suite->add("e", "11", "-", "a++", "BADRPT", NULL);
+    suite->add("e", "12", "-", "a?+", "BADRPT", NULL);
+    suite->add("e", "13", "-", "a?*", "BADRPT", NULL);
+    suite->add("e", "14", "-", "a+*", "BADRPT", NULL);
+    suite->add("e", "15", "-", "a*+", "BADRPT", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_7, "regex.7");
+
+
+/*
+ * 8 braces
+ */
+
+class regextest_8 : public RegExTestSuite
+{
+public:
+    regextest_8() : RegExTestSuite("regex.8") { }
+    static Test *suite();
+};
+
+Test *regextest_8::suite()
+{
+    RegExTestSuite *suite = new regextest_8;
+
+    suite->add("m", "1", "NQ", "a{0,1}", "", "", NULL);
+    suite->add("m", "2", "NQ", "a{0,1}", "ac", "a", NULL);
+    suite->add("e", "3", "-", "a{1,0}", "BADBR", NULL);
+    suite->add("e", "4", "-", "a{1,2,3}", "BADBR", NULL);
+    suite->add("e", "5", "-", "a{257}", "BADBR", NULL);
+    suite->add("e", "6", "-", "a{1000}", "BADBR", NULL);
+    suite->add("e", "7", "-", "a{1", "EBRACE", NULL);
+    suite->add("e", "8", "-", "a{1n}", "BADBR", NULL);
+    suite->add("m", "9", "BS", "a{b", "a{b", "a{b", NULL);
+    suite->add("m", "10", "BS", "a{", "a{", "a{", NULL);
+    suite->add("m", "11", "bQ", "a\\{0,1\\}b", "cb", "b", NULL);
+    suite->add("e", "12", "b", "a\\{0,1", "EBRACE", NULL);
+    suite->add("e", "13", "-", "a{0,1\\", "BADBR", NULL);
+    suite->add("m", "14", "Q", "a{0}b", "ab", "b", NULL);
+    suite->add("m", "15", "Q", "a{0,0}b", "ab", "b", NULL);
+    suite->add("m", "16", "Q", "a{0,1}b", "ab", "ab", NULL);
+    suite->add("m", "17", "Q", "a{0,2}b", "b", "b", NULL);
+    suite->add("m", "18", "Q", "a{0,2}b", "aab", "aab", NULL);
+    suite->add("m", "19", "Q", "a{0,}b", "aab", "aab", NULL);
+    suite->add("m", "20", "Q", "a{1,1}b", "aab", "ab", NULL);
+    suite->add("m", "21", "Q", "a{1,3}b", "aaaab", "aaab", NULL);
+    suite->add("f", "22", "Q", "a{1,3}b", "b", NULL);
+    suite->add("m", "23", "Q", "a{1,}b", "aab", "aab", NULL);
+    suite->add("f", "24", "Q", "a{2,3}b", "ab", NULL);
+    suite->add("m", "25", "Q", "a{2,3}b", "aaaab", "aaab", NULL);
+    suite->add("f", "26", "Q", "a{2,}b", "ab", NULL);
+    suite->add("m", "27", "Q", "a{2,}b", "aaaab", "aaaab", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_8, "regex.8");
+
+
+/*
+ * 9 brackets
+ */
+
+class regextest_9 : public RegExTestSuite
+{
+public:
+    regextest_9() : RegExTestSuite("regex.9") { }
+    static Test *suite();
+};
+
+Test *regextest_9::suite()
+{
+    RegExTestSuite *suite = new regextest_9;
+
+    suite->add("m", "1", "&", "a[bc]", "ac", "ac", NULL);
+    suite->add("m", "2", "&", "a[-]", "a-", "a-", NULL);
+    suite->add("m", "3", "&", "a[[.-.]]", "a-", "a-", NULL);
+    suite->add("m", "4", "&L", "a[[.zero.]]", "a0", "a0", NULL);
+    suite->add("m", "5", "&LM", "a[[.zero.]-9]", "a2", "a2", NULL);
+    suite->add("m", "6", "&M", "a[0-[.9.]]", "a2", "a2", NULL);
+    suite->add("m", "7", "&+L", "a[[=x=]]", "ax", "ax", NULL);
+    suite->add("m", "8", "&+L", "a[[=x=]]", "ay", "ay", NULL);
+    suite->add("f", "9", "&+L", "a[[=x=]]", "az", NULL);
+    suite->add("e", "10", "&", "a[0-[=x=]]", "ERANGE", NULL);
+    suite->add("m", "11", "&L", "a[[:digit:]]", "a0", "a0", NULL);
+    suite->add("e", "12", "&", "a[[:woopsie:]]", "ECTYPE", NULL);
+    suite->add("f", "13", "&L", "a[[:digit:]]", "ab", NULL);
+    suite->add("e", "14", "&", "a[0-[:digit:]]", "ERANGE", NULL);
+    suite->add("m", "15", "&LP", "[[:<:]]a", "a", "a", NULL);
+    suite->add("m", "16", "&LP", "a[[:>:]]", "a", "a", NULL);
+    suite->add("e", "17", "&", "a[[..]]b", "ECOLLATE", NULL);
+    suite->add("e", "18", "&", "a[[==]]b", "ECOLLATE", NULL);
+    suite->add("e", "19", "&", "a[[::]]b", "ECTYPE", NULL);
+    suite->add("e", "20", "&", "a[[.a", "EBRACK", NULL);
+    suite->add("e", "21", "&", "a[[=a", "EBRACK", NULL);
+    suite->add("e", "22", "&", "a[[:a", "EBRACK", NULL);
+    suite->add("e", "23", "&", "a[", "EBRACK", NULL);
+    suite->add("e", "24", "&", "a[b", "EBRACK", NULL);
+    suite->add("e", "25", "&", "a[b-", "EBRACK", NULL);
+    suite->add("e", "26", "&", "a[b-c", "EBRACK", NULL);
+    suite->add("m", "27", "&M", "a[b-c]", "ab", "ab", NULL);
+    suite->add("m", "28", "&", "a[b-b]", "ab", "ab", NULL);
+    suite->add("m", "29", "&M", "a[1-2]", "a2", "a2", NULL);
+    suite->add("e", "30", "&", "a[c-b]", "ERANGE", NULL);
+    suite->add("e", "31", "&", "a[a-b-c]", "ERANGE", NULL);
+    suite->add("m", "32", "&M", "a[--?]b", "a?b", "a?b", NULL);
+    suite->add("m", "33", "&", "a[---]b", "a-b", "a-b", NULL);
+    suite->add("m", "34", "&", "a[]b]c", "a]c", "a]c", NULL);
+    suite->add("m", "35", "EP", "a[\\]]b", "a]b", "a]b", NULL);
+    suite->add("f", "36", "bE", "a[\\]]b", "a]b", NULL);
+    suite->add("m", "37", "bE", "a[\\]]b", "a\\]b", "a\\]b", NULL);
+    suite->add("m", "38", "eE", "a[\\]]b", "a\\]b", "a\\]b", NULL);
+    suite->add("m", "39", "EP", "a[\\\\]b", "a\\b", "a\\b", NULL);
+    suite->add("m", "40", "eE", "a[\\\\]b", "a\\b", "a\\b", NULL);
+    suite->add("m", "41", "bE", "a[\\\\]b", "a\\b", "a\\b", NULL);
+    suite->add("e", "42", "-", "a[\\Z]b", "EESCAPE", NULL);
+    suite->add("m", "43", "&", "a[[b]c", "a[c", "a[c", NULL);
+    suite->add("m", "44", "EMP*", "a[\\u00fe-\\u0507][\\u00ff-\\u0300]b", "aĂ˿b", "aĂ˿b", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_9, "regex.9");
+
+
+/*
+ * 10 anchors and newlines
+ */
+
+class regextest_10 : public RegExTestSuite
+{
+public:
+    regextest_10() : RegExTestSuite("regex.10") { }
+    static Test *suite();
+};
+
+Test *regextest_10::suite()
+{
+    RegExTestSuite *suite = new regextest_10;
+
+    suite->add("m", "1", "&", "^a", "a", "a", NULL);
+    suite->add("f", "2", "&^", "^a", "a", NULL);
+    suite->add("i", "3", "&N", "^", "a", "0 -1", NULL);
+    suite->add("i", "4", "&", "a$", "aba", "2 2", NULL);
+    suite->add("f", "5", "&$", "a$", "a", NULL);
+    suite->add("i", "6", "&N", "$", "ab", "2 1", NULL);
+    suite->add("m", "7", "&n", "^a", "a", "a", NULL);
+    suite->add("m", "8", "&n", "^a", "b\na", "a", NULL);
+    suite->add("i", "9", "&w", "^a", "a\na", "0 0", NULL);
+    suite->add("i", "10", "&n^", "^a", "a\na", "2 2", NULL);
+    suite->add("m", "11", "&n", "a$", "a", "a", NULL);
+    suite->add("m", "12", "&n", "a$", "a\nb", "a", NULL);
+    suite->add("i", "13", "&n", "a$", "a\na", "0 0", NULL);
+    suite->add("i", "14", "N", "^^", "a", "0 -1", NULL);
+    suite->add("m", "15", "b", "^^", "^", "^", NULL);
+    suite->add("i", "16", "N", "$$", "a", "1 0", NULL);
+    suite->add("m", "17", "b", "$$", "$", "$", NULL);
+    suite->add("m", "18", "&N", "^$", "", "", NULL);
+    suite->add("f", "19", "&N", "^$", "a", NULL);
+    suite->add("i", "20", "&nN", "^$", "a\n\nb", "2 1", NULL);
+    suite->add("m", "21", "N", "$^", "", "", NULL);
+    suite->add("m", "22", "b", "$^", "$^", "$^", NULL);
+    suite->add("m", "23", "P", "\\Aa", "a", "a", NULL);
+    suite->add("m", "24", "^P", "\\Aa", "a", "a", NULL);
+    suite->add("f", "25", "^nP", "\\Aa", "b\na", NULL);
+    suite->add("m", "26", "P", "a\\Z", "a", "a", NULL);
+    suite->add("m", "27", "$P", "a\\Z", "a", "a", NULL);
+    suite->add("f", "28", "$nP", "a\\Z", "a\nb", NULL);
+    suite->add("e", "29", "-", "^*", "BADRPT", NULL);
+    suite->add("e", "30", "-", "$*", "BADRPT", NULL);
+    suite->add("e", "31", "-", "\\A*", "BADRPT", NULL);
+    suite->add("e", "32", "-", "\\Z*", "BADRPT", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_10, "regex.10");
+
+
+/*
+ * 11 boundary constraints
+ */
+
+class regextest_11 : public RegExTestSuite
+{
+public:
+    regextest_11() : RegExTestSuite("regex.11") { }
+    static Test *suite();
+};
+
+Test *regextest_11::suite()
+{
+    RegExTestSuite *suite = new regextest_11;
+
+    suite->add("m", "1", "&LP", "[[:<:]]a", "a", "a", NULL);
+    suite->add("m", "2", "&LP", "[[:<:]]a", "-a", "a", NULL);
+    suite->add("f", "3", "&LP", "[[:<:]]a", "ba", NULL);
+    suite->add("m", "4", "&LP", "a[[:>:]]", "a", "a", NULL);
+    suite->add("m", "5", "&LP", "a[[:>:]]", "a-", "a", NULL);
+    suite->add("f", "6", "&LP", "a[[:>:]]", "ab", NULL);
+    suite->add("m", "7", "bLP", "\\<a", "a", "a", NULL);
+    suite->add("f", "8", "bLP", "\\<a", "ba", NULL);
+    suite->add("m", "9", "bLP", "a\\>", "a", "a", NULL);
+    suite->add("f", "10", "bLP", "a\\>", "ab", NULL);
+    suite->add("m", "11", "LP", "\\ya", "a", "a", NULL);
+    suite->add("f", "12", "LP", "\\ya", "ba", NULL);
+    suite->add("m", "13", "LP", "a\\y", "a", "a", NULL);
+    suite->add("f", "14", "LP", "a\\y", "ab", NULL);
+    suite->add("m", "15", "LP", "a\\Y", "ab", "a", NULL);
+    suite->add("f", "16", "LP", "a\\Y", "a-", NULL);
+    suite->add("f", "17", "LP", "a\\Y", "a", NULL);
+    suite->add("f", "18", "LP", "-\\Y", "-a", NULL);
+    suite->add("m", "19", "LP", "-\\Y", "-%", "-", NULL);
+    suite->add("f", "20", "LP", "\\Y-", "a-", NULL);
+    suite->add("e", "21", "-", "[[:<:]]*", "BADRPT", NULL);
+    suite->add("e", "22", "-", "[[:>:]]*", "BADRPT", NULL);
+    suite->add("e", "23", "b", "\\<*", "BADRPT", NULL);
+    suite->add("e", "24", "b", "\\>*", "BADRPT", NULL);
+    suite->add("e", "25", "-", "\\y*", "BADRPT", NULL);
+    suite->add("e", "26", "-", "\\Y*", "BADRPT", NULL);
+    suite->add("m", "27", "LP", "\\ma", "a", "a", NULL);
+    suite->add("f", "28", "LP", "\\ma", "ba", NULL);
+    suite->add("m", "29", "LP", "a\\M", "a", "a", NULL);
+    suite->add("f", "30", "LP", "a\\M", "ab", NULL);
+    suite->add("f", "31", "ILP", "\\Ma", "a", NULL);
+    suite->add("f", "32", "ILP", "a\\m", "a", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_11, "regex.11");
+
+
+/*
+ * 12 character classes
+ */
+
+class regextest_12 : public RegExTestSuite
+{
+public:
+    regextest_12() : RegExTestSuite("regex.12") { }
+    static Test *suite();
+};
+
+Test *regextest_12::suite()
+{
+    RegExTestSuite *suite = new regextest_12;
+
+    suite->add("m", "1", "LP", "a\\db", "a0b", "a0b", NULL);
+    suite->add("f", "2", "LP", "a\\db", "axb", NULL);
+    suite->add("f", "3", "LP", "a\\Db", "a0b", NULL);
+    suite->add("m", "4", "LP", "a\\Db", "axb", "axb", NULL);
+    suite->add("m", "5", "LP", "a\\sb", "a b", "a b", NULL);
+    suite->add("m", "6", "LP", "a\\sb", "a\tb", "a\tb", NULL);
+    suite->add("m", "7", "LP", "a\\sb", "a\nb", "a\nb", NULL);
+    suite->add("f", "8", "LP", "a\\sb", "axb", NULL);
+    suite->add("m", "9", "LP", "a\\Sb", "axb", "axb", NULL);
+    suite->add("f", "10", "LP", "a\\Sb", "a b", NULL);
+    suite->add("m", "11", "LP", "a\\wb", "axb", "axb", NULL);
+    suite->add("f", "12", "LP", "a\\wb", "a-b", NULL);
+    suite->add("f", "13", "LP", "a\\Wb", "axb", NULL);
+    suite->add("m", "14", "LP", "a\\Wb", "a-b", "a-b", NULL);
+    suite->add("m", "15", "LP", "\\y\\w+z\\y", "adze-guz", "guz", NULL);
+    suite->add("m", "16", "LPE", "a[\\d]b", "a1b", "a1b", NULL);
+    suite->add("m", "17", "LPE", "a[\\s]b", "a b", "a b", NULL);
+    suite->add("m", "18", "LPE", "a[\\w]b", "axb", "axb", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_12, "regex.12");
+
+
+/*
+ * 13 escapes
+ */
+
+class regextest_13 : public RegExTestSuite
+{
+public:
+    regextest_13() : RegExTestSuite("regex.13") { }
+    static Test *suite();
+};
+
+Test *regextest_13::suite()
+{
+    RegExTestSuite *suite = new regextest_13;
+
+    suite->add("e", "1", "&", "a\\", "EESCAPE", NULL);
+    suite->add("m", "2", "-", "a\\<b", "a<b", "a<b", NULL);
+    suite->add("m", "3", "e", "a\\<b", "a<b", "a<b", NULL);
+    suite->add("m", "4", "bAS", "a\\wb", "awb", "awb", NULL);
+    suite->add("m", "5", "eAS", "a\\wb", "awb", "awb", NULL);
+    suite->add("m", "6", "PL", "a\\ab", "a\ab", "a\ab", NULL);
+    suite->add("m", "7", "P", "a\\bb", "a\bb", "a\bb", NULL);
+    suite->add("m", "8", "P", "a\\Bb", "a\\b", "a\\b", NULL);
+    suite->add("m", "9", "MP", "a\\chb", "a\bb", "a\bb", NULL);
+    suite->add("m", "10", "MP", "a\\cHb", "a\bb", "a\bb", NULL);
+    suite->add("m", "11", "LMP", "a\\e", "a\033", "a\033", NULL);
+    suite->add("m", "12", "P", "a\\fb", "a\fb", "a\fb", NULL);
+    suite->add("m", "13", "P", "a\\nb", "a\nb", "a\nb", NULL);
+    suite->add("m", "14", "P", "a\\rb", "a\rb", "a\rb", NULL);
+    suite->add("m", "15", "P", "a\\tb", "a\tb", "a\tb", NULL);
+    suite->add("m", "16", "P", "a\\u0008x", "a\bx", "a\bx", NULL);
+    suite->add("e", "17", "-", "a\\u008x", "EESCAPE", NULL);
+    suite->add("m", "18", "P", "a\\u00088x", "a\b8x", "a\b8x", NULL);
+    suite->add("m", "19", "P", "a\\U00000008x", "a\bx", "a\bx", NULL);
+    suite->add("e", "20", "-", "a\\U0000008x", "EESCAPE", NULL);
+    suite->add("m", "21", "P", "a\\vb", "a\vb", "a\vb", NULL);
+    suite->add("m", "22", "MP", "a\\x08x", "a\bx", "a\bx", NULL);
+    suite->add("e", "23", "-", "a\\xq", "EESCAPE", NULL);
+    suite->add("m", "24", "MP", "a\\x0008x", "a\bx", "a\bx", NULL);
+    suite->add("e", "25", "-", "a\\z", "EESCAPE", NULL);
+    suite->add("m", "26", "MP", "a\\010b", "a\bb", "a\bb", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_13, "regex.13");
+
+
+/*
+ * 14 back references
+ */
+
+class regextest_14 : public RegExTestSuite
+{
+public:
+    regextest_14() : RegExTestSuite("regex.14") { }
+    static Test *suite();
+};
+
+Test *regextest_14::suite()
+{
+    RegExTestSuite *suite = new regextest_14;
+
+    suite->add("m", "1", "RP", "a(b*)c\\1", "abbcbb", "abbcbb", "bb", NULL);
+    suite->add("m", "2", "RP", "a(b*)c\\1", "ac", "ac", "", NULL);
+    suite->add("f", "3", "RP", "a(b*)c\\1", "abbcb", NULL);
+    suite->add("m", "4", "RP", "a(b*)\\1", "abbcbb", "abb", "b", NULL);
+    suite->add("m", "5", "RP", "a(b|bb)\\1", "abbcbb", "abb", "b", NULL);
+    suite->add("m", "6", "RP", "a([bc])\\1", "abb", "abb", "b", NULL);
+    suite->add("f", "7", "RP", "a([bc])\\1", "abc", NULL);
+    suite->add("m", "8", "RP", "a([bc])\\1", "abcabb", "abb", "b", NULL);
+    suite->add("f", "9", "RP", "a([bc])*\\1", "abc", NULL);
+    suite->add("f", "10", "RP", "a([bc])\\1", "abB", NULL);
+    suite->add("m", "11", "iRP", "a([bc])\\1", "abB", "abB", "b", NULL);
+    suite->add("m", "12", "RP", "a([bc])\\1+", "abbb", "abbb", "b", NULL);
+    suite->add("m", "13", "QRP", "a([bc])\\1{3,4}", "abbbb", "abbbb", "b", NULL);
+    suite->add("f", "14", "QRP", "a([bc])\\1{3,4}", "abbb", NULL);
+    suite->add("m", "15", "RP", "a([bc])\\1*", "abbb", "abbb", "b", NULL);
+    suite->add("m", "16", "RP", "a([bc])\\1*", "ab", "ab", "b", NULL);
+    suite->add("m", "17", "RP", "a([bc])(\\1*)", "ab", "ab", "b", "", NULL);
+    suite->add("e", "18", "-", "a((b)\\1)", "ESUBREG", NULL);
+    suite->add("e", "19", "-", "a(b)c\\2", "ESUBREG", NULL);
+    suite->add("m", "20", "bR", "a\\(b*\\)c\\1", "abbcbb", "abbcbb", "bb", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_14, "regex.14");
+
+
+/*
+ * 15 octal escapes vs back references
+ */
+
+class regextest_15 : public RegExTestSuite
+{
+public:
+    regextest_15() : RegExTestSuite("regex.15") { }
+    static Test *suite();
+};
+
+Test *regextest_15::suite()
+{
+    RegExTestSuite *suite = new regextest_15;
+
+    suite->add("m", "1", "MP", "a\\010b", "a\bb", "a\bb", NULL);
+    suite->add("m", "2", "MP", "a\\0070b", "a\a0b", "a\a0b", NULL);
+    suite->add("m", "3", "MP", "a\\07b", "a\ab", "a\ab", NULL);
+    suite->add("m", "4", "MP", "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c", "abbbbbbbbbb\ac", "abbbbbbbbbb\ac", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL);
+    suite->add("e", "5", "-", "a\\7b", "ESUBREG", NULL);
+    suite->add("m", "6", "MP", "a\\10b", "a\bb", "a\bb", NULL);
+    suite->add("m", "7", "MP", "a\\101b", "aAb", "aAb", NULL);
+    suite->add("m", "8", "RP", "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\10c", "abbbbbbbbbbbc", "abbbbbbbbbbbc", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL);
+    suite->add("e", "9", "-", "a((((((((((b\\10))))))))))c", "ESUBREG", NULL);
+    suite->add("m", "10", "MP", "a\\12b", "a\nb", "a\nb", NULL);
+    suite->add("e", "11", "b", "a\\12b", "ESUBREG", NULL);
+    suite->add("m", "12", "eAS", "a\\12b", "a12b", "a12b", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_15, "regex.15");
+
+
+/*
+ * 16 expanded syntax
+ */
+
+class regextest_16 : public RegExTestSuite
+{
+public:
+    regextest_16() : RegExTestSuite("regex.16") { }
+    static Test *suite();
+};
+
+Test *regextest_16::suite()
+{
+    RegExTestSuite *suite = new regextest_16;
+
+    suite->add("m", "1", "xP", "a b c", "abc", "abc", NULL);
+    suite->add("m", "2", "xP", "a b #oops\nc\td", "abcd", "abcd", NULL);
+    suite->add("m", "3", "x", "a\\ b\\\tc", "a b\tc", "a b\tc", NULL);
+    suite->add("m", "4", "xP", "a b\\#c", "ab#c", "ab#c", NULL);
+    suite->add("m", "5", "xP", "a b[c d]e", "ab e", "ab e", NULL);
+    suite->add("m", "6", "xP", "a b[c#d]e", "ab#e", "ab#e", NULL);
+    suite->add("m", "7", "xP", "a b[c#d]e", "abde", "abde", NULL);
+    suite->add("m", "8", "xSPB", "ab{ d", "ab{d", "ab{d", NULL);
+    suite->add("m", "9", "xPQ", "ab{ 1 , 2 }c", "abc", "abc", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_16, "regex.16");
+
+
+/*
+ * 17 misc syntax
+ */
+
+// Suite "regex.17": the class only names the suite, suite() fills it.
+class regextest_17 : public RegExTestSuite
+{
+public:
+    regextest_17() : RegExTestSuite("regex.17") { }
+    static Test *suite();
+};
+
+// Creates the suite, adds its single test case and returns it.
+Test *regextest_17::suite()
+{
+    RegExTestSuite *suite = new regextest_17;
+
+    // add() reads its variable arguments as const char *, so end the list
+    // with a null pointer of that exact type rather than an untyped NULL.
+    const char *const sentinel = NULL;
+
+    suite->add("m", "1", "P", "a(?#comment)b", "ab", "ab", sentinel);
+
+    return suite;
+}
+
+// Registers the suite in the CppUnit registry named "regex.17".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_17, "regex.17");
+
+
+/*
+ * 18 unmatchable REs
+ */
+
+// Suite "regex.18": the class only names the suite, suite() fills it.
+class regextest_18 : public RegExTestSuite
+{
+public:
+    regextest_18()
+      : RegExTestSuite("regex.18")
+    {
+    }
+
+    static Test *suite();
+};
+
+// Creates the suite, adds its single test case (mode "f") and returns it.
+Test *regextest_18::suite()
+{
+    RegExTestSuite *const tests = new regextest_18;
+
+    tests->add("f", "1", "I", "a^b", "ab", NULL);
+
+    return tests;
+}
+
+// Registers the suite in the CppUnit registry named "regex.18".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_18, "regex.18");
+
+
+/*
+ * 19 case independence
+ */
+
+class regextest_19 : public RegExTestSuite
+{
+public:
+    regextest_19() : RegExTestSuite("regex.19") { }
+    static Test *suite();
+};
+
+Test *regextest_19::suite()
+{
+    RegExTestSuite *suite = new regextest_19;
+
+    suite->add("m", "1", "&i", "ab", "Ab", "Ab", NULL);
+    suite->add("m", "2", "&i", "a[bc]", "aC", "aC", NULL);
+    suite->add("f", "3", "&i", "a[^bc]", "aB", NULL);
+    suite->add("m", "4", "&iM", "a[b-d]", "aC", "aC", NULL);
+    suite->add("f", "5", "&iM", "a[^b-d]", "aC", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_19, "regex.19");
+
+
+/*
+ * 20 directors and embedded options
+ */
+
+class regextest_20 : public RegExTestSuite
+{
+public:
+    regextest_20() : RegExTestSuite("regex.20") { }
+    static Test *suite();
+};
+
+Test *regextest_20::suite()
+{
+    RegExTestSuite *suite = new regextest_20;
+
+    suite->add("e", "1", "&", "***?", "BADPAT", NULL);
+    suite->add("m", "2", "q", "***?", "***?", "***?", NULL);
+    suite->add("m", "3", "&P", "***=a*b", "a*b", "a*b", NULL);
+    suite->add("m", "4", "q", "***=a*b", "***=a*b", "***=a*b", NULL);
+    suite->add("m", "5", "bLP", "***:\\w+", "ab", "ab", NULL);
+    suite->add("m", "6", "eLP", "***:\\w+", "ab", "ab", NULL);
+    suite->add("e", "7", "&", "***:***=a*b", "BADRPT", NULL);
+    suite->add("m", "8", "&P", "***:(?b)a+b", "a+b", "a+b", NULL);
+    suite->add("m", "9", "P", "(?b)a+b", "a+b", "a+b", NULL);
+    suite->add("e", "10", "e", "(?b)\\w+", "BADRPT", NULL);
+    suite->add("m", "11", "bAS", "(?b)\\w+", "(?b)w+", "(?b)w+", NULL);
+    suite->add("m", "12", "iP", "(?c)a", "a", "a", NULL);
+    suite->add("f", "13", "iP", "(?c)a", "A", NULL);
+    suite->add("m", "14", "APS", "(?e)\\W+", "WW", "WW", NULL);
+    suite->add("m", "15", "P", "(?i)a+", "Aa", "Aa", NULL);
+    suite->add("f", "16", "P", "(?m)a.b", "a\nb", NULL);
+    suite->add("m", "17", "P", "(?m)^b", "a\nb", "b", NULL);
+    suite->add("f", "18", "P", "(?n)a.b", "a\nb", NULL);
+    suite->add("m", "19", "P", "(?n)^b", "a\nb", "b", NULL);
+    suite->add("f", "20", "P", "(?p)a.b", "a\nb", NULL);
+    suite->add("f", "21", "P", "(?p)^b", "a\nb", NULL);
+    suite->add("m", "22", "P", "(?q)a+b", "a+b", "a+b", NULL);
+    suite->add("m", "23", "nP", "(?s)a.b", "a\nb", "a\nb", NULL);
+    suite->add("m", "24", "xP", "(?t)a b", "a b", "a b", NULL);
+    suite->add("m", "25", "P", "(?w)a.b", "a\nb", "a\nb", NULL);
+    suite->add("m", "26", "P", "(?w)^b", "a\nb", "b", NULL);
+    suite->add("m", "27", "P", "(?x)a b", "ab", "ab", NULL);
+    suite->add("e", "28", "-", "(?z)ab", "BADOPT", NULL);
+    suite->add("m", "29", "P", "(?ici)a+", "Aa", "Aa", NULL);
+    suite->add("e", "30", "P", "(?i)(?q)a+", "BADRPT", NULL);
+    suite->add("m", "31", "P", "(?q)(?i)a+", "(?i)a+", "(?i)a+", NULL);
+    suite->add("m", "32", "P", "(?qe)a+", "a", "a", NULL);
+    suite->add("m", "33", "xP", "(?q)a b", "a b", "a b", NULL);
+    suite->add("m", "34", "P", "(?qx)a b", "a b", "a b", NULL);
+    suite->add("m", "35", "P", "(?qi)ab", "Ab", "Ab", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_20, "regex.20");
+
+
+/*
+ * 21 capturing
+ */
+
+// Suite class for section 21; it hands the suite name "regex.21" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_21 : public RegExTestSuite
+{
+public:
+    regextest_21() : RegExTestSuite("regex.21") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_21::suite()
+{
+    RegExTestSuite *suite = new regextest_21;
+
+    suite->add("m", "1", "-", "a(b)c", "abc", "abc", "b", NULL);
+    suite->add("m", "2", "P", "a(?:b)c", "xabc", "abc", NULL);
+    suite->add("m", "3", "-", "a((b))c", "xabcy", "abc", "b", "b", NULL);
+    suite->add("m", "4", "P", "a(?:(b))c", "abcy", "abc", "b", NULL);
+    suite->add("m", "5", "P", "a((?:b))c", "abc", "abc", "b", NULL);
+    suite->add("m", "6", "P", "a(?:(?:b))c", "abc", "abc", NULL);
+    suite->add("i", "7", "Q", "a(b){0}c", "ac", "0 1", "-1 -1", NULL);
+    suite->add("m", "8", "-", "a(b)c(d)e", "abcde", "abcde", "b", "d", NULL);
+    suite->add("m", "9", "-", "(b)c(d)e", "bcde", "bcde", "b", "d", NULL);
+    suite->add("m", "10", "-", "a(b)(d)e", "abde", "abde", "b", "d", NULL);
+    suite->add("m", "11", "-", "a(b)c(d)", "abcd", "abcd", "b", "d", NULL);
+    suite->add("m", "12", "-", "(ab)(cd)", "xabcdy", "abcd", "ab", "cd", NULL);
+    suite->add("m", "13", "-", "a(b)?c", "xabcy", "abc", "b", NULL);
+    suite->add("i", "14", "-", "a(b)?c", "xacy", "1 2", "-1 -1", NULL);
+    suite->add("m", "15", "-", "a(b)?c(d)?e", "xabcdey", "abcde", "b", "d", NULL);
+    suite->add("i", "16", "-", "a(b)?c(d)?e", "xacdey", "1 4", "-1 -1", "3 3", NULL);
+    suite->add("i", "17", "-", "a(b)?c(d)?e", "xabcey", "1 4", "2 2", "-1 -1", NULL);
+    suite->add("i", "18", "-", "a(b)?c(d)?e", "xacey", "1 3", "-1 -1", "-1 -1", NULL);
+    suite->add("m", "19", "-", "a(b)*c", "xabcy", "abc", "b", NULL);
+    suite->add("i", "20", "-", "a(b)*c", "xabbbcy", "1 5", "4 4", NULL);
+    suite->add("i", "21", "-", "a(b)*c", "xacy", "1 2", "-1 -1", NULL);
+    suite->add("m", "22", "-", "a(b*)c", "xabbbcy", "abbbc", "bbb", NULL);
+    suite->add("m", "23", "-", "a(b*)c", "xacy", "ac", "", NULL);
+    suite->add("f", "24", "-", "a(b)+c", "xacy", NULL);
+    suite->add("m", "25", "-", "a(b)+c", "xabcy", "abc", "b", NULL);
+    suite->add("i", "26", "-", "a(b)+c", "xabbbcy", "1 5", "4 4", NULL);
+    suite->add("m", "27", "-", "a(b+)c", "xabbbcy", "abbbc", "bbb", NULL);
+    suite->add("i", "28", "Q", "a(b){2,3}c", "xabbbcy", "1 5", "4 4", NULL);
+    suite->add("i", "29", "Q", "a(b){2,3}c", "xabbcy", "1 4", "3 3", NULL);
+    suite->add("f", "30", "Q", "a(b){2,3}c", "xabcy", NULL);
+    suite->add("m", "31", "LP", "\\y(\\w+)\\y", "-- abc-", "abc", "abc", NULL);
+    suite->add("m", "32", "-", "a((b|c)d+)+", "abacdbd", "acdbd", "bd", "b", NULL);
+    suite->add("m", "33", "N", "(.*).*", "abc", "abc", "abc", NULL);
+    suite->add("m", "34", "N", "(a*)*", "bc", "", "", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.21".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_21, "regex.21");
+
+
+/*
+ * 22 multicharacter collating elements
+ */
+
+// Suite class for section 22; it hands the suite name "regex.22" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_22 : public RegExTestSuite
+{
+public:
+    regextest_22() : RegExTestSuite("regex.22") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_22::suite()
+{
+    RegExTestSuite *suite = new regextest_22;
+
+    suite->add("m", "1", "&+L", "a[c]e", "ace", "ace", NULL);
+    suite->add("f", "2", "&+IL", "a[c]h", "ach", NULL);
+    suite->add("m", "3", "&+L", "a[[.ch.]]", "ach", "ach", NULL);
+    suite->add("f", "4", "&+L", "a[[.ch.]]", "ace", NULL);
+    suite->add("m", "5", "&+L", "a[c[.ch.]]", "ac", "ac", NULL);
+    suite->add("m", "6", "&+L", "a[c[.ch.]]", "ace", "ac", NULL);
+    suite->add("m", "7", "&+L", "a[c[.ch.]]", "ache", "ach", NULL);
+    suite->add("f", "8", "&+L", "a[^c]e", "ace", NULL);
+    suite->add("m", "9", "&+L", "a[^c]e", "abe", "abe", NULL);
+    suite->add("m", "10", "&+L", "a[^c]e", "ache", "ache", NULL);
+    suite->add("f", "11", "&+L", "a[^[.ch.]]", "ach", NULL);
+    suite->add("m", "12", "&+L", "a[^[.ch.]]", "ace", "ac", NULL);
+    suite->add("m", "13", "&+L", "a[^[.ch.]]", "ac", "ac", NULL);
+    suite->add("m", "14", "&+L", "a[^[.ch.]]", "abe", "ab", NULL);
+    suite->add("f", "15", "&+L", "a[^c[.ch.]]", "ach", NULL);
+    suite->add("f", "16", "&+L", "a[^c[.ch.]]", "ace", NULL);
+    suite->add("f", "17", "&+L", "a[^c[.ch.]]", "ac", NULL);
+    suite->add("m", "18", "&+L", "a[^c[.ch.]]", "abe", "ab", NULL);
+    suite->add("m", "19", "&+L", "a[^b]", "ac", "ac", NULL);
+    suite->add("m", "20", "&+L", "a[^b]", "ace", "ac", NULL);
+    suite->add("m", "21", "&+L", "a[^b]", "ach", "ach", NULL);
+    suite->add("f", "22", "&+L", "a[^b]", "abe", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.22".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_22, "regex.22");
+
+
+/*
+ * 23 lookahead constraints
+ */
+
+// Suite class for section 23; it hands the suite name "regex.23" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_23 : public RegExTestSuite
+{
+public:
+    regextest_23() : RegExTestSuite("regex.23") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_23::suite()
+{
+    RegExTestSuite *suite = new regextest_23;
+
+    suite->add("m", "1", "HP", "a(?=b)b*", "ab", "ab", NULL);
+    suite->add("f", "2", "HP", "a(?=b)b*", "a", NULL);
+    suite->add("m", "3", "HP", "a(?=b)b*(?=c)c*", "abc", "abc", NULL);
+    suite->add("f", "4", "HP", "a(?=b)b*(?=c)c*", "ab", NULL);
+    suite->add("f", "5", "HP", "a(?!b)b*", "ab", NULL);
+    suite->add("m", "6", "HP", "a(?!b)b*", "a", "a", NULL);
+    suite->add("m", "7", "HP", "(?=b)b", "b", "b", NULL);
+    suite->add("f", "8", "HP", "(?=b)b", "a", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.23".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_23, "regex.23");
+
+
+/*
+ * 24 non-greedy quantifiers
+ */
+
+// Suite class for section 24; it hands the suite name "regex.24" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_24 : public RegExTestSuite
+{
+public:
+    regextest_24() : RegExTestSuite("regex.24") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_24::suite()
+{
+    RegExTestSuite *suite = new regextest_24;
+
+    suite->add("m", "1", "PT", "ab+?", "abb", "ab", NULL);
+    suite->add("m", "2", "PT", "ab+?c", "abbc", "abbc", NULL);
+    suite->add("m", "3", "PT", "ab*?", "abb", "a", NULL);
+    suite->add("m", "4", "PT", "ab*?c", "abbc", "abbc", NULL);
+    suite->add("m", "5", "PT", "ab??", "ab", "a", NULL);
+    suite->add("m", "6", "PT", "ab??c", "abc", "abc", NULL);
+    suite->add("m", "7", "PQT", "ab{2,4}?", "abbbb", "abb", NULL);
+    suite->add("m", "8", "PQT", "ab{2,4}?c", "abbbbc", "abbbbc", NULL);
+    suite->add("m", "9", "-", "3z*", "123zzzz456", "3zzzz", NULL);
+    suite->add("m", "10", "PT", "3z*?", "123zzzz456", "3", NULL);
+    suite->add("m", "11", "-", "z*4", "123zzzz456", "zzzz4", NULL);
+    suite->add("m", "12", "PT", "z*?4", "123zzzz456", "zzzz4", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.24".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_24, "regex.24");
+
+
+/*
+ * 25 mixed quantifiers
+ */
+
+// Suite class for section 25; it hands the suite name "regex.25" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_25 : public RegExTestSuite
+{
+public:
+    regextest_25() : RegExTestSuite("regex.25") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_25::suite()
+{
+    RegExTestSuite *suite = new regextest_25;
+
+    suite->add("m", "1", "PNT", "^(.*?)(a*)$", "xyza", "xyza", "xyz", "a", NULL);
+    suite->add("m", "2", "PNT", "^(.*?)(a*)$", "xyzaa", "xyzaa", "xyz", "aa", NULL);
+    suite->add("m", "3", "PNT", "^(.*?)(a*)$", "xyz", "xyz", "xyz", "", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.25".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_25, "regex.25");
+
+
+/*
+ * 26 tricky cases
+ */
+
+// Suite class for section 26; it hands the suite name "regex.26" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_26 : public RegExTestSuite
+{
+public:
+    regextest_26() : RegExTestSuite("regex.26") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_26::suite()
+{
+    RegExTestSuite *suite = new regextest_26;
+
+    suite->add("m", "1", "-", "(week|wee)(night|knights)", "weeknights", "weeknights", "wee", "knights", NULL);
+    suite->add("m", "2", "RP", "a(bc*).*\\1", "abccbccb", "abccbccb", "b", NULL);
+    suite->add("m", "3", "-", "a(b.[bc]*)+", "abcbd", "abcbd", "bd", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.26".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_26, "regex.26");
+
+
+/*
+ * 27 implementation misc.
+ */
+
+// Suite class for section 27; it hands the suite name "regex.27" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_27 : public RegExTestSuite
+{
+public:
+    regextest_27() : RegExTestSuite("regex.27") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_27::suite()
+{
+    RegExTestSuite *suite = new regextest_27;
+
+    suite->add("m", "1", "P", "a(?:b|b)c", "abc", "abc", NULL);
+    suite->add("m", "2", "&", "[ab][ab][ab]", "aba", "aba", NULL);
+    suite->add("m", "3", "&", "[ab][ab][ab][ab][ab][ab][ab]", "abababa", "abababa", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.27".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_27, "regex.27");
+
+
+/*
+ * 28 boundary busters etc.
+ */
+
+// Suite class for section 28; it hands the suite name "regex.28" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_28 : public RegExTestSuite
+{
+public:
+    regextest_28() : RegExTestSuite("regex.28") { }
+    static Test *suite();
+};
+
+Test *regextest_28::suite()
+{
+    RegExTestSuite *suite = new regextest_28;
+
+    suite->add("m", "1", "&", "abcdefghijkl", "abcdefghijkl", "abcdefghijkl", NULL);
+    suite->add("m", "2", "P", "a(?:b|c|d|e|f|g|h|i|j|k|l|m)n", "agn", "agn", NULL);
+    suite->add("m", "3", "-", "a(((((((((((((b)))))))))))))c", "abc", "abc", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL);
+    suite->add("m", "4", "Q", "ab{1,100}c", "abbc", "abbc", NULL);
+    suite->add("m", "5", "Q", "ab{1,100}c", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", NULL);
+    suite->add("m", "6", "Q", "ab{1,100}c", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", NULL);
+    suite->add("m", "7", "LP", "\\w+abcdefgh", "xyzabcdefgh", "xyzabcdefgh", NULL);
+    suite->add("m", "8", "%LP", "\\w+abcdefgh", "xyzabcdefgh", "xyzabcdefgh", NULL);
+    suite->add("m", "9", "%LP", "\\w+abcdefghijklmnopqrst", "xyzabcdefghijklmnopqrst", "xyzabcdefghijklmnopqrst", NULL);
+    suite->add("i", "10", "%LP", "\\w+(abcdefgh)?", "xyz", "0 2", "-1 -1", NULL);
+    suite->add("i", "11", "%LP", "\\w+(abcdefgh)?", "xyzabcdefg", "0 9", "-1 -1", NULL);
+    suite->add("i", "12", "%LP", "\\w+(abcdefghijklmnopqrst)?", "xyzabcdefghijklmnopqrs", "0 21", "-1 -1", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_28, "regex.28");
+
+
+/*
+ * 29 incomplete matches
+ */
+
+// Suite class for section 29; it hands the suite name "regex.29" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_29 : public RegExTestSuite
+{
+public:
+    regextest_29() : RegExTestSuite("regex.29") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+// NOTE(review): there is no test with id "9" in this section - presumably it
+// is absent from the input data as well; confirm against reg.test.
+Test *regextest_29::suite()
+{
+    RegExTestSuite *suite = new regextest_29;
+
+    suite->add("p", "1", "t", "def", "abc", "3 2", "", NULL);
+    suite->add("p", "2", "t", "bcd", "abc", "1 2", "", NULL);
+    suite->add("p", "3", "t", "abc", "abab", "0 3", "", NULL);
+    suite->add("p", "4", "t", "abc", "abdab", "3 4", "", NULL);
+    suite->add("i", "5", "t", "abc", "abc", "0 2", "0 2", NULL);
+    suite->add("i", "6", "t", "abc", "xyabc", "2 4", "2 4", NULL);
+    suite->add("p", "7", "t", "abc+", "xyab", "2 3", "", NULL);
+    suite->add("i", "8", "t", "abc+", "xyabc", "2 4", "2 4", NULL);
+    suite->add("i", "10", "t", "abc+", "xyabcdd", "2 4", "7 6", NULL);
+    suite->add("p", "11", "tPT", "abc+?", "xyab", "2 3", "", NULL);
+    suite->add("i", "12", "tPT", "abc+?", "xyabc", "2 4", "5 4", NULL);
+    suite->add("i", "13", "tPT", "abc+?", "xyabcc", "2 4", "6 5", NULL);
+    suite->add("i", "14", "tPT", "abc+?", "xyabcd", "2 4", "6 5", NULL);
+    suite->add("i", "15", "tPT", "abc+?", "xyabcdd", "2 4", "7 6", NULL);
+    suite->add("i", "16", "t", "abcd|bc", "xyabc", "3 4", "2 4", NULL);
+    suite->add("p", "17", "tn", ".*k", "xx\nyyy", "3 5", "", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.29".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_29, "regex.29");
+
+
+/*
+ * 30 misc. oddities and old bugs
+ */
+
+// Suite class for section 30; it hands the suite name "regex.30" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_30 : public RegExTestSuite
+{
+public:
+    regextest_30() : RegExTestSuite("regex.30") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+// NOTE(review): for the "e" test the argument after the pattern is presumably
+// the expected error name rather than subject text - confirm in reg.test.
+Test *regextest_30::suite()
+{
+    RegExTestSuite *suite = new regextest_30;
+
+    suite->add("e", "1", "&", "***", "BADRPT", NULL);
+    suite->add("m", "2", "N", "a?b*", "abb", "abb", NULL);
+    suite->add("m", "3", "N", "a?b*", "bb", "bb", NULL);
+    suite->add("m", "4", "&", "a*b", "aab", "aab", NULL);
+    suite->add("m", "5", "&", "^a*b", "aaaab", "aaaab", NULL);
+    suite->add("m", "6", "&M", "[0-6][1-2][0-3][0-6][1-6][0-6]", "010010", "010010", NULL);
+    suite->add("m", "7", "s", "abc", "abcd", "abc", NULL);
+    suite->add("f", "8", "s", "abc", "xabcd", NULL);
+    suite->add("m", "9", "HLP", "(?n)^(?![t#])\\S+", "tk\n\n#\n#\nit0", "it0", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.30".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_30, "regex.30");
+
+
+/*
+ * extra_1 checks for bug fixes
+ */
+
+// Suite class for the "extra_1" section; it hands the suite name
+// "regex.extra_1" to RegExTestSuite and exposes suite() as its factory.
+class regextest_extra_1 : public RegExTestSuite
+{
+public:
+    regextest_extra_1() : RegExTestSuite("regex.extra_1") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test, identified here by a
+// bug title instead of a number. The arguments appear to be test type, test
+// id, flags (always "-" here), pattern, subject text and then any expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+Test *regextest_extra_1::suite()
+{
+    RegExTestSuite *suite = new regextest_extra_1;
+
+    suite->add("m", "Bug 230589", "-", "[ ]*(^|[^%])%V", "*%V2", NULL);
+    suite->add("m", "Bug 504785", "-", "([^_.]*)([^.]*)\\.(..)(.).*", "bbcos_001_c01.q1la", "bbcos_001_c01.q1la", "bbcos", "_001_c01", "q1", "l", NULL);
+    suite->add("m", "Bug 505048", "-", "\\A\\s*[^<]*\\s*<([^>]+)>", "a<a>", NULL);
+    suite->add("m", "Bug 505048", "-", "\\A\\s*([^b]*)b", "ab", NULL);
+    suite->add("m", "Bug 505048", "-", "\\A\\s*[^b]*(b)", "ab", NULL);
+    suite->add("m", "Bug 505048", "-", "\\A(\\s*)[^b]*(b)", "ab", NULL);
+    suite->add("m", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", NULL);
+    suite->add("m", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", "ab", NULL);
+    suite->add("i", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", "0 1", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.extra_1".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_extra_1, "regex.extra_1");
+
+
+/*
+ * wx_1 character classification: ascii
+ */
+
+// Suite class for the "wx_1" section; it hands the suite name "regex.wx_1" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_wx_1 : public RegExTestSuite
+{
+public:
+    regextest_wx_1() : RegExTestSuite("regex.wx_1") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+// Tests 1-22 pair each bracket class with a subject whose only character that
+// is expected to match is the last one.
+Test *regextest_wx_1::suite()
+{
+    RegExTestSuite *suite = new regextest_wx_1;
+
+    suite->add("m", "1", "&", "[^[:alnum:]]", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    suite->add("m", "2", "&", "[[:alnum:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X", "X", NULL);
+    suite->add("m", "3", "&", "[^[:alpha:]]", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    suite->add("m", "4", "&", "[[:alpha:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X", "X", NULL);
+    suite->add("m", "5", "&", "[^[:cntrl:]]", "\a\b\t\n\v\f\r!", "!", NULL);
+    suite->add("m", "6", "&", "[[:cntrl:]]", " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL);
+    suite->add("m", "7", "&", "[^[:digit:]]", "0123456789!", "!", NULL);
+    suite->add("m", "8", "&", "[[:digit:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0", "0", NULL);
+    suite->add("m", "9", "&", "[^[:graph:]]", "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL);
+    suite->add("m", "10", "&", "[[:graph:]]", "\a\b\t\n\v\f\r !", "!", NULL);
+    suite->add("m", "11", "&", "[^[:lower:]]", "abcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    suite->add("m", "12", "&", "[[:lower:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x", "x", NULL);
+    suite->add("m", "13", "&", "[^[:print:]]", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n", "\n", NULL);
+    suite->add("m", "14", "&", "[[:print:]]", "\a\b\n\v\f\rX", "X", NULL);
+    suite->add("m", "15", "&", "[^[:punct:]]", "!\"#%&'()*,-./:;?@[\\]_{}X", "X", NULL);
+    suite->add("m", "16", "&", "[[:punct:]]", "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL);
+    suite->add("m", "17", "&", "[^[:space:]]", "\t\n\v\f\r X", "X", NULL);
+    suite->add("m", "18", "&", "[[:space:]]", "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL);
+    suite->add("m", "19", "&", "[^[:upper:]]", "ABCDEFGHIJKLMNOPQRSTUVWXYZ!", "!", NULL);
+    suite->add("m", "20", "&", "[[:upper:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X", "X", NULL);
+    suite->add("m", "21", "&", "[^[:xdigit:]]", "0123456789ABCDEFabcdef!", "!", NULL);
+    suite->add("m", "22", "&", "[[:xdigit:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a", "a", NULL);
+    suite->add("i", "23", "&i", "AbCdEfGhIjKlMnOpQrStUvWxYz", "aBcDeFgHiJkLmNoPqRsTuVwXyZ", "0 25", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.wx_1".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_1, "regex.wx_1");
+
+
+/*
+ * wx_2 character classification: western european
+ */
+
+// Suite class for the "wx_2" section; it hands the suite name "regex.wx_2" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_wx_2 : public RegExTestSuite
+{
+public:
+    regextest_wx_2() : RegExTestSuite("regex.wx_2") { }
+    static Test *suite();
+};
+
+Test *regextest_wx_2::suite()
+{
+    RegExTestSuite *suite = new regextest_wx_2;
+
+    suite->add("m", "1", "&", "[^[:alpha:]]", "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!", "!", NULL);
+    suite->add("m", "2", "&", "[[:alpha:]]", " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷X", "X", NULL);
+    suite->add("m", "3", "&", "[^[:lower:]]", "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!", "!", NULL);
+    suite->add("m", "4", "&", "[[:lower:]]", " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ÷x", "x", NULL);
+    suite->add("m", "5", "&", "[^[:upper:]]", "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ!", "!", NULL);
+    suite->add("m", "6", "&", "[[:upper:]]", " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿX", "X", NULL);
+    suite->add("i", "7", "&i*", "ÀáÂãÄåÆçÈéÊëÌíÎïÐñÒóÔõÖøÙúÛüÝþ", "àÁâÃäÅæÇèÉêËìÍîÏðÑòÓôÕöØùÚûÜýÞ", "0 29", NULL);
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_2, "regex.wx_2");
+
+
+/*
+ * wx_3 character classification: cyrillic
+ */
+
+// Suite class for the "wx_3" section; it hands the suite name "regex.wx_3" to
+// RegExTestSuite and exposes suite() as its factory function.
+class regextest_wx_3 : public RegExTestSuite
+{
+public:
+    regextest_wx_3() : RegExTestSuite("regex.wx_3") { }
+    static Test *suite();
+};
+
+// Builds the suite: each add() call registers one test. The arguments appear
+// to be test type, test id, flags, pattern, then subject text and expected
+// results (see the generator, regex.pl); every list is NULL-terminated.
+// The non-ASCII literals are stored in this file as raw bytes; the generator
+// defaults to UTF-8 output - NOTE(review): confirm the file encoding.
+Test *regextest_wx_3::suite()
+{
+    RegExTestSuite *suite = new regextest_wx_3;
+
+    suite->add("m", "1", "&", "[^[:alpha:]]", "ёЁюабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!", "!", NULL);
+    suite->add("m", "2", "&", "[^[:lower:]]", "ёюабцдефгхийклмнопярстужвьызшэщчъ!", "!", NULL);
+    suite->add("m", "3", "&", "[[:lower:]]", "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪx", "x", NULL);
+    suite->add("m", "4", "&", "[^[:upper:]]", "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!", "!", NULL);
+    suite->add("m", "5", "&", "[[:upper:]]", "ёюабцдефгхийклмнопярстужвьызшэщчъX", "X", NULL);
+    suite->add("i", "6", "&i*", "ЁюАбЦдЕфГхИйКлМнОпЯрСтУжВьЫзШэЩчЪ", "ёЮаБцДеФгХиЙкЛмНоПяРсТуЖвЬыЗшЭщЧъ", "0 32", NULL);
+
+    return suite;
+}
+
+// Register the suite with CppUnit under the name "regex.wx_3".
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_3, "regex.wx_3");
+
+
+/*
+ * A suite containing all the above suites
+ */
+
+// Umbrella suite named "regex"; suite() fills it with the per-section suites
+// defined earlier in this file.
+class regextest : public TestSuite
+{
+public:
+    regextest() : TestSuite("regex") { }
+    static Test *suite();
+};
+
+// Creates the umbrella suite and adds the suite returned by every section's
+// suite() factory, in the order the sections appear in this file.
+Test *regextest::suite()
+{
+    TestSuite *suite = new regextest;
+
+    suite->addTest(regextest_1::suite());
+    suite->addTest(regextest_2::suite());
+    suite->addTest(regextest_3::suite());
+    suite->addTest(regextest_4::suite());
+    suite->addTest(regextest_5::suite());
+    suite->addTest(regextest_6::suite());
+    suite->addTest(regextest_7::suite());
+    suite->addTest(regextest_8::suite());
+    suite->addTest(regextest_9::suite());
+    suite->addTest(regextest_10::suite());
+    suite->addTest(regextest_11::suite());
+    suite->addTest(regextest_12::suite());
+    suite->addTest(regextest_13::suite());
+    suite->addTest(regextest_14::suite());
+    suite->addTest(regextest_15::suite());
+    suite->addTest(regextest_16::suite());
+    suite->addTest(regextest_17::suite());
+    suite->addTest(regextest_18::suite());
+    suite->addTest(regextest_19::suite());
+    suite->addTest(regextest_20::suite());
+    suite->addTest(regextest_21::suite());
+    suite->addTest(regextest_22::suite());
+    suite->addTest(regextest_23::suite());
+    suite->addTest(regextest_24::suite());
+    suite->addTest(regextest_25::suite());
+    suite->addTest(regextest_26::suite());
+    suite->addTest(regextest_27::suite());
+    suite->addTest(regextest_28::suite());
+    suite->addTest(regextest_29::suite());
+    suite->addTest(regextest_30::suite());
+    suite->addTest(regextest_extra_1::suite());
+    suite->addTest(regextest_wx_1::suite());
+    suite->addTest(regextest_wx_2::suite());
+    suite->addTest(regextest_wx_3::suite());
+
+    return suite;
+}
+
+// Register the umbrella suite twice: under the name "regex" and in the
+// default (unnamed) CppUnit registry.
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
+CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
diff --git a/tests/regex/regex.pl b/tests/regex/regex.pl

new file mode 100755 (executable)

index 0000000..af0cfe8
--- /dev/null
+++ b/tests/regex/regex.pl
@@ -0,0 +1,437 @@
+#!/usr/bin/env perl
+#############################################################################
+# Name:        regex.pl
+# Purpose:     Generate test code for wxRegEx from 'reg.test'
+# Author:      Mike Wetherell
+# RCS-ID:      $Id$
+# Copyright:   (c) Mike Wetherell
+# Licence:     wxWidgets licence
+#############################################################################
+
+#
+# Notes:
+#   See './regex.pl -h' for usage
+#
+#   Output at the moment is C++ using the cppunit testing framework. The
+#   language/framework specifics are separated, with the following 5
+#   subs as an interface: 'begin_output', 'begin_section', 'write_test',
+#   'end_section' and 'end_output'. So for a different language/framework,
+#   implement 5 new similar subs.
+#
+#   I've avoided using 'use encoding "UTF-8"', since this wasn't available
+#   in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
+#   earlier than perl 5.6.0 aren't going to work.
+#
+
+use strict;
+# Warnings are switched on here instead of with '-w' on the '#!' line: many
+# systems hand everything after the interpreter path to 'env' as one single
+# argument, so 'env' would look for a program literally named "perl -w".
+use warnings;
+use File::Basename;
+#use encoding "UTF-8";  # enable in the future when perl 5.6.x is just a memory
+
+# if 0 output is wide characters, if 1 output is utf8 encoded
+my $utf = 1;
+
+# quote a parameter (C++ helper)
+#
+# Works on $_ rather than on an argument: the text to quote is read from $_
+# (and $_ is modified along the way), and the quoted C++ string literal is
+# returned. The literal gets an 'L' prefix when $utf is false.
+#
+# Control characters, '"' and '\' that have an entry in %esc become the
+# matching backslash escape; characters above 0x9f are kept as they are when
+# $utf is true and written as \uXXXX otherwise; everything else matched by
+# the character class is written as a three digit octal escape.
+#
+sub quotecxx {
+    my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
+                "\n" => "n", "\r" => "r", "\t" => "t",
+                "\013" => "v", '"' => '"', "\\" => "\\" );
+
+    # working around lack of 'use encoding'
+    # NOTE(review): presumably this makes perl treat the bytes in $_ as
+    # UTF-8 encoded characters for the substitution below - confirm, since
+    # the meaning of "U0" in pack templates differs between perl versions
+    $_ = pack "U0C*", unpack "C*", $_;
+    use utf8;
+
+    s/[\000-\037"\\\177-\x{ffff}]/
+        if ($esc{$&}) {
+            "\\$esc{$&}";
+        } elsif (ord($&) > 0x9f) {
+            if ($utf) {
+                $&;
+            } else {
+                sprintf "\\u%04x", ord($&);
+            }
+        } else {
+            sprintf "\\%03o", ord($&);
+        }
+    /ge;
+
+    # working around lack of 'use encoding'
+    # NOTE(review): presumably the reverse of the conversion above, turning
+    # the result back into a plain byte string - confirm
+    no utf8;
+    $_ = pack "C*", unpack "C*", $_;
+
+    return ($utf ? '"' : 'L"') . $_ . '"'
+}
+
+# Emit the comment block that opens the generated file (C++ interface)
+#
+# Arguments: the provenance text (generator, input files, copyrights) and,
+# optionally, the test instructions (test types, flag characters) gathered
+# from the input comments. Both end up inside one C comment.
+#
+sub begin_output {
+    my ($origin, $notes) = @_;
+
+    # turn a chunk of text into ' * ' prefixed comment lines, dropping a
+    # leading three space indent where there is one
+    my $as_comment = sub {
+        my $text = "\n" . shift;
+        $text =~ s/^(?:   )?/ * /mg;
+        return $text;
+    };
+
+    $origin = $as_comment->($origin);
+    $notes  = $as_comment->($notes) if $notes;
+
+    my $encoding = $utf ? " (UTF-8 encoded)" : "";
+
+    print <<EOT;
+/*
+ * Test data for wxRegEx$encoding
+$origin$notes */
+
+EOT
+}
+
+# the sections emitted so far, as [section id, C++ class name] pairs
+my @classes;
+
+# Open a new section: print its heading comment, the suite class and the
+# first lines of the class's suite() function (C++ interface)
+#
+sub begin_section {
+    my ($section, $heading) = @_;
+
+    # the class name is the section id with non-word characters made '_'
+    (my $cxxname = "regextest_$section") =~ s/\W/_/g;
+    push @classes, [$section, $cxxname];
+
+    print <<EOT;
+
+/*
+ * $section $heading
+ */
+
+class $cxxname : public RegExTestSuite
+{
+public:
+    $cxxname() : RegExTestSuite("regex.$section") { }
+    static Test *suite();
+};
+
+Test *${cxxname}::suite()
+{
+    RegExTestSuite *suite = new $cxxname;
+
+EOT
+}
+
+# Print one 'suite->add(...)' line (C++ interface)
+#
+# Every argument is turned into a C++ string literal by quotecxx and the
+# argument list is closed with NULL.
+#
+sub write_test {
+    # quotecxx works on $_, so quote a private copy of the arguments
+    my @literals = @_;
+    @literals = map { quotecxx() } @literals;
+
+    my $list = join ', ', @literals;
+    print "    suite->add(" . $list . ", NULL);\n";
+}
+
+# Close the section opened by the latest begin_section: finish its suite()
+# function and register the suite by name (C++ interface)
+#
+sub end_section {
+    # the section being closed is the one recorded last
+    my $current = $classes[-1];
+    my ($section, $cxxname) = @$current;
+
+    print <<EOT;
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($cxxname, "regex.$section");
+
+EOT
+}
+
+# Finish the generated file: print a 'regextest' suite which pulls in the
+# suite of every section written so far, and register it (C++ interface)
+#
+sub end_output {
+    print <<EOT;
+
+/*
+ * A suite containing all the above suites
+ */
+
+class regextest : public TestSuite
+{
+public:
+    regextest() : TestSuite("regex") { }
+    static Test *suite();
+};
+
+Test *regextest::suite()
+{
+    TestSuite *suite = new regextest;
+
+EOT
+
+    # one addTest line per section, in the order the sections were begun
+    foreach my $entry (@classes) {
+        my $cxxname = $entry->[1];
+        print "    suite->addTest(${cxxname}::suite());\n";
+    }
+
+    print <<EOT;
+
+    return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
+CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
+EOT
+}
+
+# Parse a tcl string. Handles curly quoting and double quoting.
+#
+# Takes the text to split as its only argument and returns the list of words
+# found in it. A word is a balanced {...} group, a "..." string or a run of
+# non-space characters; the outer braces/quotes are removed. Inside braces
+# only \{ and \} are unescaped; everywhere else the backslash escapes are
+# decoded (octal, \x, \u and the single letter escapes listed in %esc).
+#
+sub parsetcl {
+    my ($curly, $quote);
+    # recursively defined expression that can parse balanced braces
+    # warning: uses experimental features of perl, see perlop(1)
+    $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
+    $quote = qr/"(?:\\"|[^"])*"/;
+    my @tokens = shift =~ /($curly|$quote|\S+)/g;
+
+    # now remove braces/quotes and unescape any escapes
+    for (@tokens) {
+        # the literal braces are backslashed, as they are in $curly above;
+        # perl's documentation advises escaping every literal '{'
+        if (s/^\{(.*)\}$/$1/) {
+            # for curly quoting, only unescape \{ and \}
+            s/\\([{}])/$1/g;
+        } else {
+            s/^"(.*)"$/$1/;
+
+            # unescape any escapes
+            my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
+                        "n" => "\n", "r" => "\r", "t" => "\t",
+                        "v" => "\013" );
+            my $x = qr/[[:xdigit:]]/;
+
+            # The branches below test $1 with further matches; a match that
+            # fails leaves $1 untouched, so the later branches still see the
+            # text captured by the substitution itself.
+            s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
+                if ($1 =~ m{^([0-7]+)}) {
+                    chr(oct($1));
+                } elsif ($1 =~ m{^x($x+)}) {
+                    pack("C0U", hex($1) & 0xff);
+                } elsif ($1 =~ m{^u($x+)}) {
+                    pack("C0U", hex($1));
+                } elsif ($esc{$1}) {
+                    $esc{$1};
+                } else {
+                    $1;
+                }
+            /ge;
+        }
+    }
+
+    return @tokens;
+}
+
+# Bookkeeping around the output interface: a section is only begun once its
+# first test turns up, and is ended before the next heading or at the end of
+# the output.
+#
+my @doing = ("0", "");      # id and title from the latest heading
+my $in_section = 0;         # true while a begun section is still open
+
+# end the current section if one is open
+sub finish_section {
+    end_section() if $in_section;
+    $in_section = 0;
+}
+
+# a heading was read: close any open section and remember the new heading
+sub handle_doing {
+    finish_section();
+    @doing = @_;
+}
+
+# a test was read: begin the pending section if need be, then write the test
+sub handle_test {
+    unless ($in_section) {
+        begin_section(@doing);
+        $in_section = 1;
+    }
+    write_test(@_);
+}
+
+# the input is exhausted: close any open section and finish the output
+sub handle_end {
+    finish_section();
+    end_output();
+}
+
+# 'main' - start by parsing the command line options.
+#
+# Option arguments are removed from @ARGV as they are handled, which leaves
+# only the input file names behind. Single letter options can be bundled
+# (e.g. '-uo FILE'), '--' ends option processing and any unknown option
+# letter (or an empty command line) sets $badoption so that the usage text
+# is shown.
+#
+my $badoption = !@ARGV;
+my $utfdefault = $utf;
+my $outputname;
+
+for (my $i = 0; $i < @ARGV; ) {
+    # not an option (this includes a lone '-'): leave it in place
+    if ($ARGV[$i] !~ m{^-.}) {
+        $i++;
+        next;
+    }
+
+    if ($ARGV[$i] eq '--') {
+        splice @ARGV, $i, 1;
+        last;
+    }
+
+    # -o : output file. The name is whatever follows the 'o' or, when
+    # nothing does, the next argument. The letters before the 'o' are
+    # matched non-greedily so that the split happens at the first 'o';
+    # a greedy match would split '-ofoo.out' inside the file name.
+    if ($ARGV[$i] =~ s{^-(.*?)o(.*)$}{-$1}i) {
+        $outputname = $2 || splice @ARGV, $i + 1, 1;
+    }
+
+    for (split //, substr($ARGV[$i], 1)) {
+        if (/u/i) {                                 # -u : utf-8 output
+            $utf = 1;
+        } elsif (/w/i) {                            # -w : wide char output
+            $utf = 0;
+        } else {
+            $badoption = 1;
+        }
+    }
+
+    splice @ARGV, $i, 1;
+}
+
+# Print the usage text and stop if the options could not be understood
+#
+if ($badoption) {
+    my $name = basename $0;
+
+    # the "(default)" marker goes next to whichever encoding is the default
+    my @marks = (" (default)", "          ");
+    my ($wide, $utf8) = $utfdefault ? reverse(@marks) : @marks;
+
+    print <<EOT;
+Usage: $name [-u|-w] [-o OUTPUT] [FILE...]
+Generate test code for wxRegEx from 'reg.test'
+Example: $name -o regex.inc reg.test wxreg.test 
+
+ -w$wide   Output will be wide characters.
+ -u$utf8   Output will be UTF-8 encoded.
+
+Input files should be in UTF-8. If no input files are specified input is
+read from stdin. If no output file is specified output is written to stdout.
+See the comments in reg.test (in src/regex) for details of the input file
+format.
+EOT
+    exit 0;
+}
+
+# Open the output file
+#
+# STDOUT is redirected so that the plain 'print's of the output subs end up
+# in the file. The three argument form stops characters in the name from
+# being taken as part of the open mode, and a failure is fatal rather than
+# silently leaving the output on the original stdout.
+#
+if ($outputname) {
+    open(STDOUT, '>', $outputname)
+        or die "$0: cannot open '$outputname' for writing: $!\n";
+}
+
+# Read in the files and initially parse just the comments for copyright
+# information and instructions on the tests
+#
+my @input;                  # slurped input files stripped of comments
+my $files = "";             # copyright info from the input comments
+my $instructions = "";      # test instructions from the input comments
+
+do {
+    # Name of the file about to be read, or undef when no file names are
+    # left in @ARGV. The variable is declared unconditionally because perl
+    # documents 'my $x = ... if COND' as having undefined behaviour.
+    my $inputname = @ARGV ? basename($ARGV[0]) : undef;
+
+    # slurp input
+    undef $/;
+    my $in = <>;
+
+    # remove escaped newlines
+    $in =~ s/(?<!\\)\\\n//g;
+
+    # record the copyrights of the input files
+    for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
+        s/[\s:]+/ /g;
+        $files .= "  ";
+        $files .= $inputname . ": " if $inputname && $inputname ne '-';
+        $files .= "$_\n";
+    }
+
+    # Parse the comments for instructions on the tests, which look like this:
+    #    i    successful match with -indices (used in checking things like
+    #         nonparticipating subexpressions)
+    # Only the first input file that has any instructions is used.
+    if (!$instructions) {
+        my $sp = qr{\t|   +};                   # tab or three or more spaces
+        my @instructions = $in =~
+            /\n(
+                (?:
+                    \#$sp\S?$sp\S[^\n]+\n       # instruction line
+                    (?:\#$sp$sp\S[^\n]+\n)*     # continuation lines (if any)
+                )+
+            )/gx;
+
+        if (@instructions) {
+            # the first run of instruction lines is labelled as the test
+            # types and the second, if there is one, as the flag characters
+            $instructions[0] = "Test types:\n$instructions[0]";
+            if (@instructions > 1) {
+                $instructions[1] = "Flag characters:\n$instructions[1]";
+            }
+            $instructions = join "\n", @instructions;
+            $instructions =~ s/^#([^\t]?)/ $1/mg;
+        }
+    }
+
+    # @input is the input of all files (stripped of comments)
+    $in =~ s/^#.*$//mg;
+    push @input, $in;
+
+} while $ARGV[0];
+
+# Describe where the output comes from: the generator, the time of the run
+# and, when known, the input files and/or their copyright lines
+#
+my $timestamp = localtime;
+my $generator = basename $0;
+my $from = "Generated $timestamp by $generator";
+$from =~ s/[\s]+/ /g;
+
+# a ':' in $files means that file names were recorded with the copyrights
+if ($files) {
+    $from .= ($files =~ /:/)
+        ? " from the following files:"
+        : " from work with the following copyright:";
+}
+
+# word-wrap
+my @wrapped = $from =~ /(.{0,76}(?:\s|$))/g;
+$from = join("\n", @wrapped);
+$from .= "\n$files" if $files;
+
+# Now start to print the code
+#
+begin_output($from, $instructions);
+
+# numbers for 'extra' sections
+my $extra = 1;
+
+# Each element of @input is the comment-stripped text of one input file. For
+# every file the table style tests are written first, followed by one
+# 'extra_N' section holding the tests found in 'test ... { regexp ... }' form.
+for (@input)
+{
+    # Print the main tests
+    #
+    # Test lines look like this:
+    # m  3  b       {\(a\)b}        ab      ab      a
+    #
+    # Also looks for heading lines, e.g.:
+    # doing 4 "parentheses"
+    #
+    for (split "\n") {
+        if (/^doing\s+(\S+)\s+(\S.*)/) {
+            handle_doing parsetcl "$1 $2";
+        } elsif (/^[efimp]\s/) {
+            # a line starting with one of the test type letters
+            handle_test parsetcl $_;
+        }
+    }
+
+    # Extra tests
+    #
+    # The expression below matches something like this:
+    #   test reg-33.8 {Bug 505048} {
+    #       regexp -inline {\A\s*[^b]*b} ab
+    #   } ab
+    #
+    # The three subexpressions then return these parts:
+    #   $extras[$i]     = '{Bug 505048}',
+    #   $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
+    #   $extras[$i + 2] = 'ab'
+    #
+    my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n       # line 1
+                  \s*regexp\s+([^\n]+)\n                # line 2
+                  \}\s*(\S[^\n]*)/gx;                   # line 3
+
+    # the extra tests of a file get a section of their own, numbered per file
+    handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
+
+    # @extras is a flat list holding three captures for every test found
+    for (my $i = 0; $i < @extras; $i += 3) {
+        my $id = $extras[$i];
+
+        # further parse the middle line into options and the rest (i.e. $args)
+        my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
+
+        my @args = parsetcl $args;
+        $#args = 1;     # only want the first two
+
+        # now handle the options
+        # The test type is 'i' when -indices was given; otherwise it is 'm'
+        # if the expected result is a true value and 'f' if it is not (e.g.
+        # '0'). Expected results are only passed on for -inline tests.
+        my $test    = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
+        my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
+
+        # get them all in the right order and print
+        # ('-' fills the position that holds the flags in the table tests;
+        # the results are parsed twice - presumably once to remove the outer
+        # quoting and once to split the list inside it; confirm)
+        unshift @args, $test, parsetcl($id), '-';
+        push @args, parsetcl(parsetcl($results)) if $results;
+        handle_test @args;
+    }
+}
+
+# finish
+#
+handle_end;
diff --git a/tests/regex/wxreg.test b/tests/regex/wxreg.test

new file mode 100644 (file)

index 0000000..3ae0f23
--- /dev/null
+++ b/tests/regex/wxreg.test
@@ -0,0 +1,71 @@
+#############################################################################
+# Name:        wxreg.test
+# Purpose:     Additional tests for the regex lib and wxRegEx
+# Author:      Mike Wetherell
+# RCS-ID:      $Id$
+# Copyright:   (c) 2004 Mike Wetherell.
+# Licence:     wxWidgets licence
+#############################################################################
+
+#
+# The layout of this file is the same as src/regex/reg.test. See the comments
+# in that file for full details. The encoding used in here is UTF-8.
+#
+# These tests check the character classifications over the ASCII range pretty
+# thoroughly, since hopefully these will be similar for all platforms and
+# locales where wxWidgets runs.
+#
+# There are also some tests involving Western European and Cyrillic characters.
+# In Unicode mode, all these tests should succeed, which verifies that the
+# classifications aren't limited to a single 8-bit character set.
+#
+# In non-Unicode mode, a test that can't be translated into the character
+# encoding of the current locale is skipped, so each of these tests may
+# either succeed or be skipped.
+#
+
+doing wx_1 "character classification: ascii"
+m   1   &   {[^[:alnum:]]}  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
+m   2   &   {[[:alnum:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X" "X"
+m   3   &   {[^[:alpha:]]}  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
+m   4   &   {[[:alpha:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X" "X"
+m   5   &   {[^[:cntrl:]]}  "\a\b\t\n\v\f\r!" "!"
+m   6   &   {[[:cntrl:]]}   " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
+m   7   &   {[^[:digit:]]}  "0123456789!" "!"
+m   8   &   {[[:digit:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0" "0"
+m   9   &   {[^[:graph:]]}  "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
+m   10  &   {[[:graph:]]}   "\a\b\t\n\v\f\r !" "!"
+m   11  &   {[^[:lower:]]}  "abcdefghijklmnopqrstuvwxyz!" "!"
+m   12  &   {[[:lower:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x" "x"
+m   13  &   {[^[:print:]]}  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" "\n"
+m   14  &   {[[:print:]]}   "\a\b\n\v\f\rX" "X"
+m   15  &   {[^[:punct:]]}  "!\"#%&'()*,-./:;?@[\\]_{}X" "X"
+m   16  &   {[[:punct:]]}   "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
+m   17  &   {[^[:space:]]}  "\t\n\v\f\r X" "X"
+m   18  &   {[[:space:]]}   "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
+m   19  &   {[^[:upper:]]}  "ABCDEFGHIJKLMNOPQRSTUVWXYZ!" "!"
+m   20  &   {[[:upper:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X" "X"
+m   21  &   {[^[:xdigit:]]} "0123456789ABCDEFabcdef!" "!"
+m   22  &   {[[:xdigit:]]}  "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a" "a"
+i   23  &i  "AbCdEfGhIjKlMnOpQrStUvWxYz" "aBcDeFgHiJkLmNoPqRsTuVwXyZ" "0 25"
+
+doing wx_2 "character classification: western european"
+m   1   &   {[^[:alpha:]]}  "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
+m   2   &   {[[:alpha:]]}   " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷X" "X"
+m   3   &   {[^[:lower:]]}  "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
+m   4   &   {[[:lower:]]}   " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ÷x" "x"
+m   5   &   {[^[:upper:]]}  "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ!" "!"
+m   6   &   {[[:upper:]]}   " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿX" "X"
+i   7   &i* "ÀáÂãÄåÆçÈéÊëÌíÎïÐñÒóÔõÖøÙúÛüÝþ" "àÁâÃäÅæÇèÉêËìÍîÏðÑòÓôÕöØùÚûÜýÞ" "0 29"
+
+doing wx_3 "character classification: cyrillic"
+m   1   &   {[^[:alpha:]]}  "ёЁюабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
+m   2   &   {[^[:lower:]]}  "ёюабцдефгхийклмнопярстужвьызшэщчъ!" "!"
+m   3   &   {[[:lower:]]}   "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪx" "x"
+m   4   &   {[^[:upper:]]}  "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
+m   5   &   {[[:upper:]]}   "ёюабцдефгхийклмнопярстужвьызшэщчъX" "X"
+i   6   &i* "ЁюАбЦдЕфГхИйКлМнОпЯрСтУжВьЫзШэЩчЪ" "ёЮаБцДеФгХиЙкЛмНоПяРсТуЖвЬыЗшЭщЧъ" "0 32"
+
+#doing bugs "known bugs"
+#m  1    -   {(\w+).*?(\d\d:\d\d)} "from 10:30 until 12:00" "from" "10:30"
+
diff --git a/tests/test.bkl b/tests/test.bkl

index 910296b8a7eca2b3272793860580d6b2c1649480..2fd21e6d094e36b1a2650cbfd65f631e4ee7af59 100644 (file)
--- a/tests/test.bkl
+++ b/tests/test.bkl
@@ -10,6 +10,7 @@
          <sources>
              test.cpp
              mbconv/main.cpp
+            regex/regex.cpp
          </sources>
          <wx-lib>base</wx-lib>
      </exe>
diff --git a/tests/test.dsp b/tests/test.dsp

index 4feb363e4e809268f4a472b6256f98562e55155a..54b7fb6087388401619c196201d75e08756736f2 100644 (file)
--- a/tests/test.dsp
+++ b/tests/test.dsp
@@ -439,6 +439,10 @@ SOURCE=.\mbconv\main.cpp
  # End Source File
  # Begin Source File
  
+SOURCE=.\regex\regex.cpp
+# End Source File
+# Begin Source File
+
  SOURCE=.\test.cpp
  # End Source File
  # End Group
author	Václav Slavík <vslavik@fastmail.fm>
	Fri, 5 Mar 2004 23:14:23 +0000 (23:14 +0000)
committer	Václav Slavík <vslavik@fastmail.fm>
	Fri, 5 Mar 2004 23:14:23 +0000 (23:14 +0000)
tests/Makefile.in		patch \| blob \| blame \| history
tests/makefile.bcc		patch \| blob \| blame \| history
tests/makefile.gcc		patch \| blob \| blame \| history
tests/makefile.vc		patch \| blob \| blame \| history
tests/makefile.wat		patch \| blob \| blame \| history
tests/regex/reg.test	[new file with mode: 0644]	patch \| blob
tests/regex/regex.cpp	[new file with mode: 0644]	patch \| blob
tests/regex/regex.inc	[new file with mode: 0644]	patch \| blob
tests/regex/regex.pl	[new file with mode: 0755]	patch \| blob
tests/regex/wxreg.test	[new file with mode: 0644]	patch \| blob
tests/test.bkl		patch \| blob \| blame \| history
tests/test.dsp		patch \| blob \| blame \| history