From e70833fb1dfa271e7b0c9dec11ad644880e03c6f Mon Sep 17 00:00:00 2001 From: =?utf8?q?V=C3=A1clav=20Slav=C3=ADk?= <vslavik@fastmail.fm> Date: Fri, 5 Mar 2004 23:14:23 +0000 Subject: [PATCH] added regex test suite git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@26104 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- tests/Makefile.in | 6 +- tests/makefile.bcc | 6 +- tests/makefile.gcc | 6 +- tests/makefile.vc | 6 +- tests/makefile.wat | 6 +- tests/regex/reg.test | 1135 +++++++++++++++++++++++++++++++++ tests/regex/regex.cpp | 421 +++++++++++++ tests/regex/regex.inc | 1361 ++++++++++++++++++++++++++++++++++++++++ tests/regex/regex.pl | 437 +++++++++++++ tests/regex/wxreg.test | 71 +++ tests/test.bkl | 1 + tests/test.dsp | 4 + 12 files changed, 3455 insertions(+), 5 deletions(-) create mode 100644 tests/regex/reg.test create mode 100644 tests/regex/regex.cpp create mode 100644 tests/regex/regex.inc create mode 100755 tests/regex/regex.pl create mode 100644 tests/regex/wxreg.test diff --git a/tests/Makefile.in b/tests/Makefile.in index 6f272dd47a..ac19e587e7 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -37,7 +37,8 @@ TEST_CXXFLAGS = -D__WX$(TOOLKIT)__ $(__WXUNIV_DEFINE_p) -I$(srcdir) \ $(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS) TEST_OBJECTS = \ test_test.o \ - test_main.o + test_main.o \ + test_regex.o ### Conditionally set variables: ### @@ -106,6 +107,9 @@ test_test.o: $(srcdir)/test.cpp test_main.o: $(srcdir)/mbconv/main.cpp $(CXXC) -c -o $@ $(TEST_CXXFLAGS) $< +test_regex.o: $(srcdir)/regex/regex.cpp + $(CXXC) -c -o $@ $(TEST_CXXFLAGS) $< + # Include dependency info, if present: @IF_GNU_MAKE@-include .deps/*.d diff --git a/tests/makefile.bcc b/tests/makefile.bcc index a968cacdf8..749a041311 100644 --- a/tests/makefile.bcc +++ b/tests/makefile.bcc @@ -31,7 +31,8 @@ TEST_CXXFLAGS = $(__RUNTIME_LIBS_6) -I$(BCCDIR)\include $(__DEBUGINFO) \ $(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS) TEST_OBJECTS = \ $(OBJS)\test_test.obj \ - 
$(OBJS)\test_main.obj + $(OBJS)\test_main.obj \ + $(OBJS)\test_regex.obj ### Conditionally set variables: ### @@ -156,3 +157,6 @@ $(OBJS)\test_test.obj: .\test.cpp $(OBJS)\test_main.obj: .\mbconv\main.cpp $(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $** + +$(OBJS)\test_regex.obj: .\regex\regex.cpp + $(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $** diff --git a/tests/makefile.gcc b/tests/makefile.gcc index 5aeeb39f94..2fe6eb76fe 100644 --- a/tests/makefile.gcc +++ b/tests/makefile.gcc @@ -22,7 +22,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO) $(__OPTIMIZEFLAG_2) $(GCCFLAGS) -DHAVE_W32API_H \ $(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS) TEST_OBJECTS = \ $(OBJS)\test_test.o \ - $(OBJS)\test_main.o + $(OBJS)\test_main.o \ + $(OBJS)\test_regex.o ### Conditionally set variables: ### @@ -151,4 +152,7 @@ $(OBJS)\test_test.o: ./test.cpp $(OBJS)\test_main.o: ./mbconv/main.cpp $(CXX) -c -o $@ $(TEST_CXXFLAGS) $< +$(OBJS)\test_regex.o: ./regex/regex.cpp + $(CXX) -c -o $@ $(TEST_CXXFLAGS) $< + .PHONY: all clean diff --git a/tests/makefile.vc b/tests/makefile.vc index 59f35e494a..ee92075fb5 100644 --- a/tests/makefile.vc +++ b/tests/makefile.vc @@ -24,7 +24,8 @@ TEST_CXXFLAGS = /M$(__RUNTIME_LIBS_7)$(__DEBUGRUNTIME_3) /DWIN32 \ $(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS) TEST_OBJECTS = \ $(OBJS)\test_test.obj \ - $(OBJS)\test_main.obj + $(OBJS)\test_main.obj \ + $(OBJS)\test_regex.obj ### Conditionally set variables: ### @@ -212,3 +213,6 @@ $(OBJS)\test_test.obj: .\test.cpp $(OBJS)\test_main.obj: .\mbconv\main.cpp $(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $** + +$(OBJS)\test_regex.obj: .\regex\regex.cpp + $(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $** diff --git a/tests/makefile.wat b/tests/makefile.wat index 9df4df112a..53e2b0c516 100644 --- a/tests/makefile.wat +++ b/tests/makefile.wat @@ -172,7 +172,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO_0) $(__OPTIMIZEFLAG_2) -bm $(__RUNTIME_LIBS_5) & $(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS) TEST_OBJECTS = & $(OBJS)\test_test.obj & - 
$(OBJS)\test_main.obj + $(OBJS)\test_main.obj & + $(OBJS)\test_regex.obj all : $(OBJS) @@ -206,3 +207,6 @@ $(OBJS)\test_test.obj : .AUTODEPEND .\test.cpp $(OBJS)\test_main.obj : .AUTODEPEND .\mbconv\main.cpp $(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $< + +$(OBJS)\test_regex.obj : .AUTODEPEND .\regex\regex.cpp + $(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $< diff --git a/tests/regex/reg.test b/tests/regex/reg.test new file mode 100644 index 0000000000..8bfffad107 --- /dev/null +++ b/tests/regex/reg.test @@ -0,0 +1,1135 @@ +# reg.test -- +# +# This file contains a collection of tests for one or more of the Tcl +# built-in commands. Sourcing this file into Tcl runs the tests and +# generates output for errors. No output means no errors were found. +# (Don't panic if you are seeing this as part of the reg distribution +# and aren't using Tcl -- reg's own regression tester also knows how +# to read this file, ignoring the Tcl-isms.) +# +# Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. +# +# RCS: @(#) $Id$ + +if {[lsearch [namespace children] ::tcltest] == -1} { + package require tcltest 2 + namespace import -force ::tcltest::* +} + +# All tests require the testregexp command, return if this +# command doesn't exist + +::tcltest::testConstraint testregexp \ + [expr {[info commands testregexp] != {}}] +::tcltest::testConstraint localeRegexp 0 + +# This file uses some custom procedures, defined below, for regexp regression +# testing. The name of the procedure indicates the general nature of the +# test: +# e compile error expected +# f match failure expected +# m successful match +# i successful match with -indices (used in checking things like +# nonparticipating subexpressions) +# p unsuccessful match with -indices (!!) (used in checking +# partial-match reporting) +# There is also "doing" which sets up title and major test number for each +# block of tests. 
+ +# The first 3 arguments are constant: a minor number (which often gets +# a letter or two suffixed to it internally), some flags, and the RE itself. +# For e, the remaining argument is the name of the compile error expected, +# less the leading "REG_". For the rest, the next argument is the string +# to try the match against. Remaining arguments are the substring expected +# to be matched, and any substrings expected to be matched by subexpressions. +# (For f, these arguments are optional, and if present are ignored except +# that they indicate how many subexpressions should be present in the RE.) +# It is an error for the number of subexpression arguments to be wrong. +# Cases involving nonparticipating subexpressions, checking where empty +# substrings are located, etc. should be done using i and p. + +# The flag characters are complex and a bit eclectic. Generally speaking, +# lowercase letters are compile options, uppercase are expected re_info +# bits, and nonalphabetics are match options, controls for how the test is +# run, or testing options. The one small surprise is that AREs are the +# default, and you must explicitly request lesser flavors of RE. The flags +# are as follows. It is admitted that some are not very mnemonic. +# There are some others which are purely debugging tools and are not +# useful in this file. +# +# - no-op (placeholder) +# + provide fake xy equivalence class and ch collating element +# % force small state-set cache in matcher (to test cache replace) +# ^ beginning of string is not beginning of line +# $ end of string is not end of line +# * test is Unicode-specific, needs big character set +# +# & test as both ARE and BRE +# b BRE +# e ERE +# a turn advanced-features bit on (error unless ERE already) +# q literal string, no metacharacters at all +# +# i case-independent matching +# o ("opaque") no subexpression capture +# p newlines are half-magic, excluded from . 
and [^ only +# w newlines are half-magic, significant to ^ and $ only +# n newlines are fully magic, both effects +# x expanded RE syntax +# t incomplete-match reporting +# +# A backslash-_a_lphanumeric seen +# B ERE/ARE literal-_b_race heuristic used +# E backslash (_e_scape) seen within [] +# H looka_h_ead constraint seen +# I _i_mpossible to match +# L _l_ocale-specific construct seen +# M unportable (_m_achine-specific) construct seen +# N RE can match empty (_n_ull) string +# P non-_P_OSIX construct seen +# Q {} _q_uantifier seen +# R back _r_eference seen +# S POSIX-un_s_pecified syntax seen +# T prefers shortest (_t_iny) +# U saw original-POSIX botch: unmatched right paren in ERE (_u_gh) + +# The one area we can't easily test is memory-allocation failures (which +# are hard to provoke on command). Embedded NULs also are not tested at +# the moment, but this is a historical accident which should be fixed. + + + +# test procedures and related + +set ask "about" +set xflags "xflags" +set testbypassed 0 + +# re_info abbreviation mapping table +set infonames(A) "REG_UBSALNUM" +set infonames(B) "REG_UBRACES" +set infonames(E) "REG_UBBS" +set infonames(H) "REG_ULOOKAHEAD" +set infonames(I) "REG_UIMPOSSIBLE" +set infonames(L) "REG_ULOCALE" +set infonames(M) "REG_UUNPORT" +set infonames(N) "REG_UEMPTYMATCH" +set infonames(P) "REG_UNONPOSIX" +set infonames(Q) "REG_UBOUNDS" +set infonames(R) "REG_UBACKREF" +set infonames(S) "REG_UUNSPEC" +set infonames(T) "REG_USHORTEST" +set infonames(U) "REG_UPBOTCH" +set infonameorder "RHQBAUEPSMLNIT" ;# must match bit order, lsb first + +# set major test number and description +proc doing {major desc} { + global prefix description testbypassed + + if {$testbypassed != 0} { + puts stdout "!!! bypassed $testbypassed tests in\ + $prefix, `$description'" + } + + set prefix reg-$major + set description "reg $desc" + set testbypassed 0 +} + +# build test number (internal) +proc tno {testid} { + return [join $testid .] 
+} + +# build description, with possible modifiers (internal) +proc desc {testid} { + global description + + set d $description + if {[llength $testid] > 1} { + set d "([lreplace $testid 0 0]) $d" + } + return $d +} + +# build trailing options and flags argument from a flags string (internal) +proc flags {fl} { + global xflags + + set args [list] + set flags "" + foreach f [split $fl ""] { + switch -exact -- $f { + "i" { lappend args "-nocase" } + "x" { lappend args "-expanded" } + "n" { lappend args "-line" } + "p" { lappend args "-linestop" } + "w" { lappend args "-lineanchor" } + "-" { } + default { append flags $f } + } + } + if {[string compare $flags ""] != 0} { + lappend args -$xflags $flags + } + return $args +} + +# build info-flags list from a flags string (internal) +proc infoflags {fl} { + global infonames infonameorder + + set ret [list] + foreach f [split $infonameorder ""] { + if {[string first $f $fl] >= 0} { + lappend ret $infonames($f) + } + } + return $ret +} + +# compilation error expected +proc e {testid flags re err} { + global prefix ask errorCode + + # Tcl locale stuff doesn't do the ch/xy test fakery yet + if {[string first "+" $flags] >= 0} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] localeRegexp {} {} + return + } + + # if &, test as both ARE and BRE + set amp [string first "&" $flags] + if {$amp >= 0} { + set f [string range $flags 0 [expr $amp - 1]] + append f [string range $flags [expr $amp + 1] end] + e [linsert $testid end ARE] ${f} $re $err + e [linsert $testid end BRE] ${f}b $re $err + return + } + + set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]] + set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]" + test $prefix.[tno $testid] [desc $testid] \ + {testregexp} $run [list 1 REG_$err] +} + +# match failure expected +proc f {testid flags re target args} { + global prefix description ask + + # Tcl locale stuff doesn't do the ch/xy test fakery yet + if {[string first 
"+" $flags] >= 0} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] localeRegexp {} {} + return + } + + # if &, test as both ARE and BRE + set amp [string first "&" $flags] + if {$amp >= 0} { + set f [string range $flags 0 [expr $amp - 1]] + append f [string range $flags [expr $amp + 1] end] + eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \ + $target] + eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \ + $target] + return + } + + set f [flags $flags] + set infoflags [infoflags $flags] + set ccmd [concat [list testregexp -$ask] $f [list $re]] + set nsub [expr [llength $args] - 1] + if {$nsub == -1} { + # didn't tell us number of subexps + set ccmd "lreplace \[$ccmd\] 0 0" + set info [list $infoflags] + } else { + set info [list $nsub $infoflags] + } + lappend testid "compile" + test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info + + set testid [lreplace $testid end end "execute"] + set ecmd [concat [list testregexp] $f [list $re $target]] + test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0 +} + +# match expected, internal routine that does the work +# parameters like the "real" routines except they don't have "opts", +# which is a possibly-empty list of switches for the regexp match attempt +# The ! flag is used to indicate expected match failure (for REG_EXPECT, +# which wants argument testing even in the event of failure). 
+proc matchexpected {opts testid flags re target args} { + global prefix description ask regBug + + if {[info exists regBug] && $regBug} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1} + return + } + + # Tcl locale stuff doesn't do the ch/xy test fakery yet + if {[string first "+" $flags] >= 0} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] localeRegexp {} {} + return + } + + # if &, test as both BRE and ARE + set amp [string first "&" $flags] + if {$amp >= 0} { + set f [string range $flags 0 [expr $amp - 1]] + append f [string range $flags [expr $amp + 1] end] + eval [concat [list matchexpected $opts \ + [linsert $testid end ARE] ${f} $re $target] $args] + eval [concat [list matchexpected $opts \ + [linsert $testid end BRE] ${f}b $re $target] $args] + return + } + + set f [flags $flags] + set infoflags [infoflags $flags] + set ccmd [concat [list testregexp -$ask] $f [list $re]] + set ecmd [concat [list testregexp] $opts $f [list $re $target]] + + set nsub [expr [llength $args] - 1] + set names [list] + set refs "" + for {set i 0} {$i <= $nsub} {incr i} { + if {$i == 0} { + set name match + } else { + set name sub$i + } + lappend names $name + append refs " \$$name" + set $name "" + } + if {[string first "o" $flags] >= 0} { ;# REG_NOSUB kludge + set nsub 0 ;# unsigned value cannot be -1 + } + if {[string first "t" $flags] >= 0} { ;# REG_EXPECT + incr nsub -1 ;# the extra does not count + } + set ecmd [concat $ecmd $names] + set erun "list \[$ecmd\] $refs" + set retcode [list 1] + if {[string first "!" 
$flags] >= 0} { + set retcode [list 0] + } + set result [concat $retcode $args] + + set info [list $nsub $infoflags] + lappend testid "compile" + test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info + set testid [lreplace $testid end end "execute"] + test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result +} + +# match expected (no missing, empty, or ambiguous submatches) +# m testno flags re target mat submat ... +proc m {args} { + eval matchexpected [linsert $args 0 [list]] +} + +# match expected (full fanciness) +# i testno flags re target mat submat ... +proc i {args} { + eval matchexpected [linsert $args 0 [list "-indices"]] +} + +# partial match expected +# p testno flags re target mat "" ... +# Quirk: number of ""s must be one more than number of subREs. +proc p {args} { + set f [lindex $args 1] ;# add ! flag + set args [lreplace $args 1 1 "!$f"] + eval matchexpected [linsert $args 0 [list "-indices"]] +} + +# test is a knownBug +proc knownBug {args} { + set ::regBug 1 + uplevel #0 $args + set ::regBug 0 +} + + + +# the tests themselves + + + +# support functions and preliminary misc. +# This is sensitive to changes in message wording, but we really have to +# test the code->message expansion at least once. 
+test reg-0.1 "regexp error reporting" { + list [catch {regexp (*) ign} msg] $msg +} {1 {couldn't compile regular expression pattern: quantifier operand invalid}} + + + +doing 1 "basic sanity checks" +m 1 & abc abc abc +f 2 & abc def +m 3 & abc xyabxabce abc + + + +doing 2 "invalid option combinations" +e 1 qe a INVARG +e 2 qa a INVARG +e 3 qx a INVARG +e 4 qn a INVARG +e 5 ba a INVARG + + + +doing 3 "basic syntax" +i 1 &NS "" a {0 -1} +m 2 NS a| a a +m 3 - a|b a a +m 4 - a|b b b +m 5 NS a||b b b +m 6 & ab ab ab + + + +doing 4 "parentheses" +m 1 - (a)e ae ae a +m 2 o (a)e ae +m 3 b {\(a\)b} ab ab a +m 4 - a((b)c) abc abc bc b +m 5 - a(b)(c) abc abc b c +e 6 - a(b EPAREN +e 7 b {a\(b} EPAREN +# sigh, we blew it on the specs here... someday this will be fixed in POSIX, +# but meanwhile, it's fixed in AREs +m 8 eU a)b a)b a)b +e 9 - a)b EPAREN +e 10 b {a\)b} EPAREN +m 11 P a(?:b)c abc abc +e 12 e a(?:b)c BADRPT +i 13 S a()b ab {0 1} {1 0} +m 14 SP a(?:)b ab ab +i 15 S a(|b)c ac {0 1} {1 0} +m 16 S a(b|)c abc abc b + + + +doing 5 "simple one-char matching" +# general case of brackets done later +m 1 & a.b axb axb +f 2 &n "a.b" "a\nb" +m 3 & {a[bc]d} abd abd +m 4 & {a[bc]d} acd acd +f 5 & {a[bc]d} aed +f 6 & {a[^bc]d} abd +m 7 & {a[^bc]d} aed aed +f 8 &p "a\[^bc]d" "a\nd" + + + +doing 6 "context-dependent syntax" +# plus odds and ends +e 1 - * BADRPT +m 2 b * * * +m 3 b {\(*\)} * * * +e 4 - (*) BADRPT +m 5 b ^* * * +e 6 - ^* BADRPT +f 7 & ^b ^b +m 8 b x^ x^ x^ +f 9 I x^ x +m 10 n "\n^" "x\nb" "\n" +f 11 bS {\(^b\)} ^b +m 12 - (^b) b b b +m 13 & {x$} x x +m 14 bS {\(x$\)} x x x +m 15 - {(x$)} x x x +m 16 b {x$y} "x\$y" "x\$y" +f 17 I {x$y} xy +m 18 n "x\$\n" "x\n" "x\n" +e 19 - + BADRPT +e 20 - ? 
BADRPT + + + +doing 7 "simple quantifiers" +m 1 &N a* aa aa +i 2 &N a* b {0 -1} +m 3 - a+ aa aa +m 4 - a?b ab ab +m 5 - a?b b b +e 6 - ** BADRPT +m 7 bN ** *** *** +e 8 & a** BADRPT +e 9 & a**b BADRPT +e 10 & *** BADRPT +e 11 - a++ BADRPT +e 12 - a?+ BADRPT +e 13 - a?* BADRPT +e 14 - a+* BADRPT +e 15 - a*+ BADRPT + + + +doing 8 "braces" +m 1 NQ "a{0,1}" "" "" +m 2 NQ "a{0,1}" ac a +e 3 - "a{1,0}" BADBR +e 4 - "a{1,2,3}" BADBR +e 5 - "a{257}" BADBR +e 6 - "a{1000}" BADBR +e 7 - "a{1" EBRACE +e 8 - "a{1n}" BADBR +m 9 BS "a{b" "a\{b" "a\{b" +m 10 BS "a{" "a\{" "a\{" +m 11 bQ "a\\{0,1\\}b" cb b +e 12 b "a\\{0,1" EBRACE +e 13 - "a{0,1\\" BADBR +m 14 Q "a{0}b" ab b +m 15 Q "a{0,0}b" ab b +m 16 Q "a{0,1}b" ab ab +m 17 Q "a{0,2}b" b b +m 18 Q "a{0,2}b" aab aab +m 19 Q "a{0,}b" aab aab +m 20 Q "a{1,1}b" aab ab +m 21 Q "a{1,3}b" aaaab aaab +f 22 Q "a{1,3}b" b +m 23 Q "a{1,}b" aab aab +f 24 Q "a{2,3}b" ab +m 25 Q "a{2,3}b" aaaab aaab +f 26 Q "a{2,}b" ab +m 27 Q "a{2,}b" aaaab aaaab + + + +doing 9 "brackets" +m 1 & {a[bc]} ac ac +m 2 & {a[-]} a- a- +m 3 & {a[[.-.]]} a- a- +m 4 &L {a[[.zero.]]} a0 a0 +m 5 &LM {a[[.zero.]-9]} a2 a2 +m 6 &M {a[0-[.9.]]} a2 a2 +m 7 &+L {a[[=x=]]} ax ax +m 8 &+L {a[[=x=]]} ay ay +f 9 &+L {a[[=x=]]} az +e 10 & {a[0-[=x=]]} ERANGE +m 11 &L {a[[:digit:]]} a0 a0 +e 12 & {a[[:woopsie:]]} ECTYPE +f 13 &L {a[[:digit:]]} ab +e 14 & {a[0-[:digit:]]} ERANGE +m 15 &LP {[[:<:]]a} a a +m 16 &LP {a[[:>:]]} a a +e 17 & {a[[..]]b} ECOLLATE +e 18 & {a[[==]]b} ECOLLATE +e 19 & {a[[::]]b} ECTYPE +e 20 & {a[[.a} EBRACK +e 21 & {a[[=a} EBRACK +e 22 & {a[[:a} EBRACK +e 23 & {a[} EBRACK +e 24 & {a[b} EBRACK +e 25 & {a[b-} EBRACK +e 26 & {a[b-c} EBRACK +m 27 &M {a[b-c]} ab ab +m 28 & {a[b-b]} ab ab +m 29 &M {a[1-2]} a2 a2 +e 30 & {a[c-b]} ERANGE +e 31 & {a[a-b-c]} ERANGE +m 32 &M {a[--?]b} a?b a?b +m 33 & {a[---]b} a-b a-b +m 34 & {a[]b]c} a]c a]c +m 35 EP {a[\]]b} a]b a]b +f 36 bE {a[\]]b} a]b +m 37 bE {a[\]]b} "a\\]b" "a\\]b" +m 38 eE {a[\]]b} "a\\]b" "a\\]b" +m 39 EP 
{a[\\]b} "a\\b" "a\\b" +m 40 eE {a[\\]b} "a\\b" "a\\b" +m 41 bE {a[\\]b} "a\\b" "a\\b" +e 42 - {a[\Z]b} EESCAPE +m 43 & {a[[b]c} "a\[c" "a\[c" +m 44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ + "a\u0102\u02ffb" "a\u0102\u02ffb" + + + +doing 10 "anchors and newlines" +m 1 & ^a a a +f 2 &^ ^a a +i 3 &N ^ a {0 -1} +i 4 & {a$} aba {2 2} +f 5 {&$} {a$} a +i 6 &N {$} ab {2 1} +m 7 &n ^a a a +m 8 &n "^a" "b\na" "a" +i 9 &w "^a" "a\na" {0 0} +i 10 &n^ "^a" "a\na" {2 2} +m 11 &n {a$} a a +m 12 &n "a\$" "a\nb" "a" +i 13 &n "a\$" "a\na" {0 0} +i 14 N ^^ a {0 -1} +m 15 b ^^ ^ ^ +i 16 N {$$} a {1 0} +m 17 b {$$} "\$" "\$" +m 18 &N {^$} "" "" +f 19 &N {^$} a +i 20 &nN "^\$" "a\n\nb" {2 1} +m 21 N {$^} "" "" +m 22 b {$^} "\$^" "\$^" +m 23 P {\Aa} a a +m 24 ^P {\Aa} a a +f 25 ^nP {\Aa} "b\na" +m 26 P {a\Z} a a +m 27 {$P} {a\Z} a a +f 28 {$nP} {a\Z} "a\nb" +e 29 - ^* BADRPT +e 30 - {$*} BADRPT +e 31 - {\A*} BADRPT +e 32 - {\Z*} BADRPT + + + +doing 11 "boundary constraints" +m 1 &LP {[[:<:]]a} a a +m 2 &LP {[[:<:]]a} -a a +f 3 &LP {[[:<:]]a} ba +m 4 &LP {a[[:>:]]} a a +m 5 &LP {a[[:>:]]} a- a +f 6 &LP {a[[:>:]]} ab +m 7 bLP {\<a} a a +f 8 bLP {\<a} ba +m 9 bLP {a\>} a a +f 10 bLP {a\>} ab +m 11 LP {\ya} a a +f 12 LP {\ya} ba +m 13 LP {a\y} a a +f 14 LP {a\y} ab +m 15 LP {a\Y} ab a +f 16 LP {a\Y} a- +f 17 LP {a\Y} a +f 18 LP {-\Y} -a +m 19 LP {-\Y} -% - +f 20 LP {\Y-} a- +e 21 - {[[:<:]]*} BADRPT +e 22 - {[[:>:]]*} BADRPT +e 23 b {\<*} BADRPT +e 24 b {\>*} BADRPT +e 25 - {\y*} BADRPT +e 26 - {\Y*} BADRPT +m 27 LP {\ma} a a +f 28 LP {\ma} ba +m 29 LP {a\M} a a +f 30 LP {a\M} ab +f 31 ILP {\Ma} a +f 32 ILP {a\m} a + + + +doing 12 "character classes" +m 1 LP {a\db} a0b a0b +f 2 LP {a\db} axb +f 3 LP {a\Db} a0b +m 4 LP {a\Db} axb axb +m 5 LP "a\\sb" "a b" "a b" +m 6 LP "a\\sb" "a\tb" "a\tb" +m 7 LP "a\\sb" "a\nb" "a\nb" +f 8 LP {a\sb} axb +m 9 LP {a\Sb} axb axb +f 10 LP "a\\Sb" "a b" +m 11 LP {a\wb} axb axb +f 12 LP {a\wb} a-b +f 13 LP {a\Wb} axb +m 14 LP {a\Wb} a-b a-b +m 15 LP 
{\y\w+z\y} adze-guz guz +m 16 LPE {a[\d]b} a1b a1b +m 17 LPE "a\[\\s]b" "a b" "a b" +m 18 LPE {a[\w]b} axb axb + + + +doing 13 "escapes" +e 1 & "a\\" EESCAPE +m 2 - {a\<b} a<b a<b +m 3 e {a\<b} a<b a<b +m 4 bAS {a\wb} awb awb +m 5 eAS {a\wb} awb awb +m 6 PL "a\\ab" "a\007b" "a\007b" +m 7 P "a\\bb" "a\bb" "a\bb" +m 8 P {a\Bb} "a\\b" "a\\b" +m 9 MP "a\\chb" "a\bb" "a\bb" +m 10 MP "a\\cHb" "a\bb" "a\bb" +m 11 LMP "a\\e" "a\033" "a\033" +m 12 P "a\\fb" "a\fb" "a\fb" +m 13 P "a\\nb" "a\nb" "a\nb" +m 14 P "a\\rb" "a\rb" "a\rb" +m 15 P "a\\tb" "a\tb" "a\tb" +m 16 P "a\\u0008x" "a\bx" "a\bx" +e 17 - {a\u008x} EESCAPE +m 18 P "a\\u00088x" "a\b8x" "a\b8x" +m 19 P "a\\U00000008x" "a\bx" "a\bx" +e 20 - {a\U0000008x} EESCAPE +m 21 P "a\\vb" "a\vb" "a\vb" +m 22 MP "a\\x08x" "a\bx" "a\bx" +e 23 - {a\xq} EESCAPE +m 24 MP "a\\x0008x" "a\bx" "a\bx" +e 25 - {a\z} EESCAPE +m 26 MP "a\\010b" "a\bb" "a\bb" + + + +doing 14 "back references" +# ugh +m 1 RP {a(b*)c\1} abbcbb abbcbb bb +m 2 RP {a(b*)c\1} ac ac "" +f 3 RP {a(b*)c\1} abbcb +m 4 RP {a(b*)\1} abbcbb abb b +m 5 RP {a(b|bb)\1} abbcbb abb b +m 6 RP {a([bc])\1} abb abb b +f 7 RP {a([bc])\1} abc +m 8 RP {a([bc])\1} abcabb abb b +f 9 RP {a([bc])*\1} abc +f 10 RP {a([bc])\1} abB +m 11 iRP {a([bc])\1} abB abB b +m 12 RP {a([bc])\1+} abbb abbb b +m 13 QRP "a(\[bc])\\1{3,4}" abbbb abbbb b +f 14 QRP "a(\[bc])\\1{3,4}" abbb +m 15 RP {a([bc])\1*} abbb abbb b +m 16 RP {a([bc])\1*} ab ab b +m 17 RP {a([bc])(\1*)} ab ab b "" +e 18 - {a((b)\1)} ESUBREG +e 19 - {a(b)c\2} ESUBREG +m 20 bR {a\(b*\)c\1} abbcbb abbcbb bb + + + +doing 15 "octal escapes vs back references" +# initial zero is always octal +m 1 MP "a\\010b" "a\bb" "a\bb" +m 2 MP "a\\0070b" "a\0070b" "a\0070b" +m 3 MP "a\\07b" "a\007b" "a\007b" +m 4 MP "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c" "abbbbbbbbbb\007c" \ + "abbbbbbbbbb\007c" "b" "b" "b" "b" "b" "b" \ + "b" "b" "b" "b" +# a single digit is always a backref +e 5 - {a\7b} ESUBREG +# otherwise it's a backref only if within range 
(barf!) +m 6 MP "a\\10b" "a\bb" "a\bb" +m 7 MP {a\101b} aAb aAb +m 8 RP {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c} abbbbbbbbbbbc \ + abbbbbbbbbbbc b b b b b b b \ + b b b +# but we're fussy about border cases -- guys who want octal should use the zero +e 9 - {a((((((((((b\10))))))))))c} ESUBREG +# BREs don't have octal, EREs don't have backrefs +m 10 MP "a\\12b" "a\nb" "a\nb" +e 11 b {a\12b} ESUBREG +m 12 eAS {a\12b} a12b a12b + + + +doing 16 "expanded syntax" +m 1 xP "a b c" "abc" "abc" +m 2 xP "a b #oops\nc\td" "abcd" "abcd" +m 3 x "a\\ b\\\tc" "a b\tc" "a b\tc" +m 4 xP "a b\\#c" "ab#c" "ab#c" +m 5 xP "a b\[c d]e" "ab e" "ab e" +m 6 xP "a b\[c#d]e" "ab#e" "ab#e" +m 7 xP "a b\[c#d]e" "abde" "abde" +m 8 xSPB "ab{ d" "ab\{d" "ab\{d" +m 9 xPQ "ab{ 1 , 2 }c" "abc" "abc" + + + +doing 17 "misc syntax" +m 1 P a(?#comment)b ab ab + + + +doing 18 "unmatchable REs" +f 1 I a^b ab + + + +doing 19 "case independence" +m 1 &i ab Ab Ab +m 2 &i {a[bc]} aC aC +f 3 &i {a[^bc]} aB +m 4 &iM {a[b-d]} aC aC +f 5 &iM {a[^b-d]} aC + + + +doing 20 "directors and embedded options" +e 1 & ***? BADPAT +m 2 q ***? ***? ***? 
+m 3 &P ***=a*b a*b a*b +m 4 q ***=a*b ***=a*b ***=a*b +m 5 bLP {***:\w+} ab ab +m 6 eLP {***:\w+} ab ab +e 7 & ***:***=a*b BADRPT +m 8 &P ***:(?b)a+b a+b a+b +m 9 P (?b)a+b a+b a+b +e 10 e {(?b)\w+} BADRPT +m 11 bAS {(?b)\w+} (?b)w+ (?b)w+ +m 12 iP (?c)a a a +f 13 iP (?c)a A +m 14 APS {(?e)\W+} WW WW +m 15 P (?i)a+ Aa Aa +f 16 P "(?m)a.b" "a\nb" +m 17 P "(?m)^b" "a\nb" "b" +f 18 P "(?n)a.b" "a\nb" +m 19 P "(?n)^b" "a\nb" "b" +f 20 P "(?p)a.b" "a\nb" +f 21 P "(?p)^b" "a\nb" +m 22 P (?q)a+b a+b a+b +m 23 nP "(?s)a.b" "a\nb" "a\nb" +m 24 xP "(?t)a b" "a b" "a b" +m 25 P "(?w)a.b" "a\nb" "a\nb" +m 26 P "(?w)^b" "a\nb" "b" +m 27 P "(?x)a b" "ab" "ab" +e 28 - (?z)ab BADOPT +m 29 P (?ici)a+ Aa Aa +e 30 P (?i)(?q)a+ BADRPT +m 31 P (?q)(?i)a+ (?i)a+ (?i)a+ +m 32 P (?qe)a+ a a +m 33 xP "(?q)a b" "a b" "a b" +m 34 P "(?qx)a b" "a b" "a b" +m 35 P (?qi)ab Ab Ab + + + +doing 21 "capturing" +m 1 - a(b)c abc abc b +m 2 P a(?:b)c xabc abc +m 3 - a((b))c xabcy abc b b +m 4 P a(?:(b))c abcy abc b +m 5 P a((?:b))c abc abc b +m 6 P a(?:(?:b))c abc abc +i 7 Q "a(b){0}c" ac {0 1} {-1 -1} +m 8 - a(b)c(d)e abcde abcde b d +m 9 - (b)c(d)e bcde bcde b d +m 10 - a(b)(d)e abde abde b d +m 11 - a(b)c(d) abcd abcd b d +m 12 - (ab)(cd) xabcdy abcd ab cd +m 13 - a(b)?c xabcy abc b +i 14 - a(b)?c xacy {1 2} {-1 -1} +m 15 - a(b)?c(d)?e xabcdey abcde b d +i 16 - a(b)?c(d)?e xacdey {1 4} {-1 -1} {3 3} +i 17 - a(b)?c(d)?e xabcey {1 4} {2 2} {-1 -1} +i 18 - a(b)?c(d)?e xacey {1 3} {-1 -1} {-1 -1} +m 19 - a(b)*c xabcy abc b +i 20 - a(b)*c xabbbcy {1 5} {4 4} +i 21 - a(b)*c xacy {1 2} {-1 -1} +m 22 - a(b*)c xabbbcy abbbc bbb +m 23 - a(b*)c xacy ac "" +f 24 - a(b)+c xacy +m 25 - a(b)+c xabcy abc b +i 26 - a(b)+c xabbbcy {1 5} {4 4} +m 27 - a(b+)c xabbbcy abbbc bbb +i 28 Q "a(b){2,3}c" xabbbcy {1 5} {4 4} +i 29 Q "a(b){2,3}c" xabbcy {1 4} {3 3} +f 30 Q "a(b){2,3}c" xabcy +m 31 LP "\\y(\\w+)\\y" "-- abc-" "abc" "abc" +m 32 - a((b|c)d+)+ abacdbd acdbd bd b +m 33 N (.*).* abc abc abc +m 34 N (a*)* bc "" "" + 
+ + +doing 22 "multicharacter collating elements" +# again ugh +m 1 &+L {a[c]e} ace ace +f 2 &+IL {a[c]h} ach +m 3 &+L {a[[.ch.]]} ach ach +f 4 &+L {a[[.ch.]]} ace +m 5 &+L {a[c[.ch.]]} ac ac +m 6 &+L {a[c[.ch.]]} ace ac +m 7 &+L {a[c[.ch.]]} ache ach +f 8 &+L {a[^c]e} ace +m 9 &+L {a[^c]e} abe abe +m 10 &+L {a[^c]e} ache ache +f 11 &+L {a[^[.ch.]]} ach +m 12 &+L {a[^[.ch.]]} ace ac +m 13 &+L {a[^[.ch.]]} ac ac +m 14 &+L {a[^[.ch.]]} abe ab +f 15 &+L {a[^c[.ch.]]} ach +f 16 &+L {a[^c[.ch.]]} ace +f 17 &+L {a[^c[.ch.]]} ac +m 18 &+L {a[^c[.ch.]]} abe ab +m 19 &+L {a[^b]} ac ac +m 20 &+L {a[^b]} ace ac +m 21 &+L {a[^b]} ach ach +f 22 &+L {a[^b]} abe + + + +doing 23 "lookahead constraints" +m 1 HP a(?=b)b* ab ab +f 2 HP a(?=b)b* a +m 3 HP a(?=b)b*(?=c)c* abc abc +f 4 HP a(?=b)b*(?=c)c* ab +f 5 HP a(?!b)b* ab +m 6 HP a(?!b)b* a a +m 7 HP (?=b)b b b +f 8 HP (?=b)b a + + + +doing 24 "non-greedy quantifiers" +m 1 PT ab+? abb ab +m 2 PT ab+?c abbc abbc +m 3 PT ab*? abb a +m 4 PT ab*?c abbc abbc +m 5 PT ab?? ab a +m 6 PT ab??c abc abc +m 7 PQT "ab{2,4}?" abbbb abb +m 8 PQT "ab{2,4}?c" abbbbc abbbbc +m 9 - 3z* 123zzzz456 3zzzz +m 10 PT 3z*? 123zzzz456 3 +m 11 - z*4 123zzzz456 zzzz4 +m 12 PT z*?4 123zzzz456 zzzz4 + + + +doing 25 "mixed quantifiers" +# this is very incomplete as yet +# should include | +m 1 PNT {^(.*?)(a*)$} xyza xyza xyz a +m 2 PNT {^(.*?)(a*)$} xyzaa xyzaa xyz aa +m 3 PNT {^(.*?)(a*)$} xyz xyz xyz "" + + + +doing 26 "tricky cases" +# attempts to trick the matcher into accepting a short match +m 1 - (week|wee)(night|knights) weeknights weeknights \ + wee knights +m 2 RP {a(bc*).*\1} abccbccb abccbccb b +m 3 - {a(b.[bc]*)+} abcbd abcbd bd + + + +doing 27 "implementation misc." +# duplicate arcs are suppressed +m 1 P a(?:b|b)c abc abc +# make color/subcolor relationship go back and forth +m 2 & {[ab][ab][ab]} aba aba +m 3 & {[ab][ab][ab][ab][ab][ab][ab]} abababa abababa + + + +doing 28 "boundary busters etc." 
+# color-descriptor allocation changes at 10 +m 1 & abcdefghijkl abcdefghijkl abcdefghijkl +# so does arc allocation +m 2 P a(?:b|c|d|e|f|g|h|i|j|k|l|m)n agn agn +# subexpression tracking also at 10 +m 3 - a(((((((((((((b)))))))))))))c abc abc b b b b b b b b b b b b b +# state-set handling changes slightly at unsigned size (might be 64...) +# (also stresses arc allocation) +m 4 Q "ab{1,100}c" abbc abbc +m 5 Q "ab{1,100}c" abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \ + abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc +m 6 Q "ab{1,100}c" \ + abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \ + abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc +# force small cache and bust it, several ways +m 7 LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh +m 8 %LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh +m 9 %LP {\w+abcdefghijklmnopqrst} xyzabcdefghijklmnopqrst \ + xyzabcdefghijklmnopqrst +i 10 %LP {\w+(abcdefgh)?} xyz {0 2} {-1 -1} +i 11 %LP {\w+(abcdefgh)?} xyzabcdefg {0 9} {-1 -1} +i 12 %LP {\w+(abcdefghijklmnopqrst)?} xyzabcdefghijklmnopqrs \ + {0 21} {-1 -1} + + + +doing 29 "incomplete matches" +p 1 t def abc {3 2} "" +p 2 t bcd abc {1 2} "" +p 3 t abc abab {0 3} "" +p 4 t abc abdab {3 4} "" +i 5 t abc abc {0 2} {0 2} +i 6 t abc xyabc {2 4} {2 4} +p 7 t abc+ xyab {2 3} "" +i 8 t abc+ xyabc {2 4} {2 4} +knownBug i 9 t abc+ xyabcd {2 4} {6 5} +i 10 t abc+ xyabcdd {2 4} {7 6} +p 11 tPT abc+? xyab {2 3} "" +# the retain numbers in these two may look wrong, but they aren't +i 12 tPT abc+? xyabc {2 4} {5 4} +i 13 tPT abc+? xyabcc {2 4} {6 5} +i 14 tPT abc+? xyabcd {2 4} {6 5} +i 15 tPT abc+? xyabcdd {2 4} {7 6} +i 16 t abcd|bc xyabc {3 4} {2 4} +p 17 tn .*k "xx\nyyy" {3 5} "" + + +doing 30 "misc. 
oddities and old bugs" +e 1 & *** BADRPT +m 2 N a?b* abb abb +m 3 N a?b* bb bb +m 4 & a*b aab aab +m 5 & ^a*b aaaab aaaab +m 6 &M {[0-6][1-2][0-3][0-6][1-6][0-6]} 010010 010010 +# temporary REG_BOSONLY kludge +m 7 s abc abcd abc +f 8 s abc xabcd +# back to normal stuff +m 9 HLP {(?n)^(?![t#])\S+} "tk\n\n#\n#\nit0" it0 + + +# flush any leftover complaints +doing 0 "flush" + +# Tests resulting from bugs reported by users +test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} { + set str {2:::DebugWin32} + set re {([[:xdigit:]])([[:space:]]*)} + list [regexp $re $str match xdigit spaces] $match $xdigit $spaces + # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!! +} {1 2 2 {}} + +test reg-32.1 {canmatch functionality -- at end} { + set pat {blah} + set line "asd asd" + # can match at the final d, if '%' follows + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 7} + +test reg-32.2 {canmatch functionality -- at end} { + set pat {s%$} + set line "asd asd" + # can only match after the end of the string + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 7} + +test reg-32.3 {canmatch functionality -- not last char} { + set pat {[^d]%$} + set line "asd asd" + # can only match after the end of the string + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 7} + +test reg-32.3.1 {canmatch functionality -- no match} { + set pat {\Zx} + set line "asd asd" + # can match the last char, if followed by x + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 -1} + +test reg-32.4 {canmatch functionality -- last char} {knownBug} { + set pat {.x} + set line "asd asd" + # can match the last char, if followed by x + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 6} + +test reg-32.4.1 {canmatch functionality -- last char} {knownBug} { + set pat {.x$} + set line "asd asd" + # can match the last char, if 
followed by x + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 6} + +test reg-32.5 {canmatch functionality -- last char} {knownBug} { + set pat {.[^d]x$} + set line "asd asd" + # can match the last char, if followed by not-d and x. + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 6} + +test reg-32.6 {canmatch functionality -- last char} {knownBug} { + set pat {[^a]%[^\r\n]*$} + set line "asd asd" + # can match at the final d, if '%' follows + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 6} + +test reg-32.7 {canmatch functionality -- last char} {knownBug} { + set pat {[^a]%$} + set line "asd asd" + # can match at the final d, if '%' follows + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 6} + +test reg-32.8 {canmatch functionality -- last char} {knownBug} { + set pat {[^x]%$} + set line "asd asd" + # can match at the final d, if '%' follows + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 6} + +test reg-32.9 {canmatch functionality -- more complex case} {knownBug} { + set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$} + set line "asd asd" + # can match at the final d, if '%' follows + set res [testregexp -xflags -- c $pat $line resvar] + lappend res $resvar +} {0 6} + +# Tests reg-33.*: Checks for bug fixes + +test reg-33.1 {Bug 230589} { + regexp {[ ]*(^|[^%])%V} "*%V2" m s +} 1 + +test reg-33.2 {Bug 504785} { + regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la +} {bbcos_001_c01.q1la bbcos _001_c01 q1 l} + +test reg-33.3 {Bug 505048} { + regexp {\A\s*[^<]*\s*<([^>]+)>} a<a> +} 1 + +test reg-33.4 {Bug 505048} { + regexp {\A\s*([^b]*)b} ab +} 1 + +test reg-33.5 {Bug 505048} { + regexp {\A\s*[^b]*(b)} ab +} 1 + +test reg-33.6 {Bug 505048} { + regexp {\A(\s*)[^b]*(b)} ab +} 1 + +test reg-33.7 {Bug 505048} { + regexp {\A\s*[^b]*b} ab +} 1 + +test reg-33.8 {Bug 505048} { + regexp -inline 
{\A\s*[^b]*b} ab +} ab + +test reg-33.9 {Bug 505048} { + regexp -indices -inline {\A\s*[^b]*b} ab +} {{0 1}} + +test reg-33.10 {Bug 840258} { + regsub {(^|\n)+\.*b} \n.b {} tmp +} 1 + +test reg-33.11 {Bug 840258} { + regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \ + "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp +} 1 + +# cleanup +::tcltest::cleanupTests +return diff --git a/tests/regex/regex.cpp b/tests/regex/regex.cpp new file mode 100644 index 0000000000..733e5aed69 --- /dev/null +++ b/tests/regex/regex.cpp @@ -0,0 +1,421 @@ +/////////////////////////////////////////////////////////////////////////////// +// Name: tests/regex/regex.cpp +// Purpose: Test the built-in regex lib and wxRegEx +// Author: Mike Wetherell +// RCS-ID: $Id$ +// Copyright: (c) 2004 Mike Wetherell +// Licence: wxWidgets licence +/////////////////////////////////////////////////////////////////////////////// + +// +// Notes: +// +// To run just one section, say wx_1, do this: +// test regex.wx_1 +// +// To run all the regex tests: +// test regex +// +// Some tests must be skipped since they use features which we do not make +// available through wxRegEx. To see the list of tests that have been skipped +// turn on verbose logging, e.g.: +// test --verbose regex +// +// The tests here are for the builtin library, tests for wxRegEx in general +// should go in another module. +// +// The tests are generated from Henry Spencer's reg.test, additional test +// can be added in wxreg.test. These test files are then turned into a C++ +// include file 'regex.inc' (included below) using a script 'regex.pl'. +// + +#if defined(__GNUG__) && !defined(__APPLE__) + #pragma implementation + #pragma interface +#endif + +// For compilers that support precompilation, includes "wx/wx.h". 
+#include "wx/wxprec.h" + +#ifdef __BORLANDC__ + #pragma hdrstop +#endif + +// for all others, include the necessary headers +#ifndef WX_PRECOMP + #include "wx/wx.h" +#endif + +#include "wx/regex.h" +#include "wx/cppunit.h" +#include <iomanip> +#include <stdexcept> + +using namespace std; +using namespace CppUnit; + +// many of the tests are specific to the builtin regex lib, so only attempts +// to do them when using the builtin regex lib. +// +#ifdef wxHAS_REGEX_ADVANCED + + +/////////////////////////////////////////////////////////////////////////////// +// The test case - an instance represents a single test + +class RegExTestCase : public TestCase +{ +public: + // constructor - create a single testcase + RegExTestCase( + const string& name, + const char *mode, + const char *id, + const char *flags, + const char *pattern, + const char *data, + const vector<const char *>& expected); + +protected: + // run this testcase + void runTest(); + +private: + // workers + wxString Conv(const char *str); + void parseFlags(const wxString& flags); + void doTest(int flavor); + static size_t matchCount(const wxString& expr, int flags); + static wxString quote(const wxString& arg); + const wxChar *convError() const { return _T("<cannot convert>"); } + + // assertions - adds some information about the test that failed + void fail(const wxString& msg) const; + void failIf(bool condition, const wxString& msg) const + { if (condition) fail(msg); } + + // mode, id, flags, pattern, test data, expected results... 
+ int m_mode; + wxString m_id; + wxString m_flags; + wxString m_pattern; + wxString m_data; + wxArrayString m_expected; + + // the flag decoded + int m_compileFlags; + int m_matchFlags; + bool m_basic; + bool m_extended; + bool m_advanced; +}; + +// constructor - throws Exception on failure +// +RegExTestCase::RegExTestCase( + const string& name, + const char *mode, + const char *id, + const char *flags, + const char *pattern, + const char *data, + const vector<const char *>& expected) + : + TestCase(name), + m_mode(mode[0]), + m_id(Conv(id)), + m_flags(Conv(flags)), + m_pattern(Conv(pattern)), + m_data(Conv(data)), + m_compileFlags(0), + m_matchFlags(0), + m_basic(false), + m_extended(false), + m_advanced(false) +{ + bool badconv = m_pattern == convError() || m_data == convError(); + vector<const char *>::const_iterator it; + + for (it = expected.begin(); it != expected.end(); ++it) { + m_expected.push_back(Conv(*it)); + badconv = badconv || *m_expected.rbegin() == convError(); + } + + failIf(badconv, _T("cannot convert to default character encoding")); + + // the flags need further parsing... 
+ parseFlags(m_flags); + +#ifndef wxHAS_REGEX_ADVANCED + failIf(!m_basic && !m_extended, _T("advanced regexs not available")); +#endif +} + +// convert a string from UTF8 to the internal encoding +// +wxString RegExTestCase::Conv(const char *str) +{ + const wxWCharBuffer wstr = wxConvUTF8.cMB2WC(str); + const wxWC2WXbuf buf = wxConvCurrent->cWC2WX(wstr); + + if (!buf || wcscmp(wxConvCurrent->cWX2WC(buf), wstr) != 0) + return convError(); + else + return buf; +} + +// Parse flags +// +void RegExTestCase::parseFlags(const wxString& flags) +{ + for (const wxChar *p = flags; *p; p++) { + switch (*p) { + // noop + case '-': break; + + // we don't fully support these flags, but they don't stop us + // checking for success of failure of the match, so treat as noop + case 'A': case 'B': case 'E': case 'H': + case 'I': case 'L': case 'M': case 'N': + case 'P': case 'Q': case 'R': case 'S': + case 'T': case 'U': case '%': + break; + + // match options + case '^': m_matchFlags |= wxRE_NOTBOL; break; + case '$': m_matchFlags |= wxRE_NOTEOL; break; +#if wxUSE_UNICODE + case '*': break; +#endif + // compile options + case '&': m_advanced = m_basic = true; break; + case 'b': m_basic = true; break; + case 'e': m_extended = true; break; + case 'i': m_compileFlags |= wxRE_ICASE; break; + case 'o': m_compileFlags |= wxRE_NOSUB; break; + case 'n': m_compileFlags |= wxRE_NEWLINE; break; + case 't': if (strchr("ep", m_mode)) break; // else fall through... 
+ + // anything else we must skip the test + default: + fail(wxString::Format( + _T("requires unsupported flag '%c'"), *p)); + } + } +} + +// Try test for all flavours of expression specified +// +void RegExTestCase::runTest() +{ + if (m_basic) + doTest(wxRE_BASIC); + if (m_extended) + doTest(wxRE_EXTENDED); +#ifdef wxHAS_REGEX_ADVANCED + if (m_advanced || (!m_basic && !m_extended)) + doTest(wxRE_ADVANCED); +#endif +} + +// Try the test for a single flavour of expression +// +void RegExTestCase::doTest(int flavor) +{ + wxRegEx re(m_pattern, m_compileFlags | flavor); + + // 'e' - test that the pattern fails to compile + if (m_mode == 'e') + return failIf(re.IsValid(), _T("compile suceeded (should fail)")); + failIf(!re.IsValid(), _T("compile failed")); + + bool matches = re.Matches(m_data, m_matchFlags); + + // 'f' or 'p' - test that the pattern does not match + if (m_mode == 'f' || m_mode == 'p') + return failIf(matches, _T("match suceeded (should fail)")); + + // otherwise 'm' or 'i' - test the pattern does match + failIf(!matches, _T("match failed")); + + // Check that wxRegEx is going to allocate a large enough array for the + // results we are supposed to get + failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor), + _T("wxRegEx has not allocated a large enough array for the ") + _T("number of results expected")); + + wxString result; + size_t start, len; + + for (size_t i = 0; i < m_expected.size(); i++) { + failIf(!re.GetMatch(&start, &len, i), wxString::Format( + _T("wxRegEx::GetMatch failed for match %d"), i)); + + // m - check the match returns the strings given + if (m_mode == 'm') + if (start < INT_MAX) + result = m_data.substr(start, len); + else + result = _T(""); + + // i - check the match returns the offsets given + else if (m_mode == 'i') + if (start < INT_MAX) + result = wxString::Format(_T("%d %d"), start, start + len - 1); + else + result = _T("-1 -1"); + + failIf(result != m_expected[i], wxString::Format( + _T("match(%d) == 
%s, expected == %s"), i, + quote(result).c_str(), quote(m_expected[i]).c_str())); + } +} + +// assertion - adds some information about the test that failed +// +void RegExTestCase::fail(const wxString& msg) const +{ + wxString str; + wxArrayString::const_iterator it; + + str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ") + << quote(m_pattern) << _T(" ") << quote(m_data); + + for (it = m_expected.begin(); it != m_expected.end(); ++it) + str << _T(" ") << quote(*it); + + if (str.length() > 77) + str = str.substr(0, 74) + _T("..."); + + str << _T("\n ") << msg; + + // no lossy convs so using utf8 + CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8))); +} + +// quote a string so that it can be displayed (static) +// +wxString RegExTestCase::quote(const wxString& arg) +{ + const wxChar *needEscape = _T("\a\b\t\n\v\f\r\"\\"); + const wxChar *escapes = _T("abtnvfr\"\\"); + wxString str; + + for (size_t i = 0; i < arg.length(); i++) { + wxUChar ch = arg[i]; + const wxChar *p = wxStrchr(needEscape, ch); + + if (p) + str += wxString::Format(_T("\\%c"), escapes[p - needEscape]); + else if (wxIscntrl(ch)) + str += wxString::Format(_T("\\%03o"), ch); + else + str += ch; + } + + return str.length() == arg.length() && str.find(' ') == wxString::npos ? 
+        str : _T("\"") + str + _T("\"");
+}
+
+// Count the number of subexpressions (taken from wxRegExImpl::Compile)
+//
+// Returns one (for the whole expression) plus the number of group-opening
+// brackets found in 'expr': "\(" when 'flags' contains wxRE_BASIC, an
+// unescaped "(" otherwise.
+//
+size_t RegExTestCase::matchCount(const wxString& expr, int flags)
+{
+    // there is always one for the whole expression
+    size_t nMatches = 1;
+
+    // and some more for bracketed subexpressions
+    for ( const wxChar *cptr = expr; *cptr; cptr++ )
+    {
+        if ( *cptr == _T('\\') )
+        {
+            // step onto the escaped character; if the expression ends with
+            // a lone backslash there isn't one and we must stop here, as
+            // otherwise the loop increment would step over the terminating
+            // NUL and carry on reading beyond the end of the string
+            if ( *++cptr == _T('\0') )
+                break;
+
+            // in basic RE syntax groups are inside \(...\)
+            if ( *cptr == _T('(') && (flags & wxRE_BASIC) )
+            {
+                nMatches++;
+            }
+        }
+        else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
+        {
+            // we know that the previous character is not an unquoted
+            // backslash because it would have been eaten above, so we
+            // have a bare '(' and this indicates a group start for the
+            // extended syntax
+            nMatches++;
+        }
+    }
+
+    return nMatches;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Test suite
+//
+// In a non-unicode build the regex is affected by the current locale, so
+// this derived TestSuite is used. It sets the locale in its run() method
+// for the duration of the regex tests.
+//
+// NOTE(review): the locale is not restored afterwards, so it stays set for
+// whatever runs after this suite - confirm that this is intended.
+
+class RegExTestSuite : public TestSuite
+{
+public:
+    RegExTestSuite(string name);
+    void run(TestResult *result);
+    void add(const char *mode, const char *id, const char *flags,
+             const char *pattern, const char *data, const char *expected, ...);
+};
+
+// constructor, sets the locale so that it is set when the tests are added
+//
+RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
+{
+    setlocale(LC_ALL, "");
+}
+
+// run the test suite, sets the locale again since it may have been changed
+// by another test since this suite was created
+//
+void RegExTestSuite::run(TestResult *result)
+{
+    setlocale(LC_ALL, "");
+    TestSuite::run(result);
+}
+
+// Add a testcase to the suite
+//
+// 'expected' and the variable arguments following it are the expected
+// results, each a const char *; the list must be ended by a null pointer.
+//
+void RegExTestSuite::add(
+    const char *mode,
+    const char *id,
+    const char *flags,
+    const char *pattern,
+    const char *data,
+    const char *expected, ...)
+{ + string name = getName() + "." + id; + + vector<const char *> expected_results; + va_list ap; + + for (va_start(ap, expected); expected; expected = va_arg(ap, const char *)) + expected_results.push_back(expected); + + va_end(ap); + + try { + addTest(new RegExTestCase( + name, mode, id, flags, pattern, data, expected_results)); + } + catch (Exception& e) { + wxLogInfo(wxString::Format(_T("skipping: %s\n %s\n"), + wxString(name.c_str(), wxConvUTF8).c_str(), + wxString(e.what(), wxConvUTF8).c_str())); + } +} + + +// Include the generated tests +// +#include "regex.inc" + + +#endif // wxHAS_REGEX_ADVANCED diff --git a/tests/regex/regex.inc b/tests/regex/regex.inc new file mode 100644 index 0000000000..e53d36451a --- /dev/null +++ b/tests/regex/regex.inc @@ -0,0 +1,1361 @@ +/* + * Test data for wxRegEx (UTF-8 encoded) + * + * Generated Fri Mar 5 21:35:22 2004 by regex.pl from the following files: + * + * reg.test: Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * wxreg.test: Copyright (c) 2004 Mike Wetherell. + * + * Test types: + * e compile error expected + * f match failure expected + * m successful match + * i successful match with -indices (used in checking things like + * nonparticipating subexpressions) + * p unsuccessful match with -indices (!!) (used in checking + * partial-match reporting) + * + * Flag characters: + * - no-op (placeholder) + * + provide fake xy equivalence class and ch collating element + * % force small state-set cache in matcher (to test cache replace) + * ^ beginning of string is not beginning of line + * $ end of string is not end of line + * * test is Unicode-specific, needs big character set + * + * & test as both ARE and BRE + * b BRE + * e ERE + * a turn advanced-features bit on (error unless ERE already) + * q literal string, no metacharacters at all + * + * i case-independent matching + * o ("opaque") no subexpression capture + * p newlines are half-magic, excluded from . 
and [^ only + * w newlines are half-magic, significant to ^ and $ only + * n newlines are fully magic, both effects + * x expanded RE syntax + * t incomplete-match reporting + * + * A backslash-_a_lphanumeric seen + * B ERE/ARE literal-_b_race heuristic used + * E backslash (_e_scape) seen within [] + * H looka_h_ead constraint seen + * I _i_mpossible to match + * L _l_ocale-specific construct seen + * M unportable (_m_achine-specific) construct seen + * N RE can match empty (_n_ull) string + * P non-_P_OSIX construct seen + * Q {} _q_uantifier seen + * R back _r_eference seen + * S POSIX-un_s_pecified syntax seen + * T prefers shortest (_t_iny) + * U saw original-POSIX botch: unmatched right paren in ERE (_u_gh) + */ + + +/* + * 1 basic sanity checks + */ + +class regextest_1 : public RegExTestSuite +{ +public: + regextest_1() : RegExTestSuite("regex.1") { } + static Test *suite(); +}; + +Test *regextest_1::suite() +{ + RegExTestSuite *suite = new regextest_1; + + suite->add("m", "1", "&", "abc", "abc", "abc", NULL); + suite->add("f", "2", "&", "abc", "def", NULL); + suite->add("m", "3", "&", "abc", "xyabxabce", "abc", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_1, "regex.1"); + + +/* + * 2 invalid option combinations + */ + +class regextest_2 : public RegExTestSuite +{ +public: + regextest_2() : RegExTestSuite("regex.2") { } + static Test *suite(); +}; + +Test *regextest_2::suite() +{ + RegExTestSuite *suite = new regextest_2; + + suite->add("e", "1", "qe", "a", "INVARG", NULL); + suite->add("e", "2", "qa", "a", "INVARG", NULL); + suite->add("e", "3", "qx", "a", "INVARG", NULL); + suite->add("e", "4", "qn", "a", "INVARG", NULL); + suite->add("e", "5", "ba", "a", "INVARG", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_2, "regex.2"); + + +/* + * 3 basic syntax + */ + +class regextest_3 : public RegExTestSuite +{ +public: + regextest_3() : RegExTestSuite("regex.3") { } + static Test *suite(); +}; + 
+Test *regextest_3::suite() +{ + RegExTestSuite *suite = new regextest_3; + + suite->add("i", "1", "&NS", "", "a", "0 -1", NULL); + suite->add("m", "2", "NS", "a|", "a", "a", NULL); + suite->add("m", "3", "-", "a|b", "a", "a", NULL); + suite->add("m", "4", "-", "a|b", "b", "b", NULL); + suite->add("m", "5", "NS", "a||b", "b", "b", NULL); + suite->add("m", "6", "&", "ab", "ab", "ab", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_3, "regex.3"); + + +/* + * 4 parentheses + */ + +class regextest_4 : public RegExTestSuite +{ +public: + regextest_4() : RegExTestSuite("regex.4") { } + static Test *suite(); +}; + +Test *regextest_4::suite() +{ + RegExTestSuite *suite = new regextest_4; + + suite->add("m", "1", "-", "(a)e", "ae", "ae", "a", NULL); + suite->add("m", "2", "o", "(a)e", "ae", NULL); + suite->add("m", "3", "b", "\\(a\\)b", "ab", "ab", "a", NULL); + suite->add("m", "4", "-", "a((b)c)", "abc", "abc", "bc", "b", NULL); + suite->add("m", "5", "-", "a(b)(c)", "abc", "abc", "b", "c", NULL); + suite->add("e", "6", "-", "a(b", "EPAREN", NULL); + suite->add("e", "7", "b", "a\\(b", "EPAREN", NULL); + suite->add("m", "8", "eU", "a)b", "a)b", "a)b", NULL); + suite->add("e", "9", "-", "a)b", "EPAREN", NULL); + suite->add("e", "10", "b", "a\\)b", "EPAREN", NULL); + suite->add("m", "11", "P", "a(?:b)c", "abc", "abc", NULL); + suite->add("e", "12", "e", "a(?:b)c", "BADRPT", NULL); + suite->add("i", "13", "S", "a()b", "ab", "0 1", "1 0", NULL); + suite->add("m", "14", "SP", "a(?:)b", "ab", "ab", NULL); + suite->add("i", "15", "S", "a(|b)c", "ac", "0 1", "1 0", NULL); + suite->add("m", "16", "S", "a(b|)c", "abc", "abc", "b", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_4, "regex.4"); + + +/* + * 5 simple one-char matching + */ + +class regextest_5 : public RegExTestSuite +{ +public: + regextest_5() : RegExTestSuite("regex.5") { } + static Test *suite(); +}; + +Test *regextest_5::suite() +{ + RegExTestSuite *suite 
= new regextest_5; + + suite->add("m", "1", "&", "a.b", "axb", "axb", NULL); + suite->add("f", "2", "&n", "a.b", "a\nb", NULL); + suite->add("m", "3", "&", "a[bc]d", "abd", "abd", NULL); + suite->add("m", "4", "&", "a[bc]d", "acd", "acd", NULL); + suite->add("f", "5", "&", "a[bc]d", "aed", NULL); + suite->add("f", "6", "&", "a[^bc]d", "abd", NULL); + suite->add("m", "7", "&", "a[^bc]d", "aed", "aed", NULL); + suite->add("f", "8", "&p", "a[^bc]d", "a\nd", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_5, "regex.5"); + + +/* + * 6 context-dependent syntax + */ + +class regextest_6 : public RegExTestSuite +{ +public: + regextest_6() : RegExTestSuite("regex.6") { } + static Test *suite(); +}; + +Test *regextest_6::suite() +{ + RegExTestSuite *suite = new regextest_6; + + suite->add("e", "1", "-", "*", "BADRPT", NULL); + suite->add("m", "2", "b", "*", "*", "*", NULL); + suite->add("m", "3", "b", "\\(*\\)", "*", "*", "*", NULL); + suite->add("e", "4", "-", "(*)", "BADRPT", NULL); + suite->add("m", "5", "b", "^*", "*", "*", NULL); + suite->add("e", "6", "-", "^*", "BADRPT", NULL); + suite->add("f", "7", "&", "^b", "^b", NULL); + suite->add("m", "8", "b", "x^", "x^", "x^", NULL); + suite->add("f", "9", "I", "x^", "x", NULL); + suite->add("m", "10", "n", "\n^", "x\nb", "\n", NULL); + suite->add("f", "11", "bS", "\\(^b\\)", "^b", NULL); + suite->add("m", "12", "-", "(^b)", "b", "b", "b", NULL); + suite->add("m", "13", "&", "x$", "x", "x", NULL); + suite->add("m", "14", "bS", "\\(x$\\)", "x", "x", "x", NULL); + suite->add("m", "15", "-", "(x$)", "x", "x", "x", NULL); + suite->add("m", "16", "b", "x$y", "x$y", "x$y", NULL); + suite->add("f", "17", "I", "x$y", "xy", NULL); + suite->add("m", "18", "n", "x$\n", "x\n", "x\n", NULL); + suite->add("e", "19", "-", "+", "BADRPT", NULL); + suite->add("e", "20", "-", "?", "BADRPT", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_6, "regex.6"); + + +/* + * 7 simple 
quantifiers + */ + +class regextest_7 : public RegExTestSuite +{ +public: + regextest_7() : RegExTestSuite("regex.7") { } + static Test *suite(); +}; + +Test *regextest_7::suite() +{ + RegExTestSuite *suite = new regextest_7; + + suite->add("m", "1", "&N", "a*", "aa", "aa", NULL); + suite->add("i", "2", "&N", "a*", "b", "0 -1", NULL); + suite->add("m", "3", "-", "a+", "aa", "aa", NULL); + suite->add("m", "4", "-", "a?b", "ab", "ab", NULL); + suite->add("m", "5", "-", "a?b", "b", "b", NULL); + suite->add("e", "6", "-", "**", "BADRPT", NULL); + suite->add("m", "7", "bN", "**", "***", "***", NULL); + suite->add("e", "8", "&", "a**", "BADRPT", NULL); + suite->add("e", "9", "&", "a**b", "BADRPT", NULL); + suite->add("e", "10", "&", "***", "BADRPT", NULL); + suite->add("e", "11", "-", "a++", "BADRPT", NULL); + suite->add("e", "12", "-", "a?+", "BADRPT", NULL); + suite->add("e", "13", "-", "a?*", "BADRPT", NULL); + suite->add("e", "14", "-", "a+*", "BADRPT", NULL); + suite->add("e", "15", "-", "a*+", "BADRPT", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_7, "regex.7"); + + +/* + * 8 braces + */ + +class regextest_8 : public RegExTestSuite +{ +public: + regextest_8() : RegExTestSuite("regex.8") { } + static Test *suite(); +}; + +Test *regextest_8::suite() +{ + RegExTestSuite *suite = new regextest_8; + + suite->add("m", "1", "NQ", "a{0,1}", "", "", NULL); + suite->add("m", "2", "NQ", "a{0,1}", "ac", "a", NULL); + suite->add("e", "3", "-", "a{1,0}", "BADBR", NULL); + suite->add("e", "4", "-", "a{1,2,3}", "BADBR", NULL); + suite->add("e", "5", "-", "a{257}", "BADBR", NULL); + suite->add("e", "6", "-", "a{1000}", "BADBR", NULL); + suite->add("e", "7", "-", "a{1", "EBRACE", NULL); + suite->add("e", "8", "-", "a{1n}", "BADBR", NULL); + suite->add("m", "9", "BS", "a{b", "a{b", "a{b", NULL); + suite->add("m", "10", "BS", "a{", "a{", "a{", NULL); + suite->add("m", "11", "bQ", "a\\{0,1\\}b", "cb", "b", NULL); + suite->add("e", "12", "b", "a\\{0,1", 
"EBRACE", NULL); + suite->add("e", "13", "-", "a{0,1\\", "BADBR", NULL); + suite->add("m", "14", "Q", "a{0}b", "ab", "b", NULL); + suite->add("m", "15", "Q", "a{0,0}b", "ab", "b", NULL); + suite->add("m", "16", "Q", "a{0,1}b", "ab", "ab", NULL); + suite->add("m", "17", "Q", "a{0,2}b", "b", "b", NULL); + suite->add("m", "18", "Q", "a{0,2}b", "aab", "aab", NULL); + suite->add("m", "19", "Q", "a{0,}b", "aab", "aab", NULL); + suite->add("m", "20", "Q", "a{1,1}b", "aab", "ab", NULL); + suite->add("m", "21", "Q", "a{1,3}b", "aaaab", "aaab", NULL); + suite->add("f", "22", "Q", "a{1,3}b", "b", NULL); + suite->add("m", "23", "Q", "a{1,}b", "aab", "aab", NULL); + suite->add("f", "24", "Q", "a{2,3}b", "ab", NULL); + suite->add("m", "25", "Q", "a{2,3}b", "aaaab", "aaab", NULL); + suite->add("f", "26", "Q", "a{2,}b", "ab", NULL); + suite->add("m", "27", "Q", "a{2,}b", "aaaab", "aaaab", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_8, "regex.8"); + + +/* + * 9 brackets + */ + +class regextest_9 : public RegExTestSuite +{ +public: + regextest_9() : RegExTestSuite("regex.9") { } + static Test *suite(); +}; + +Test *regextest_9::suite() +{ + RegExTestSuite *suite = new regextest_9; + + suite->add("m", "1", "&", "a[bc]", "ac", "ac", NULL); + suite->add("m", "2", "&", "a[-]", "a-", "a-", NULL); + suite->add("m", "3", "&", "a[[.-.]]", "a-", "a-", NULL); + suite->add("m", "4", "&L", "a[[.zero.]]", "a0", "a0", NULL); + suite->add("m", "5", "&LM", "a[[.zero.]-9]", "a2", "a2", NULL); + suite->add("m", "6", "&M", "a[0-[.9.]]", "a2", "a2", NULL); + suite->add("m", "7", "&+L", "a[[=x=]]", "ax", "ax", NULL); + suite->add("m", "8", "&+L", "a[[=x=]]", "ay", "ay", NULL); + suite->add("f", "9", "&+L", "a[[=x=]]", "az", NULL); + suite->add("e", "10", "&", "a[0-[=x=]]", "ERANGE", NULL); + suite->add("m", "11", "&L", "a[[:digit:]]", "a0", "a0", NULL); + suite->add("e", "12", "&", "a[[:woopsie:]]", "ECTYPE", NULL); + suite->add("f", "13", "&L", "a[[:digit:]]", "ab", 
NULL); + suite->add("e", "14", "&", "a[0-[:digit:]]", "ERANGE", NULL); + suite->add("m", "15", "&LP", "[[:<:]]a", "a", "a", NULL); + suite->add("m", "16", "&LP", "a[[:>:]]", "a", "a", NULL); + suite->add("e", "17", "&", "a[[..]]b", "ECOLLATE", NULL); + suite->add("e", "18", "&", "a[[==]]b", "ECOLLATE", NULL); + suite->add("e", "19", "&", "a[[::]]b", "ECTYPE", NULL); + suite->add("e", "20", "&", "a[[.a", "EBRACK", NULL); + suite->add("e", "21", "&", "a[[=a", "EBRACK", NULL); + suite->add("e", "22", "&", "a[[:a", "EBRACK", NULL); + suite->add("e", "23", "&", "a[", "EBRACK", NULL); + suite->add("e", "24", "&", "a[b", "EBRACK", NULL); + suite->add("e", "25", "&", "a[b-", "EBRACK", NULL); + suite->add("e", "26", "&", "a[b-c", "EBRACK", NULL); + suite->add("m", "27", "&M", "a[b-c]", "ab", "ab", NULL); + suite->add("m", "28", "&", "a[b-b]", "ab", "ab", NULL); + suite->add("m", "29", "&M", "a[1-2]", "a2", "a2", NULL); + suite->add("e", "30", "&", "a[c-b]", "ERANGE", NULL); + suite->add("e", "31", "&", "a[a-b-c]", "ERANGE", NULL); + suite->add("m", "32", "&M", "a[--?]b", "a?b", "a?b", NULL); + suite->add("m", "33", "&", "a[---]b", "a-b", "a-b", NULL); + suite->add("m", "34", "&", "a[]b]c", "a]c", "a]c", NULL); + suite->add("m", "35", "EP", "a[\\]]b", "a]b", "a]b", NULL); + suite->add("f", "36", "bE", "a[\\]]b", "a]b", NULL); + suite->add("m", "37", "bE", "a[\\]]b", "a\\]b", "a\\]b", NULL); + suite->add("m", "38", "eE", "a[\\]]b", "a\\]b", "a\\]b", NULL); + suite->add("m", "39", "EP", "a[\\\\]b", "a\\b", "a\\b", NULL); + suite->add("m", "40", "eE", "a[\\\\]b", "a\\b", "a\\b", NULL); + suite->add("m", "41", "bE", "a[\\\\]b", "a\\b", "a\\b", NULL); + suite->add("e", "42", "-", "a[\\Z]b", "EESCAPE", NULL); + suite->add("m", "43", "&", "a[[b]c", "a[c", "a[c", NULL); + suite->add("m", "44", "EMP*", "a[\\u00fe-\\u0507][\\u00ff-\\u0300]b", "aÄË¿b", "aÄË¿b", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_9, "regex.9"); + + +/* + * 10 anchors and 
newlines + */ + +class regextest_10 : public RegExTestSuite +{ +public: + regextest_10() : RegExTestSuite("regex.10") { } + static Test *suite(); +}; + +Test *regextest_10::suite() +{ + RegExTestSuite *suite = new regextest_10; + + suite->add("m", "1", "&", "^a", "a", "a", NULL); + suite->add("f", "2", "&^", "^a", "a", NULL); + suite->add("i", "3", "&N", "^", "a", "0 -1", NULL); + suite->add("i", "4", "&", "a$", "aba", "2 2", NULL); + suite->add("f", "5", "&$", "a$", "a", NULL); + suite->add("i", "6", "&N", "$", "ab", "2 1", NULL); + suite->add("m", "7", "&n", "^a", "a", "a", NULL); + suite->add("m", "8", "&n", "^a", "b\na", "a", NULL); + suite->add("i", "9", "&w", "^a", "a\na", "0 0", NULL); + suite->add("i", "10", "&n^", "^a", "a\na", "2 2", NULL); + suite->add("m", "11", "&n", "a$", "a", "a", NULL); + suite->add("m", "12", "&n", "a$", "a\nb", "a", NULL); + suite->add("i", "13", "&n", "a$", "a\na", "0 0", NULL); + suite->add("i", "14", "N", "^^", "a", "0 -1", NULL); + suite->add("m", "15", "b", "^^", "^", "^", NULL); + suite->add("i", "16", "N", "$$", "a", "1 0", NULL); + suite->add("m", "17", "b", "$$", "$", "$", NULL); + suite->add("m", "18", "&N", "^$", "", "", NULL); + suite->add("f", "19", "&N", "^$", "a", NULL); + suite->add("i", "20", "&nN", "^$", "a\n\nb", "2 1", NULL); + suite->add("m", "21", "N", "$^", "", "", NULL); + suite->add("m", "22", "b", "$^", "$^", "$^", NULL); + suite->add("m", "23", "P", "\\Aa", "a", "a", NULL); + suite->add("m", "24", "^P", "\\Aa", "a", "a", NULL); + suite->add("f", "25", "^nP", "\\Aa", "b\na", NULL); + suite->add("m", "26", "P", "a\\Z", "a", "a", NULL); + suite->add("m", "27", "$P", "a\\Z", "a", "a", NULL); + suite->add("f", "28", "$nP", "a\\Z", "a\nb", NULL); + suite->add("e", "29", "-", "^*", "BADRPT", NULL); + suite->add("e", "30", "-", "$*", "BADRPT", NULL); + suite->add("e", "31", "-", "\\A*", "BADRPT", NULL); + suite->add("e", "32", "-", "\\Z*", "BADRPT", NULL); + + return suite; +} + 
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_10, "regex.10"); + + +/* + * 11 boundary constraints + */ + +class regextest_11 : public RegExTestSuite +{ +public: + regextest_11() : RegExTestSuite("regex.11") { } + static Test *suite(); +}; + +Test *regextest_11::suite() +{ + RegExTestSuite *suite = new regextest_11; + + suite->add("m", "1", "&LP", "[[:<:]]a", "a", "a", NULL); + suite->add("m", "2", "&LP", "[[:<:]]a", "-a", "a", NULL); + suite->add("f", "3", "&LP", "[[:<:]]a", "ba", NULL); + suite->add("m", "4", "&LP", "a[[:>:]]", "a", "a", NULL); + suite->add("m", "5", "&LP", "a[[:>:]]", "a-", "a", NULL); + suite->add("f", "6", "&LP", "a[[:>:]]", "ab", NULL); + suite->add("m", "7", "bLP", "\\<a", "a", "a", NULL); + suite->add("f", "8", "bLP", "\\<a", "ba", NULL); + suite->add("m", "9", "bLP", "a\\>", "a", "a", NULL); + suite->add("f", "10", "bLP", "a\\>", "ab", NULL); + suite->add("m", "11", "LP", "\\ya", "a", "a", NULL); + suite->add("f", "12", "LP", "\\ya", "ba", NULL); + suite->add("m", "13", "LP", "a\\y", "a", "a", NULL); + suite->add("f", "14", "LP", "a\\y", "ab", NULL); + suite->add("m", "15", "LP", "a\\Y", "ab", "a", NULL); + suite->add("f", "16", "LP", "a\\Y", "a-", NULL); + suite->add("f", "17", "LP", "a\\Y", "a", NULL); + suite->add("f", "18", "LP", "-\\Y", "-a", NULL); + suite->add("m", "19", "LP", "-\\Y", "-%", "-", NULL); + suite->add("f", "20", "LP", "\\Y-", "a-", NULL); + suite->add("e", "21", "-", "[[:<:]]*", "BADRPT", NULL); + suite->add("e", "22", "-", "[[:>:]]*", "BADRPT", NULL); + suite->add("e", "23", "b", "\\<*", "BADRPT", NULL); + suite->add("e", "24", "b", "\\>*", "BADRPT", NULL); + suite->add("e", "25", "-", "\\y*", "BADRPT", NULL); + suite->add("e", "26", "-", "\\Y*", "BADRPT", NULL); + suite->add("m", "27", "LP", "\\ma", "a", "a", NULL); + suite->add("f", "28", "LP", "\\ma", "ba", NULL); + suite->add("m", "29", "LP", "a\\M", "a", "a", NULL); + suite->add("f", "30", "LP", "a\\M", "ab", NULL); + suite->add("f", "31", "ILP", "\\Ma", "a", 
NULL); + suite->add("f", "32", "ILP", "a\\m", "a", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_11, "regex.11"); + + +/* + * 12 character classes + */ + +class regextest_12 : public RegExTestSuite +{ +public: + regextest_12() : RegExTestSuite("regex.12") { } + static Test *suite(); +}; + +Test *regextest_12::suite() +{ + RegExTestSuite *suite = new regextest_12; + + suite->add("m", "1", "LP", "a\\db", "a0b", "a0b", NULL); + suite->add("f", "2", "LP", "a\\db", "axb", NULL); + suite->add("f", "3", "LP", "a\\Db", "a0b", NULL); + suite->add("m", "4", "LP", "a\\Db", "axb", "axb", NULL); + suite->add("m", "5", "LP", "a\\sb", "a b", "a b", NULL); + suite->add("m", "6", "LP", "a\\sb", "a\tb", "a\tb", NULL); + suite->add("m", "7", "LP", "a\\sb", "a\nb", "a\nb", NULL); + suite->add("f", "8", "LP", "a\\sb", "axb", NULL); + suite->add("m", "9", "LP", "a\\Sb", "axb", "axb", NULL); + suite->add("f", "10", "LP", "a\\Sb", "a b", NULL); + suite->add("m", "11", "LP", "a\\wb", "axb", "axb", NULL); + suite->add("f", "12", "LP", "a\\wb", "a-b", NULL); + suite->add("f", "13", "LP", "a\\Wb", "axb", NULL); + suite->add("m", "14", "LP", "a\\Wb", "a-b", "a-b", NULL); + suite->add("m", "15", "LP", "\\y\\w+z\\y", "adze-guz", "guz", NULL); + suite->add("m", "16", "LPE", "a[\\d]b", "a1b", "a1b", NULL); + suite->add("m", "17", "LPE", "a[\\s]b", "a b", "a b", NULL); + suite->add("m", "18", "LPE", "a[\\w]b", "axb", "axb", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_12, "regex.12"); + + +/* + * 13 escapes + */ + +class regextest_13 : public RegExTestSuite +{ +public: + regextest_13() : RegExTestSuite("regex.13") { } + static Test *suite(); +}; + +Test *regextest_13::suite() +{ + RegExTestSuite *suite = new regextest_13; + + suite->add("e", "1", "&", "a\\", "EESCAPE", NULL); + suite->add("m", "2", "-", "a\\<b", "a<b", "a<b", NULL); + suite->add("m", "3", "e", "a\\<b", "a<b", "a<b", NULL); + suite->add("m", "4", "bAS", "a\\wb", 
"awb", "awb", NULL); + suite->add("m", "5", "eAS", "a\\wb", "awb", "awb", NULL); + suite->add("m", "6", "PL", "a\\ab", "a\ab", "a\ab", NULL); + suite->add("m", "7", "P", "a\\bb", "a\bb", "a\bb", NULL); + suite->add("m", "8", "P", "a\\Bb", "a\\b", "a\\b", NULL); + suite->add("m", "9", "MP", "a\\chb", "a\bb", "a\bb", NULL); + suite->add("m", "10", "MP", "a\\cHb", "a\bb", "a\bb", NULL); + suite->add("m", "11", "LMP", "a\\e", "a\033", "a\033", NULL); + suite->add("m", "12", "P", "a\\fb", "a\fb", "a\fb", NULL); + suite->add("m", "13", "P", "a\\nb", "a\nb", "a\nb", NULL); + suite->add("m", "14", "P", "a\\rb", "a\rb", "a\rb", NULL); + suite->add("m", "15", "P", "a\\tb", "a\tb", "a\tb", NULL); + suite->add("m", "16", "P", "a\\u0008x", "a\bx", "a\bx", NULL); + suite->add("e", "17", "-", "a\\u008x", "EESCAPE", NULL); + suite->add("m", "18", "P", "a\\u00088x", "a\b8x", "a\b8x", NULL); + suite->add("m", "19", "P", "a\\U00000008x", "a\bx", "a\bx", NULL); + suite->add("e", "20", "-", "a\\U0000008x", "EESCAPE", NULL); + suite->add("m", "21", "P", "a\\vb", "a\vb", "a\vb", NULL); + suite->add("m", "22", "MP", "a\\x08x", "a\bx", "a\bx", NULL); + suite->add("e", "23", "-", "a\\xq", "EESCAPE", NULL); + suite->add("m", "24", "MP", "a\\x0008x", "a\bx", "a\bx", NULL); + suite->add("e", "25", "-", "a\\z", "EESCAPE", NULL); + suite->add("m", "26", "MP", "a\\010b", "a\bb", "a\bb", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_13, "regex.13"); + + +/* + * 14 back references + */ + +class regextest_14 : public RegExTestSuite +{ +public: + regextest_14() : RegExTestSuite("regex.14") { } + static Test *suite(); +}; + +Test *regextest_14::suite() +{ + RegExTestSuite *suite = new regextest_14; + + suite->add("m", "1", "RP", "a(b*)c\\1", "abbcbb", "abbcbb", "bb", NULL); + suite->add("m", "2", "RP", "a(b*)c\\1", "ac", "ac", "", NULL); + suite->add("f", "3", "RP", "a(b*)c\\1", "abbcb", NULL); + suite->add("m", "4", "RP", "a(b*)\\1", "abbcbb", "abb", "b", NULL); + 
suite->add("m", "5", "RP", "a(b|bb)\\1", "abbcbb", "abb", "b", NULL); + suite->add("m", "6", "RP", "a([bc])\\1", "abb", "abb", "b", NULL); + suite->add("f", "7", "RP", "a([bc])\\1", "abc", NULL); + suite->add("m", "8", "RP", "a([bc])\\1", "abcabb", "abb", "b", NULL); + suite->add("f", "9", "RP", "a([bc])*\\1", "abc", NULL); + suite->add("f", "10", "RP", "a([bc])\\1", "abB", NULL); + suite->add("m", "11", "iRP", "a([bc])\\1", "abB", "abB", "b", NULL); + suite->add("m", "12", "RP", "a([bc])\\1+", "abbb", "abbb", "b", NULL); + suite->add("m", "13", "QRP", "a([bc])\\1{3,4}", "abbbb", "abbbb", "b", NULL); + suite->add("f", "14", "QRP", "a([bc])\\1{3,4}", "abbb", NULL); + suite->add("m", "15", "RP", "a([bc])\\1*", "abbb", "abbb", "b", NULL); + suite->add("m", "16", "RP", "a([bc])\\1*", "ab", "ab", "b", NULL); + suite->add("m", "17", "RP", "a([bc])(\\1*)", "ab", "ab", "b", "", NULL); + suite->add("e", "18", "-", "a((b)\\1)", "ESUBREG", NULL); + suite->add("e", "19", "-", "a(b)c\\2", "ESUBREG", NULL); + suite->add("m", "20", "bR", "a\\(b*\\)c\\1", "abbcbb", "abbcbb", "bb", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_14, "regex.14"); + + +/* + * 15 octal escapes vs back references + */ + +class regextest_15 : public RegExTestSuite +{ +public: + regextest_15() : RegExTestSuite("regex.15") { } + static Test *suite(); +}; + +Test *regextest_15::suite() +{ + RegExTestSuite *suite = new regextest_15; + + suite->add("m", "1", "MP", "a\\010b", "a\bb", "a\bb", NULL); + suite->add("m", "2", "MP", "a\\0070b", "a\a0b", "a\a0b", NULL); + suite->add("m", "3", "MP", "a\\07b", "a\ab", "a\ab", NULL); + suite->add("m", "4", "MP", "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c", "abbbbbbbbbb\ac", "abbbbbbbbbb\ac", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL); + suite->add("e", "5", "-", "a\\7b", "ESUBREG", NULL); + suite->add("m", "6", "MP", "a\\10b", "a\bb", "a\bb", NULL); + suite->add("m", "7", "MP", "a\\101b", "aAb", "aAb", NULL); + suite->add("m", 
"8", "RP", "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\10c", "abbbbbbbbbbbc", "abbbbbbbbbbbc", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL); + suite->add("e", "9", "-", "a((((((((((b\\10))))))))))c", "ESUBREG", NULL); + suite->add("m", "10", "MP", "a\\12b", "a\nb", "a\nb", NULL); + suite->add("e", "11", "b", "a\\12b", "ESUBREG", NULL); + suite->add("m", "12", "eAS", "a\\12b", "a12b", "a12b", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_15, "regex.15"); + + +/* + * 16 expanded syntax + */ + +class regextest_16 : public RegExTestSuite +{ +public: + regextest_16() : RegExTestSuite("regex.16") { } + static Test *suite(); +}; + +Test *regextest_16::suite() +{ + RegExTestSuite *suite = new regextest_16; + + suite->add("m", "1", "xP", "a b c", "abc", "abc", NULL); + suite->add("m", "2", "xP", "a b #oops\nc\td", "abcd", "abcd", NULL); + suite->add("m", "3", "x", "a\\ b\\\tc", "a b\tc", "a b\tc", NULL); + suite->add("m", "4", "xP", "a b\\#c", "ab#c", "ab#c", NULL); + suite->add("m", "5", "xP", "a b[c d]e", "ab e", "ab e", NULL); + suite->add("m", "6", "xP", "a b[c#d]e", "ab#e", "ab#e", NULL); + suite->add("m", "7", "xP", "a b[c#d]e", "abde", "abde", NULL); + suite->add("m", "8", "xSPB", "ab{ d", "ab{d", "ab{d", NULL); + suite->add("m", "9", "xPQ", "ab{ 1 , 2 }c", "abc", "abc", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_16, "regex.16"); + + +/* + * 17 misc syntax + */ + +class regextest_17 : public RegExTestSuite +{ +public: + regextest_17() : RegExTestSuite("regex.17") { } + static Test *suite(); +}; + +Test *regextest_17::suite() +{ + RegExTestSuite *suite = new regextest_17; + + suite->add("m", "1", "P", "a(?#comment)b", "ab", "ab", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_17, "regex.17"); + + +/* + * 18 unmatchable REs + */ + +class regextest_18 : public RegExTestSuite +{ +public: + regextest_18() : RegExTestSuite("regex.18") { } + static Test *suite(); +}; + 
+Test *regextest_18::suite() +{ + RegExTestSuite *suite = new regextest_18; + + suite->add("f", "1", "I", "a^b", "ab", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_18, "regex.18"); + + +/* + * 19 case independence + */ + +class regextest_19 : public RegExTestSuite +{ +public: + regextest_19() : RegExTestSuite("regex.19") { } + static Test *suite(); +}; + +Test *regextest_19::suite() +{ + RegExTestSuite *suite = new regextest_19; + + suite->add("m", "1", "&i", "ab", "Ab", "Ab", NULL); + suite->add("m", "2", "&i", "a[bc]", "aC", "aC", NULL); + suite->add("f", "3", "&i", "a[^bc]", "aB", NULL); + suite->add("m", "4", "&iM", "a[b-d]", "aC", "aC", NULL); + suite->add("f", "5", "&iM", "a[^b-d]", "aC", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_19, "regex.19"); + + +/* + * 20 directors and embedded options + */ + +class regextest_20 : public RegExTestSuite +{ +public: + regextest_20() : RegExTestSuite("regex.20") { } + static Test *suite(); +}; + +Test *regextest_20::suite() +{ + RegExTestSuite *suite = new regextest_20; + + suite->add("e", "1", "&", "***?", "BADPAT", NULL); + suite->add("m", "2", "q", "***?", "***?", "***?", NULL); + suite->add("m", "3", "&P", "***=a*b", "a*b", "a*b", NULL); + suite->add("m", "4", "q", "***=a*b", "***=a*b", "***=a*b", NULL); + suite->add("m", "5", "bLP", "***:\\w+", "ab", "ab", NULL); + suite->add("m", "6", "eLP", "***:\\w+", "ab", "ab", NULL); + suite->add("e", "7", "&", "***:***=a*b", "BADRPT", NULL); + suite->add("m", "8", "&P", "***:(?b)a+b", "a+b", "a+b", NULL); + suite->add("m", "9", "P", "(?b)a+b", "a+b", "a+b", NULL); + suite->add("e", "10", "e", "(?b)\\w+", "BADRPT", NULL); + suite->add("m", "11", "bAS", "(?b)\\w+", "(?b)w+", "(?b)w+", NULL); + suite->add("m", "12", "iP", "(?c)a", "a", "a", NULL); + suite->add("f", "13", "iP", "(?c)a", "A", NULL); + suite->add("m", "14", "APS", "(?e)\\W+", "WW", "WW", NULL); + suite->add("m", "15", "P", "(?i)a+", "Aa", "Aa", 
NULL); + suite->add("f", "16", "P", "(?m)a.b", "a\nb", NULL); + suite->add("m", "17", "P", "(?m)^b", "a\nb", "b", NULL); + suite->add("f", "18", "P", "(?n)a.b", "a\nb", NULL); + suite->add("m", "19", "P", "(?n)^b", "a\nb", "b", NULL); + suite->add("f", "20", "P", "(?p)a.b", "a\nb", NULL); + suite->add("f", "21", "P", "(?p)^b", "a\nb", NULL); + suite->add("m", "22", "P", "(?q)a+b", "a+b", "a+b", NULL); + suite->add("m", "23", "nP", "(?s)a.b", "a\nb", "a\nb", NULL); + suite->add("m", "24", "xP", "(?t)a b", "a b", "a b", NULL); + suite->add("m", "25", "P", "(?w)a.b", "a\nb", "a\nb", NULL); + suite->add("m", "26", "P", "(?w)^b", "a\nb", "b", NULL); + suite->add("m", "27", "P", "(?x)a b", "ab", "ab", NULL); + suite->add("e", "28", "-", "(?z)ab", "BADOPT", NULL); + suite->add("m", "29", "P", "(?ici)a+", "Aa", "Aa", NULL); + suite->add("e", "30", "P", "(?i)(?q)a+", "BADRPT", NULL); + suite->add("m", "31", "P", "(?q)(?i)a+", "(?i)a+", "(?i)a+", NULL); + suite->add("m", "32", "P", "(?qe)a+", "a", "a", NULL); + suite->add("m", "33", "xP", "(?q)a b", "a b", "a b", NULL); + suite->add("m", "34", "P", "(?qx)a b", "a b", "a b", NULL); + suite->add("m", "35", "P", "(?qi)ab", "Ab", "Ab", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_20, "regex.20"); + + +/* + * 21 capturing + */ + +class regextest_21 : public RegExTestSuite +{ +public: + regextest_21() : RegExTestSuite("regex.21") { } + static Test *suite(); +}; + +Test *regextest_21::suite() +{ + RegExTestSuite *suite = new regextest_21; + + suite->add("m", "1", "-", "a(b)c", "abc", "abc", "b", NULL); + suite->add("m", "2", "P", "a(?:b)c", "xabc", "abc", NULL); + suite->add("m", "3", "-", "a((b))c", "xabcy", "abc", "b", "b", NULL); + suite->add("m", "4", "P", "a(?:(b))c", "abcy", "abc", "b", NULL); + suite->add("m", "5", "P", "a((?:b))c", "abc", "abc", "b", NULL); + suite->add("m", "6", "P", "a(?:(?:b))c", "abc", "abc", NULL); + suite->add("i", "7", "Q", "a(b){0}c", "ac", "0 1", "-1 -1", NULL); + 
suite->add("m", "8", "-", "a(b)c(d)e", "abcde", "abcde", "b", "d", NULL); + suite->add("m", "9", "-", "(b)c(d)e", "bcde", "bcde", "b", "d", NULL); + suite->add("m", "10", "-", "a(b)(d)e", "abde", "abde", "b", "d", NULL); + suite->add("m", "11", "-", "a(b)c(d)", "abcd", "abcd", "b", "d", NULL); + suite->add("m", "12", "-", "(ab)(cd)", "xabcdy", "abcd", "ab", "cd", NULL); + suite->add("m", "13", "-", "a(b)?c", "xabcy", "abc", "b", NULL); + suite->add("i", "14", "-", "a(b)?c", "xacy", "1 2", "-1 -1", NULL); + suite->add("m", "15", "-", "a(b)?c(d)?e", "xabcdey", "abcde", "b", "d", NULL); + suite->add("i", "16", "-", "a(b)?c(d)?e", "xacdey", "1 4", "-1 -1", "3 3", NULL); + suite->add("i", "17", "-", "a(b)?c(d)?e", "xabcey", "1 4", "2 2", "-1 -1", NULL); + suite->add("i", "18", "-", "a(b)?c(d)?e", "xacey", "1 3", "-1 -1", "-1 -1", NULL); + suite->add("m", "19", "-", "a(b)*c", "xabcy", "abc", "b", NULL); + suite->add("i", "20", "-", "a(b)*c", "xabbbcy", "1 5", "4 4", NULL); + suite->add("i", "21", "-", "a(b)*c", "xacy", "1 2", "-1 -1", NULL); + suite->add("m", "22", "-", "a(b*)c", "xabbbcy", "abbbc", "bbb", NULL); + suite->add("m", "23", "-", "a(b*)c", "xacy", "ac", "", NULL); + suite->add("f", "24", "-", "a(b)+c", "xacy", NULL); + suite->add("m", "25", "-", "a(b)+c", "xabcy", "abc", "b", NULL); + suite->add("i", "26", "-", "a(b)+c", "xabbbcy", "1 5", "4 4", NULL); + suite->add("m", "27", "-", "a(b+)c", "xabbbcy", "abbbc", "bbb", NULL); + suite->add("i", "28", "Q", "a(b){2,3}c", "xabbbcy", "1 5", "4 4", NULL); + suite->add("i", "29", "Q", "a(b){2,3}c", "xabbcy", "1 4", "3 3", NULL); + suite->add("f", "30", "Q", "a(b){2,3}c", "xabcy", NULL); + suite->add("m", "31", "LP", "\\y(\\w+)\\y", "-- abc-", "abc", "abc", NULL); + suite->add("m", "32", "-", "a((b|c)d+)+", "abacdbd", "acdbd", "bd", "b", NULL); + suite->add("m", "33", "N", "(.*).*", "abc", "abc", "abc", NULL); + suite->add("m", "34", "N", "(a*)*", "bc", "", "", NULL); + + return suite; +} + 
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_21, "regex.21"); + + +/* + * 22 multicharacter collating elements + */ + +class regextest_22 : public RegExTestSuite +{ +public: + regextest_22() : RegExTestSuite("regex.22") { } + static Test *suite(); +}; + +Test *regextest_22::suite() +{ + RegExTestSuite *suite = new regextest_22; + + suite->add("m", "1", "&+L", "a[c]e", "ace", "ace", NULL); + suite->add("f", "2", "&+IL", "a[c]h", "ach", NULL); + suite->add("m", "3", "&+L", "a[[.ch.]]", "ach", "ach", NULL); + suite->add("f", "4", "&+L", "a[[.ch.]]", "ace", NULL); + suite->add("m", "5", "&+L", "a[c[.ch.]]", "ac", "ac", NULL); + suite->add("m", "6", "&+L", "a[c[.ch.]]", "ace", "ac", NULL); + suite->add("m", "7", "&+L", "a[c[.ch.]]", "ache", "ach", NULL); + suite->add("f", "8", "&+L", "a[^c]e", "ace", NULL); + suite->add("m", "9", "&+L", "a[^c]e", "abe", "abe", NULL); + suite->add("m", "10", "&+L", "a[^c]e", "ache", "ache", NULL); + suite->add("f", "11", "&+L", "a[^[.ch.]]", "ach", NULL); + suite->add("m", "12", "&+L", "a[^[.ch.]]", "ace", "ac", NULL); + suite->add("m", "13", "&+L", "a[^[.ch.]]", "ac", "ac", NULL); + suite->add("m", "14", "&+L", "a[^[.ch.]]", "abe", "ab", NULL); + suite->add("f", "15", "&+L", "a[^c[.ch.]]", "ach", NULL); + suite->add("f", "16", "&+L", "a[^c[.ch.]]", "ace", NULL); + suite->add("f", "17", "&+L", "a[^c[.ch.]]", "ac", NULL); + suite->add("m", "18", "&+L", "a[^c[.ch.]]", "abe", "ab", NULL); + suite->add("m", "19", "&+L", "a[^b]", "ac", "ac", NULL); + suite->add("m", "20", "&+L", "a[^b]", "ace", "ac", NULL); + suite->add("m", "21", "&+L", "a[^b]", "ach", "ach", NULL); + suite->add("f", "22", "&+L", "a[^b]", "abe", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_22, "regex.22"); + + +/* + * 23 lookahead constraints + */ + +class regextest_23 : public RegExTestSuite +{ +public: + regextest_23() : RegExTestSuite("regex.23") { } + static Test *suite(); +}; + +Test *regextest_23::suite() +{ + RegExTestSuite *suite 
= new regextest_23; + + suite->add("m", "1", "HP", "a(?=b)b*", "ab", "ab", NULL); + suite->add("f", "2", "HP", "a(?=b)b*", "a", NULL); + suite->add("m", "3", "HP", "a(?=b)b*(?=c)c*", "abc", "abc", NULL); + suite->add("f", "4", "HP", "a(?=b)b*(?=c)c*", "ab", NULL); + suite->add("f", "5", "HP", "a(?!b)b*", "ab", NULL); + suite->add("m", "6", "HP", "a(?!b)b*", "a", "a", NULL); + suite->add("m", "7", "HP", "(?=b)b", "b", "b", NULL); + suite->add("f", "8", "HP", "(?=b)b", "a", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_23, "regex.23"); + + +/* + * 24 non-greedy quantifiers + */ + +class regextest_24 : public RegExTestSuite +{ +public: + regextest_24() : RegExTestSuite("regex.24") { } + static Test *suite(); +}; + +Test *regextest_24::suite() +{ + RegExTestSuite *suite = new regextest_24; + + suite->add("m", "1", "PT", "ab+?", "abb", "ab", NULL); + suite->add("m", "2", "PT", "ab+?c", "abbc", "abbc", NULL); + suite->add("m", "3", "PT", "ab*?", "abb", "a", NULL); + suite->add("m", "4", "PT", "ab*?c", "abbc", "abbc", NULL); + suite->add("m", "5", "PT", "ab??", "ab", "a", NULL); + suite->add("m", "6", "PT", "ab??c", "abc", "abc", NULL); + suite->add("m", "7", "PQT", "ab{2,4}?", "abbbb", "abb", NULL); + suite->add("m", "8", "PQT", "ab{2,4}?c", "abbbbc", "abbbbc", NULL); + suite->add("m", "9", "-", "3z*", "123zzzz456", "3zzzz", NULL); + suite->add("m", "10", "PT", "3z*?", "123zzzz456", "3", NULL); + suite->add("m", "11", "-", "z*4", "123zzzz456", "zzzz4", NULL); + suite->add("m", "12", "PT", "z*?4", "123zzzz456", "zzzz4", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_24, "regex.24"); + + +/* + * 25 mixed quantifiers + */ + +class regextest_25 : public RegExTestSuite +{ +public: + regextest_25() : RegExTestSuite("regex.25") { } + static Test *suite(); +}; + +Test *regextest_25::suite() +{ + RegExTestSuite *suite = new regextest_25; + + suite->add("m", "1", "PNT", "^(.*?)(a*)$", "xyza", "xyza", "xyz", "a", 
NULL); + suite->add("m", "2", "PNT", "^(.*?)(a*)$", "xyzaa", "xyzaa", "xyz", "aa", NULL); + suite->add("m", "3", "PNT", "^(.*?)(a*)$", "xyz", "xyz", "xyz", "", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_25, "regex.25"); + + +/* + * 26 tricky cases + */ + +class regextest_26 : public RegExTestSuite +{ +public: + regextest_26() : RegExTestSuite("regex.26") { } + static Test *suite(); +}; + +Test *regextest_26::suite() +{ + RegExTestSuite *suite = new regextest_26; + + suite->add("m", "1", "-", "(week|wee)(night|knights)", "weeknights", "weeknights", "wee", "knights", NULL); + suite->add("m", "2", "RP", "a(bc*).*\\1", "abccbccb", "abccbccb", "b", NULL); + suite->add("m", "3", "-", "a(b.[bc]*)+", "abcbd", "abcbd", "bd", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_26, "regex.26"); + + +/* + * 27 implementation misc. + */ + +class regextest_27 : public RegExTestSuite +{ +public: + regextest_27() : RegExTestSuite("regex.27") { } + static Test *suite(); +}; + +Test *regextest_27::suite() +{ + RegExTestSuite *suite = new regextest_27; + + suite->add("m", "1", "P", "a(?:b|b)c", "abc", "abc", NULL); + suite->add("m", "2", "&", "[ab][ab][ab]", "aba", "aba", NULL); + suite->add("m", "3", "&", "[ab][ab][ab][ab][ab][ab][ab]", "abababa", "abababa", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_27, "regex.27"); + + +/* + * 28 boundary busters etc. 
+ */ + +class regextest_28 : public RegExTestSuite +{ +public: + regextest_28() : RegExTestSuite("regex.28") { } + static Test *suite(); +}; + +Test *regextest_28::suite() +{ + RegExTestSuite *suite = new regextest_28; + + suite->add("m", "1", "&", "abcdefghijkl", "abcdefghijkl", "abcdefghijkl", NULL); + suite->add("m", "2", "P", "a(?:b|c|d|e|f|g|h|i|j|k|l|m)n", "agn", "agn", NULL); + suite->add("m", "3", "-", "a(((((((((((((b)))))))))))))c", "abc", "abc", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", NULL); + suite->add("m", "4", "Q", "ab{1,100}c", "abbc", "abbc", NULL); + suite->add("m", "5", "Q", "ab{1,100}c", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", NULL); + suite->add("m", "6", "Q", "ab{1,100}c", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc", NULL); + suite->add("m", "7", "LP", "\\w+abcdefgh", "xyzabcdefgh", "xyzabcdefgh", NULL); + suite->add("m", "8", "%LP", "\\w+abcdefgh", "xyzabcdefgh", "xyzabcdefgh", NULL); + suite->add("m", "9", "%LP", "\\w+abcdefghijklmnopqrst", "xyzabcdefghijklmnopqrst", "xyzabcdefghijklmnopqrst", NULL); + suite->add("i", "10", "%LP", "\\w+(abcdefgh)?", "xyz", "0 2", "-1 -1", NULL); + suite->add("i", "11", "%LP", "\\w+(abcdefgh)?", "xyzabcdefg", "0 9", "-1 -1", NULL); + suite->add("i", "12", "%LP", "\\w+(abcdefghijklmnopqrst)?", "xyzabcdefghijklmnopqrs", "0 21", "-1 -1", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_28, "regex.28"); + + +/* + * 29 incomplete matches + */ + +class regextest_29 : public RegExTestSuite +{ +public: + regextest_29() : RegExTestSuite("regex.29") { } + static Test *suite(); +}; + +Test *regextest_29::suite() +{ + RegExTestSuite *suite = new regextest_29; + + suite->add("p", "1", "t", "def", "abc", "3 2", "", NULL); + suite->add("p", "2", "t", "bcd", "abc", "1 2", "", NULL); + suite->add("p", "3", "t", "abc", "abab", "0 3", "", 
NULL); + suite->add("p", "4", "t", "abc", "abdab", "3 4", "", NULL); + suite->add("i", "5", "t", "abc", "abc", "0 2", "0 2", NULL); + suite->add("i", "6", "t", "abc", "xyabc", "2 4", "2 4", NULL); + suite->add("p", "7", "t", "abc+", "xyab", "2 3", "", NULL); + suite->add("i", "8", "t", "abc+", "xyabc", "2 4", "2 4", NULL); + suite->add("i", "10", "t", "abc+", "xyabcdd", "2 4", "7 6", NULL); + suite->add("p", "11", "tPT", "abc+?", "xyab", "2 3", "", NULL); + suite->add("i", "12", "tPT", "abc+?", "xyabc", "2 4", "5 4", NULL); + suite->add("i", "13", "tPT", "abc+?", "xyabcc", "2 4", "6 5", NULL); + suite->add("i", "14", "tPT", "abc+?", "xyabcd", "2 4", "6 5", NULL); + suite->add("i", "15", "tPT", "abc+?", "xyabcdd", "2 4", "7 6", NULL); + suite->add("i", "16", "t", "abcd|bc", "xyabc", "3 4", "2 4", NULL); + suite->add("p", "17", "tn", ".*k", "xx\nyyy", "3 5", "", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_29, "regex.29"); + + +/* + * 30 misc. oddities and old bugs + */ + +class regextest_30 : public RegExTestSuite +{ +public: + regextest_30() : RegExTestSuite("regex.30") { } + static Test *suite(); +}; + +Test *regextest_30::suite() +{ + RegExTestSuite *suite = new regextest_30; + + suite->add("e", "1", "&", "***", "BADRPT", NULL); + suite->add("m", "2", "N", "a?b*", "abb", "abb", NULL); + suite->add("m", "3", "N", "a?b*", "bb", "bb", NULL); + suite->add("m", "4", "&", "a*b", "aab", "aab", NULL); + suite->add("m", "5", "&", "^a*b", "aaaab", "aaaab", NULL); + suite->add("m", "6", "&M", "[0-6][1-2][0-3][0-6][1-6][0-6]", "010010", "010010", NULL); + suite->add("m", "7", "s", "abc", "abcd", "abc", NULL); + suite->add("f", "8", "s", "abc", "xabcd", NULL); + suite->add("m", "9", "HLP", "(?n)^(?![t#])\\S+", "tk\n\n#\n#\nit0", "it0", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_30, "regex.30"); + + +/* + * extra_1 checks for bug fixes + */ + +class regextest_extra_1 : public RegExTestSuite +{ +public: + 
regextest_extra_1() : RegExTestSuite("regex.extra_1") { } + static Test *suite(); +}; + +Test *regextest_extra_1::suite() +{ + RegExTestSuite *suite = new regextest_extra_1; + + suite->add("m", "Bug 230589", "-", "[ ]*(^|[^%])%V", "*%V2", NULL); + suite->add("m", "Bug 504785", "-", "([^_.]*)([^.]*)\\.(..)(.).*", "bbcos_001_c01.q1la", "bbcos_001_c01.q1la", "bbcos", "_001_c01", "q1", "l", NULL); + suite->add("m", "Bug 505048", "-", "\\A\\s*[^<]*\\s*<([^>]+)>", "a<a>", NULL); + suite->add("m", "Bug 505048", "-", "\\A\\s*([^b]*)b", "ab", NULL); + suite->add("m", "Bug 505048", "-", "\\A\\s*[^b]*(b)", "ab", NULL); + suite->add("m", "Bug 505048", "-", "\\A(\\s*)[^b]*(b)", "ab", NULL); + suite->add("m", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", NULL); + suite->add("m", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", "ab", NULL); + suite->add("i", "Bug 505048", "-", "\\A\\s*[^b]*b", "ab", "0 1", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_extra_1, "regex.extra_1"); + + +/* + * wx_1 character classification: ascii + */ + +class regextest_wx_1 : public RegExTestSuite +{ +public: + regextest_wx_1() : RegExTestSuite("regex.wx_1") { } + static Test *suite(); +}; + +Test *regextest_wx_1::suite() +{ + RegExTestSuite *suite = new regextest_wx_1; + + suite->add("m", "1", "&", "[^[:alnum:]]", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL); + suite->add("m", "2", "&", "[[:alnum:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X", "X", NULL); + suite->add("m", "3", "&", "[^[:alpha:]]", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL); + suite->add("m", "4", "&", "[[:alpha:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X", "X", NULL); + suite->add("m", "5", "&", "[^[:cntrl:]]", "\a\b\t\n\v\f\r!", "!", NULL); + suite->add("m", "6", "&", "[[:cntrl:]]", " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL); + suite->add("m", 
"7", "&", "[^[:digit:]]", "0123456789!", "!", NULL); + suite->add("m", "8", "&", "[[:digit:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0", "0", NULL); + suite->add("m", "9", "&", "[^[:graph:]]", "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL); + suite->add("m", "10", "&", "[[:graph:]]", "\a\b\t\n\v\f\r !", "!", NULL); + suite->add("m", "11", "&", "[^[:lower:]]", "abcdefghijklmnopqrstuvwxyz!", "!", NULL); + suite->add("m", "12", "&", "[[:lower:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x", "x", NULL); + suite->add("m", "13", "&", "[^[:print:]]", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n", "\n", NULL); + suite->add("m", "14", "&", "[[:print:]]", "\a\b\n\v\f\rX", "X", NULL); + suite->add("m", "15", "&", "[^[:punct:]]", "!\"#%&'()*,-./:;?@[\\]_{}X", "X", NULL); + suite->add("m", "16", "&", "[[:punct:]]", "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!", "!", NULL); + suite->add("m", "17", "&", "[^[:space:]]", "\t\n\v\f\r X", "X", NULL); + suite->add("m", "18", "&", "[[:space:]]", "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n", "\n", NULL); + suite->add("m", "19", "&", "[^[:upper:]]", "ABCDEFGHIJKLMNOPQRSTUVWXYZ!", "!", NULL); + suite->add("m", "20", "&", "[[:upper:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X", "X", NULL); + suite->add("m", "21", "&", "[^[:xdigit:]]", "0123456789ABCDEFabcdef!", "!", NULL); + suite->add("m", "22", "&", "[[:xdigit:]]", "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a", "a", NULL); + suite->add("i", "23", "&i", "AbCdEfGhIjKlMnOpQrStUvWxYz", "aBcDeFgHiJkLmNoPqRsTuVwXyZ", "0 25", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_1, 
"regex.wx_1"); + + +/* + * wx_2 character classification: western european + */ + +class regextest_wx_2 : public RegExTestSuite +{ +public: + regextest_wx_2() : RegExTestSuite("regex.wx_2") { } + static Test *suite(); +}; + +Test *regextest_wx_2::suite() +{ + RegExTestSuite *suite = new regextest_wx_2; + + suite->add("m", "1", "&", "[^[:alpha:]]", "ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃà áâãäåæçèéêëìÃîïðñòóôõöøùúûüýþÿ!", "!", NULL); + suite->add("m", "2", "&", "[[:alpha:]]", " ¡¢£¤¥¦§¨©«¬Â®¯°±²³´¶·¸¹»¼½¾¿Ã÷X", "X", NULL); + suite->add("m", "3", "&", "[^[:lower:]]", "Ãà áâãäåæçèéêëìÃîïðñòóôõöøùúûüýþÿ!", "!", NULL); + suite->add("m", "4", "&", "[[:lower:]]", " ¡¢£¤¥¦§¨©«¬Â®¯°±²³´¶·¸¹»¼½¾¿ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ÷x", "x", NULL); + suite->add("m", "5", "&", "[^[:upper:]]", "ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ!", "!", NULL); + suite->add("m", "6", "&", "[[:upper:]]", " ¡¢£¤¥¦§¨©«¬Â®¯°±²³´¶·¸¹»¼½¾¿ÃÃà áâãäåæçèéêëìÃîïðñòóôõö÷øùúûüýþÿX", "X", NULL); + suite->add("i", "7", "&i*", "ÃáÃãÃÃ¥ÃçÃéÃëÃÃÃïÃñÃóÃõÃøÃúÃüÃþ", "à ÃâÃäà æÃèÃêÃìÃîÃðÃòÃôÃöÃùÃûÃýÃ", "0 29", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_2, "regex.wx_2"); + + +/* + * wx_3 character classification: cyrillic + */ + +class regextest_wx_3 : public RegExTestSuite +{ +public: + regextest_wx_3() : RegExTestSuite("regex.wx_3") { } + static Test *suite(); +}; + +Test *regextest_wx_3::suite() +{ + RegExTestSuite *suite = new regextest_wx_3; + + suite->add("m", "1", "&", "[^[:alpha:]]", "ÑÐÑабÑдеÑÐ³Ñ Ð¸Ð¹ÐºÐ»Ð¼Ð½Ð¾Ð¿ÑÑÑÑÑжвÑÑзÑÑÑÑÑЮÐÐЦÐÐФÐÐ¥ÐÐÐÐÐÐÐÐЯРСТУÐÐЬЫÐШÐЩЧЪ!", "!", NULL); + suite->add("m", "2", "&", "[^[:lower:]]", "ÑÑабÑдеÑÐ³Ñ Ð¸Ð¹ÐºÐ»Ð¼Ð½Ð¾Ð¿ÑÑÑÑÑжвÑÑзÑÑÑÑÑ!", "!", NULL); + suite->add("m", "3", "&", "[[:lower:]]", "ÐЮÐÐЦÐÐФÐÐ¥ÐÐÐÐÐÐÐÐЯРСТУÐÐЬЫÐШÐЩЧЪx", "x", NULL); + suite->add("m", "4", "&", "[^[:upper:]]", "ÐЮÐÐЦÐÐФÐÐ¥ÐÐÐÐÐÐÐÐЯРСТУÐÐЬЫÐШÐЩЧЪ!", "!", NULL); + suite->add("m", "5", "&", "[[:upper:]]", "ÑÑабÑдеÑÐ³Ñ Ð¸Ð¹ÐºÐ»Ð¼Ð½Ð¾Ð¿ÑÑÑÑÑжвÑÑзÑÑÑÑÑX", "X", NULL); + suite->add("i", 
"6", "&i*", "ÐÑÐбЦдÐÑÐÑ ÐйÐлÐнÐпЯÑСÑУжÐÑЫзШÑЩÑЪ", "ÑЮаÐÑÐеФгХиÐкÐмÐоÐÑÐ ÑТÑÐвЬÑÐÑÐÑЧÑ", "0 32", NULL); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest_wx_3, "regex.wx_3"); + + +/* + * A suite containing all the above suites + */ + +class regextest : public TestSuite +{ +public: + regextest() : TestSuite("regex") { } + static Test *suite(); +}; + +Test *regextest::suite() +{ + TestSuite *suite = new regextest; + + suite->addTest(regextest_1::suite()); + suite->addTest(regextest_2::suite()); + suite->addTest(regextest_3::suite()); + suite->addTest(regextest_4::suite()); + suite->addTest(regextest_5::suite()); + suite->addTest(regextest_6::suite()); + suite->addTest(regextest_7::suite()); + suite->addTest(regextest_8::suite()); + suite->addTest(regextest_9::suite()); + suite->addTest(regextest_10::suite()); + suite->addTest(regextest_11::suite()); + suite->addTest(regextest_12::suite()); + suite->addTest(regextest_13::suite()); + suite->addTest(regextest_14::suite()); + suite->addTest(regextest_15::suite()); + suite->addTest(regextest_16::suite()); + suite->addTest(regextest_17::suite()); + suite->addTest(regextest_18::suite()); + suite->addTest(regextest_19::suite()); + suite->addTest(regextest_20::suite()); + suite->addTest(regextest_21::suite()); + suite->addTest(regextest_22::suite()); + suite->addTest(regextest_23::suite()); + suite->addTest(regextest_24::suite()); + suite->addTest(regextest_25::suite()); + suite->addTest(regextest_26::suite()); + suite->addTest(regextest_27::suite()); + suite->addTest(regextest_28::suite()); + suite->addTest(regextest_29::suite()); + suite->addTest(regextest_30::suite()); + suite->addTest(regextest_extra_1::suite()); + suite->addTest(regextest_wx_1::suite()); + suite->addTest(regextest_wx_2::suite()); + suite->addTest(regextest_wx_3::suite()); + + return suite; +} + +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex"); +CPPUNIT_TEST_SUITE_REGISTRATION(regextest); diff --git a/tests/regex/regex.pl 
b/tests/regex/regex.pl
new file mode 100755
index 0000000000..af0cfe8a78
--- /dev/null
+++ b/tests/regex/regex.pl
@@ -0,0 +1,437 @@
+#!/usr/bin/env perl
+#############################################################################
+# Name: regex.pl
+# Purpose: Generate test code for wxRegEx from 'reg.test'
+# Author: Mike Wetherell
+# RCS-ID: $Id$
+# Copyright: (c) Mike Wetherell
+# Licence: wxWidgets licence
+#############################################################################
+
+# Notes:
+# See './regex.pl -h' for usage
+#
+# Output at the moment is C++ using the cppunit testing framework. The
+# language/framework specifics are separated, with the following 5
+# subs as an interface: 'begin_output', 'begin_section', 'write_test',
+# 'end_section' and 'end_output'. So for a different language/framework,
+# implement 5 new similar subs.
+#
+# I've avoided using 'use encoding "UTF-8"', since this wasn't available
+# in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
+# earlier than perl 5.6.0 aren't going to work.
+#
+
+use strict;
+use warnings;           # '-w' on an env #! line breaks on Linux
+use File::Basename;
+#use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
+
+# if 0 output is wide characters, if 1 output is utf8 encoded
+my $utf = 1;
+
+# quote a parameter (C++ helper): the text is taken from $_, which is also
+# modified, and the quoted literal (with an L prefix if !$utf) is returned
+sub quotecxx {
+    my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
+                "\n" => "n", "\r" => "r", "\t" => "t",
+                "\013" => "v", '"' => '"', "\\" => "\\" );
+
+    # working around lack of 'use encoding'
+    $_ = pack "U0C*", unpack "C*", $_;
+    use utf8;
+
+    s/[\000-\037"\\\177-\x{ffff}]/
+        if ($esc{$&}) {
+            "\\$esc{$&}";
+        } elsif (ord($&) > 0x9f) {
+            if ($utf) {
+                $&;
+            } else {
+                sprintf "\\u%04x", ord($&);
+            }
+        } else {
+            sprintf "\\%03o", ord($&);
+        }
+    /ge;
+
+    # working around lack of 'use encoding'
+    no utf8;
+    $_ = pack "C*", unpack "C*", $_;
+
+    return ($utf ? '"' : 'L"') . $_ . '"';
+}
+
+# start writing the output code (C++ interface): prints the opening
+# comment block, embedding $from and $instructions in it
+sub begin_output {
+    my ($from, $instructions) = @_;
+
+    # embed it in the comment
+    $from = "\n$from";
+    $from =~ s/^(?: )?/ * /mg;
+
+    # $instructions contains information about the flags etc.
+    if ($instructions) {
+        $instructions = "\n$instructions";
+        $instructions =~ s/^(?: )?/ * /mg;
+    }
+
+    my $u = $utf ? " (UTF-8 encoded)" : "";
+
+    print <<EOT;
+/*
+ * Test data for wxRegEx$u
+$from$instructions */
+
+EOT
+}
+
+my @classes;    # [$id, $class] of every section begun so far, in order
+
+# start a new section (C++ interface): remembers it in @classes and
+# prints the suite class and the opening lines of its suite() function
+sub begin_section {
+    my ($id, $title) = @_;
+    my $class = "regextest_$id";
+    $class =~ s/\W/_/g;     # non-word characters in the id become '_'
+    push @classes, [$id, $class];
+
+    print <<EOT;
+
+/*
+ * $id $title
+ */
+
+class $class : public RegExTestSuite
+{
+public:
+ $class() : RegExTestSuite("regex.$id") { }
+ static Test *suite();
+};
+
+Test *$class\::suite()
+{
+ RegExTestSuite *suite = new $class;
+
+EOT
+}
+
+# output a test line (C++ interface): each argument is quoted with
+# quotecxx and the argument list is terminated with NULL
+sub write_test {
+    my @args = @_;
+    $_ = quotecxx for @args;    # quotecxx reads the aliased $_
+    print " suite->add(" . (join ', ', @args) . ", NULL);\n";
+}
+
+# end a section (C++ interface): closes suite() and registers the class
+# of the section most recently started by begin_section
+sub end_section {
+    my ($id, $class) = @{$classes[-1]};
+
+    print <<EOT;
+
+ return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
+
+EOT
+}
+
+# finish off the output (C++ interface): prints an enclosing 'regextest'
+# suite that adds every section recorded in @classes, and registers it
+sub end_output {
+    print <<EOT;
+
+/*
+ * A suite containing all the above suites
+ */
+
+class regextest : public TestSuite
+{
+public:
+ regextest() : TestSuite("regex") { }
+ static Test *suite();
+};
+
+Test *regextest::suite()
+{
+ TestSuite *suite = new regextest;
+
+EOT
+    print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
+
+    print <<EOT;
+
+ return suite;
+}
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
+CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
+EOT
+}
+
+# Parse a tcl string. Handles curly quoting and double quoting.
+#
+# Takes one string and returns the list of words found in it, with the
+# quoting removed and the escapes translated.
+#
+sub parsetcl {
+    my ($curly, $quote);
+    # $curly matches a balanced {...} group. It refers to itself through a
+    # deferred (??{...}) subexpression, which is why the variable has to be
+    # declared before the pattern is assigned to it.
+    # warning: uses experimental features of perl, see perlre(1)
+    $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
+    # $quote matches a "..." group which may contain \" escapes
+    $quote = qr/"(?:\\"|[^"])*"/;
+
+    # the single letter backslash escapes and what they stand for
+    my %unescape = ( "a" => "\a", "b" => "\b", "f" => "\f",
+                     "n" => "\n", "r" => "\r", "t" => "\t",
+                     "v" => "\013" );
+    my $x = qr/[[:xdigit:]]/;
+
+    # a word is a curly group, a double quoted group or a run of non-blanks
+    my $text = shift;
+    my @words = $text =~ /($curly|$quote|\S+)/g;
+
+    foreach my $word (@words) {
+        # curly quoted: drop the braces, only \{ and \} are unescaped
+        if ($word =~ s/^{(.*)}$/$1/) {
+            $word =~ s/\\([{}])/$1/g;
+            next;
+        }
+
+        # double quoted or bare: drop the quotes (if any), then translate
+        # octal, \x, \u and single letter escapes; a backslash before any
+        # other character just yields that character
+        $word =~ s/^"(.*)"$/$1/;
+        $word =~ s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
+            my $seq = $1;
+              $seq =~ m{^([0-7]+)} ? chr(oct($1))
+            : $seq =~ m{^x($x+)}   ? pack("C0U", hex($1) & 0xff)
+            : $seq =~ m{^u($x+)}   ? pack("C0U", hex($1))
+            : $unescape{$seq}      ? $unescape{$seq}
+            :                        $seq;
+        /ge;
+    }
+
+    return @words;
+}
+
+# The three handlers below remember whether a section is currently open,
+# so that begin_section is only called once a section has a test to hold
+# and end_section is only called for a section that was actually begun.
+#
+my @doing = ("0", "");      # id and title given by the last 'doing' line
+my $in_section = 0;         # true while a section is open
+
+# a new heading: close any open section and remember the new id/title
+sub handle_doing {
+    if ($in_section) {
+        end_section();
+    }
+    $in_section = 0;
+    @doing = @_;
+}
+
+# a test: open the pending section first if necessary, then write the test
+sub handle_test {
+    unless ($in_section) {
+        begin_section(@doing);
+    }
+    $in_section = 1;
+    write_test(@_);
+}
+
+# end of input: close any open section and finish off the output
+sub handle_end {
+    if ($in_section) {
+        end_section();
+    }
+    $in_section = 0;
+    end_output();
+}
+
+# 'main' - start by parsing the command line options.
+#
+# Options are removed from @ARGV as they are recognised, so that only the
+# input file names are left for '<>' to read. Running with no arguments at
+# all displays the help.
+#
+my $badoption = !@ARGV;
+my $utfdefault = $utf;
+my $outputname;
+
+for (my $i = 0; $i < @ARGV; ) {
+    # not an option (a lone '-' counts as a file name): leave it in place
+    if ($ARGV[$i] !~ m{^-.}) {
+        $i++;
+        next;
+    }
+
+    # '--' ends the options
+    if ($ARGV[$i] eq '--') {
+        splice @ARGV, $i, 1;
+        last;
+    }
+
+    # -o : output file. The name is either attached ('-oFILE') or is the
+    # next argument. The match before the 'o' is non-greedy so that the
+    # first 'o' is taken as the option letter; a greedy match would split
+    # an attached name such as 'foo.out' at its last 'o'.
+    if ($ARGV[$i] =~ s{^-(.*?)o(.*)$}{-$1}i) {
+        my $attached = $2;
+        if (length $attached) {
+            $outputname = $attached;
+        } elsif ($i + 1 < @ARGV) {
+            $outputname = splice @ARGV, $i + 1, 1;
+        } else {
+            $badoption = 1;     # -o without a file name
+        }
+    }
+
+    # the remaining letters of the argument are single character flags
+    for (split //, substr($ARGV[$i], 1)) {
+        if (/u/i) { # -u : utf-8 output
+            $utf = 1;
+        } elsif (/w/i) { # -w : wide char output
+            $utf = 0;
+        } else {
+            $badoption = 1;
+        }
+    }
+
+    splice @ARGV, $i, 1;
+}
+
+# Display help
+#
+if ($badoption) {
+    my $prog = basename $0;
+    # mark whichever of -w/-u is the built-in default
+    my ($w, $u) = (" (default)", " ");
+    ($w, $u) = ($u, $w) if $utfdefault;
+
+    print <<EOT;
+Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
+Generate test code for wxRegEx from 'reg.test'
+Example: $prog -o regex.inc reg.test wxreg.test
+
+ -w$w Output will be wide characters.
+ -u$u Output will be UTF-8 encoded.
+
+Input files should be in UTF-8. If no input files are specified input is
+read from stdin. If no output file is specified output is written to stdout.
+See the comments in reg.test (in src/regex) for details of the input file
+format.
+EOT
+    exit 0;
+}
+
+# Open the output file
+#
+# The three argument form keeps characters in the name from being taken as
+# part of the open mode, and a failure is reported instead of the output
+# silently going elsewhere.
+#
+if (defined $outputname && length $outputname) {
+    open STDOUT, '>', $outputname
+        or die basename($0) . ": can't write to '$outputname': $!\n";
+}
+
+# Read in the files and initially parse just the comments for copyright
+# information and instructions on the tests
+#
+my @input; # slurped input files stripped of comments
+my $files = ""; # copyright info from the input comments
+my $instructions = ""; # test instructions from the input comments
+
+do {
+    # name of the file about to be read, undef when reading stdin
+    my $inputname = @ARGV ? basename($ARGV[0]) : undef;
+
+    # slurp input: with $/ undefined each '<>' returns a whole file
+    local $/;
+    my $in = <>;
+    $in = "" unless defined $in;
+
+    # remove escaped newlines
+    $in =~ s/(?<!\\)\\\n//g;
+
+    # record the copyrights of the input files
+    for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
+        s/[\s:]+/ /g;
+        $files .= " ";
+        $files .= $inputname . ": "
+            if defined $inputname && $inputname ne '-';
+        $files .= "$_\n";
+    }
+
+    # Parse the comments for instructions on the tests, which look like this:
+    # i successful match with -indices (used in checking things like
+    # nonparticipating subexpressions)
+    # Only the first input file that has any is used.
+    if (!$instructions) {
+        my $sp = qr/\t| {3,}/; # tab or three or more spaces
+        my @instructions = $in =~
+            /\n(
+                (?:
+                    \#$sp\S?$sp\S[^\n]+\n       # instruction line
+                    (?:\#$sp$sp\S[^\n]+\n)*     # continuation lines (if any)
+                )+
+            )/gx;
+
+        if (@instructions) {
+            # the first group found is titled as the test types and the
+            # second (if there is one) as the flag characters
+            $instructions[0] = "Test types:\n$instructions[0]";
+            if (@instructions > 1) {
+                $instructions[1] = "Flag characters:\n$instructions[1]";
+            }
+            $instructions = join "\n", @instructions;
+            $instructions =~ s/^#([^\t]?)/ $1/mg;
+        }
+    }
+
+    # @input is the input of all files (stripped of comments)
+    $in =~ s/^#.*$//mg;
+    push @input, $in;
+
+} while @ARGV;      # '<>' removes each file name from @ARGV as it opens it
+
+# Make a string naming the generator, the input files and copyright info
+#
+my $from = "Generated " . localtime() . " by " . basename $0;
+$from =~ s/[\s]+/ /g;
+if ($files) {
+    # a ':' means at least one entry is prefixed with its file name
+    if ($files =~ /:/) {
+        $from .= " from the following files:";
+    } else {
+        $from .= " from work with the following copyright:";
+    }
+}
+$from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g); # word-wrap
+$from .= "\n$files" if $files;
+
+# Now start to print the code
+#
+begin_output $from, $instructions;
+
+# numbers for 'extra' sections
+my $extra = 1;
+
+for (@input)
+{
+    # Print the main tests
+    #
+    # Test lines look like this:
+    # m 3 b {\(a\)b} ab ab a
+    #
+    # Also looks for heading lines, e.g.:
+    # doing 4 "parentheses"
+    #
+    for (split "\n") {
+        if (/^doing\s+(\S+)\s+(\S.*)/) {
+            handle_doing parsetcl "$1 $2";
+        } elsif (/^[efimp]\s/) {
+            handle_test parsetcl $_;
+        }
+    }
+
+    # Extra tests
+    #
+    # The expression below matches something like this:
+    # test reg-33.8 {Bug 505048} {
+    # regexp -inline {\A\s*[^b]*b} ab
+    # } ab
+    #
+    # The three subexpressions then return these parts:
+    # $extras[$i] = '{Bug 505048}',
+    # $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
+    # $extras[$i + 2] = 'ab'
+    #
+    my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n       # line 1
+                  \s*regexp\s+([^\n]+)\n                # line 2
+                  \}\s*(\S[^\n]*)/gx;                   # line 3
+
+    # the extra tests of each input file get a section of their own
+    handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
+
+    for (my $i = 0; $i < @extras; $i += 3) {
+        my $id = $extras[$i];
+
+        # further parse the middle line into options and the rest (i.e. $args)
+        my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
+
+        my @args = parsetcl $args;
+        $#args = 1; # only want the first two
+
+        # now handle the options: -indices gives an 'i' test, otherwise the
+        # test is 'm' when the expected result is true and 'f' when not
+        my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
+        my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
+
+        # get them all in the right order and print
+        unshift @args, $test, parsetcl($id), '-';
+        push @args, parsetcl(parsetcl($results)) if $results;
+        handle_test @args;
+    }
+}
+
+# finish
+#
+handle_end;
diff --git a/tests/regex/wxreg.test b/tests/regex/wxreg.test
new file mode 100644
index 0000000000..3ae0f23551
--- /dev/null
+++ b/tests/regex/wxreg.test
@@ -0,0 +1,71 @@
+#############################################################################
+# Name: wxreg.test
+# Purpose: Additional tests for the regex lib and wxRegEx
+# Author: Mike Wetherell
+# RCS-ID: $Id$
+# Copyright: (c) 2004 Mike Wetherell.
+# Licence: wxWidgets licence
+#############################################################################
+
+#
+# The layout of this file is the same as src/regex/reg.test. See the comments
+# in that file for full details. The encoding used in here is UTF-8.
+#
+# These tests test the character classifications over the ascii range pretty
+# thoroughly, since hopefully these will be similar for all platforms and
+# locales where wxWidgets runs.
+#
+# Also does some tests involving western european and cyrillic characters.
+# In Unicode mode, all these tests should succeed, which verifies that the +# classifications aren't limited to a single 8-bit character set. +# +# In non-unicode mode, if the test can't be translated into the character +# encoding of the current locale, the test will be skipped. So either may +# succeed or be skipped. +# + +doing wx_1 "character classification: ascii" +m 1 & {[^[:alnum:]]} "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!" +m 2 & {[[:alnum:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X" "X" +m 3 & {[^[:alpha:]]} "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!" +m 4 & {[[:alpha:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X" "X" +m 5 & {[^[:cntrl:]]} "\a\b\t\n\v\f\r!" "!" +m 6 & {[[:cntrl:]]} " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n" +m 7 & {[^[:digit:]]} "0123456789!" "!" +m 8 & {[[:digit:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0" "0" +m 9 & {[^[:graph:]]} "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n" +m 10 & {[[:graph:]]} "\a\b\t\n\v\f\r !" "!" +m 11 & {[^[:lower:]]} "abcdefghijklmnopqrstuvwxyz!" "!" +m 12 & {[[:lower:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x" "x" +m 13 & {[^[:print:]]} "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" "\n" +m 14 & {[[:print:]]} "\a\b\n\v\f\rX" "X" +m 15 & {[^[:punct:]]} "!\"#%&'()*,-./:;?@[\\]_{}X" "X" +m 16 & {[[:punct:]]} "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!" +m 17 & {[^[:space:]]} "\t\n\v\f\r X" "X" +m 18 & {[[:space:]]} "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n" +m 19 & {[^[:upper:]]} "ABCDEFGHIJKLMNOPQRSTUVWXYZ!" "!" 
+m 20 & {[[:upper:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X" "X" +m 21 & {[^[:xdigit:]]} "0123456789ABCDEFabcdef!" "!" +m 22 & {[[:xdigit:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a" "a" +i 23 &i "AbCdEfGhIjKlMnOpQrStUvWxYz" "aBcDeFgHiJkLmNoPqRsTuVwXyZ" "0 25" + +doing wx_2 "character classification: western european" +m 1 & {[^[:alpha:]]} "ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃà áâãäåæçèéêëìÃîïðñòóôõöøùúûüýþÿ!" "!" +m 2 & {[[:alpha:]]} " ¡¢£¤¥¦§¨©«¬Â®¯°±²³´¶·¸¹»¼½¾¿Ã÷X" "X" +m 3 & {[^[:lower:]]} "Ãà áâãäåæçèéêëìÃîïðñòóôõöøùúûüýþÿ!" "!" +m 4 & {[[:lower:]]} " ¡¢£¤¥¦§¨©«¬Â®¯°±²³´¶·¸¹»¼½¾¿ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ÷x" "x" +m 5 & {[^[:upper:]]} "ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃ!" "!" +m 6 & {[[:upper:]]} " ¡¢£¤¥¦§¨©«¬Â®¯°±²³´¶·¸¹»¼½¾¿ÃÃà áâãäåæçèéêëìÃîïðñòóôõö÷øùúûüýþÿX" "X" +i 7 &i* "ÃáÃãÃÃ¥ÃçÃéÃëÃÃÃïÃñÃóÃõÃøÃúÃüÃþ" "à ÃâÃäà æÃèÃêÃìÃîÃðÃòÃôÃöÃùÃûÃýÃ" "0 29" + +doing wx_3 "character classification: cyrillic" +m 1 & {[^[:alpha:]]} "ÑÐÑабÑдеÑÐ³Ñ Ð¸Ð¹ÐºÐ»Ð¼Ð½Ð¾Ð¿ÑÑÑÑÑжвÑÑзÑÑÑÑÑЮÐÐЦÐÐФÐÐ¥ÐÐÐÐÐÐÐÐЯРСТУÐÐЬЫÐШÐЩЧЪ!" "!" +m 2 & {[^[:lower:]]} "ÑÑабÑдеÑÐ³Ñ Ð¸Ð¹ÐºÐ»Ð¼Ð½Ð¾Ð¿ÑÑÑÑÑжвÑÑзÑÑÑÑÑ!" "!" +m 3 & {[[:lower:]]} "ÐЮÐÐЦÐÐФÐÐ¥ÐÐÐÐÐÐÐÐЯРСТУÐÐЬЫÐШÐЩЧЪx" "x" +m 4 & {[^[:upper:]]} "ÐЮÐÐЦÐÐФÐÐ¥ÐÐÐÐÐÐÐÐЯРСТУÐÐЬЫÐШÐЩЧЪ!" "!" 
+m 5 & {[[:upper:]]} "ÑÑабÑдеÑÐ³Ñ Ð¸Ð¹ÐºÐ»Ð¼Ð½Ð¾Ð¿ÑÑÑÑÑжвÑÑзÑÑÑÑÑX" "X" +i 6 &i* "ÐÑÐбЦдÐÑÐÑ ÐйÐлÐнÐпЯÑСÑУжÐÑЫзШÑЩÑЪ" "ÑЮаÐÑÐеФгХиÐкÐмÐоÐÑÐ ÑТÑÐвЬÑÐÑÐÑЧÑ" "0 32" + +#doing bugs "known bugs" +#m 1 - {(\w+).*?(\d\d:\d\d)} "from 10:30 until 12:00" "from" "10:30" + diff --git a/tests/test.bkl b/tests/test.bkl index 910296b8a7..2fd21e6d09 100644 --- a/tests/test.bkl +++ b/tests/test.bkl @@ -10,6 +10,7 @@ <sources> test.cpp mbconv/main.cpp + regex/regex.cpp </sources> <wx-lib>base</wx-lib> </exe> diff --git a/tests/test.dsp b/tests/test.dsp index 4feb363e4e..54b7fb6087 100644 --- a/tests/test.dsp +++ b/tests/test.dsp @@ -439,6 +439,10 @@ SOURCE=.\mbconv\main.cpp # End Source File # Begin Source File +SOURCE=.\regex\regex.cpp +# End Source File +# Begin Source File + SOURCE=.\test.cpp # End Source File # End Group -- 2.47.2