From: Ryan Norton Date: Sat, 10 Jan 2004 10:38:46 +0000 (+0000) Subject: Removed old tests file and replaced with tcl version.... X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/de251aef9b847c20ba83c8343145d58713d5a80d?ds=inline Removed old tests file and replaced with tcl version.... git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@25112 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- diff --git a/src/regex/reg.test b/src/regex/reg.test new file mode 100644 index 0000000000..8bfffad107 --- /dev/null +++ b/src/regex/reg.test @@ -0,0 +1,1135 @@ +# reg.test -- +# +# This file contains a collection of tests for one or more of the Tcl +# built-in commands. Sourcing this file into Tcl runs the tests and +# generates output for errors. No output means no errors were found. +# (Don't panic if you are seeing this as part of the reg distribution +# and aren't using Tcl -- reg's own regression tester also knows how +# to read this file, ignoring the Tcl-isms.) +# +# Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. +# +# RCS: @(#) $Id$ + +if {[lsearch [namespace children] ::tcltest] == -1} { + package require tcltest 2 + namespace import -force ::tcltest::* +} + +# All tests require the testregexp command, return if this +# command doesn't exist + +::tcltest::testConstraint testregexp \ + [expr {[info commands testregexp] != {}}] +::tcltest::testConstraint localeRegexp 0 + +# This file uses some custom procedures, defined below, for regexp regression +# testing. The name of the procedure indicates the general nature of the +# test: +# e compile error expected +# f match failure expected +# m successful match +# i successful match with -indices (used in checking things like +# nonparticipating subexpressions) +# p unsuccessful match with -indices (!!) (used in checking +# partial-match reporting) +# There is also "doing" which sets up title and major test number for each +# block of tests. + +# The first 3 arguments are constant: a minor number (which often gets +# a letter or two suffixed to it internally), some flags, and the RE itself. +# For e, the remaining argument is the name of the compile error expected, +# less the leading "REG_". For the rest, the next argument is the string +# to try the match against. Remaining arguments are the substring expected +# to be matched, and any substrings expected to be matched by subexpressions. +# (For f, these arguments are optional, and if present are ignored except +# that they indicate how many subexpressions should be present in the RE.) +# It is an error for the number of subexpression arguments to be wrong. +# Cases involving nonparticipating subexpressions, checking where empty +# substrings are located, etc. should be done using i and p. + +# The flag characters are complex and a bit eclectic. Generally speaking, +# lowercase letters are compile options, uppercase are expected re_info +# bits, and nonalphabetics are match options, controls for how the test is +# run, or testing options. The one small surprise is that AREs are the +# default, and you must explicitly request lesser flavors of RE. The flags +# are as follows. It is admitted that some are not very mnemonic. +# There are some others which are purely debugging tools and are not +# useful in this file. +# +# - no-op (placeholder) +# + provide fake xy equivalence class and ch collating element +# % force small state-set cache in matcher (to test cache replace) +# ^ beginning of string is not beginning of line +# $ end of string is not end of line +# * test is Unicode-specific, needs big character set +# +# & test as both ARE and BRE +# b BRE +# e ERE +# a turn advanced-features bit on (error unless ERE already) +# q literal string, no metacharacters at all +# +# i case-independent matching +# o ("opaque") no subexpression capture +# p newlines are half-magic, excluded from . and [^ only +# w newlines are half-magic, significant to ^ and $ only +# n newlines are fully magic, both effects +# x expanded RE syntax +# t incomplete-match reporting +# +# A backslash-_a_lphanumeric seen +# B ERE/ARE literal-_b_race heuristic used +# E backslash (_e_scape) seen within [] +# H looka_h_ead constraint seen +# I _i_mpossible to match +# L _l_ocale-specific construct seen +# M unportable (_m_achine-specific) construct seen +# N RE can match empty (_n_ull) string +# P non-_P_OSIX construct seen +# Q {} _q_uantifier seen +# R back _r_eference seen +# S POSIX-un_s_pecified syntax seen +# T prefers shortest (_t_iny) +# U saw original-POSIX botch: unmatched right paren in ERE (_u_gh) + +# The one area we can't easily test is memory-allocation failures (which +# are hard to provoke on command). Embedded NULs also are not tested at +# the moment, but this is a historical accident which should be fixed. + + + +# test procedures and related + +set ask "about" +set xflags "xflags" +set testbypassed 0 + +# re_info abbreviation mapping table +set infonames(A) "REG_UBSALNUM" +set infonames(B) "REG_UBRACES" +set infonames(E) "REG_UBBS" +set infonames(H) "REG_ULOOKAHEAD" +set infonames(I) "REG_UIMPOSSIBLE" +set infonames(L) "REG_ULOCALE" +set infonames(M) "REG_UUNPORT" +set infonames(N) "REG_UEMPTYMATCH" +set infonames(P) "REG_UNONPOSIX" +set infonames(Q) "REG_UBOUNDS" +set infonames(R) "REG_UBACKREF" +set infonames(S) "REG_UUNSPEC" +set infonames(T) "REG_USHORTEST" +set infonames(U) "REG_UPBOTCH" +set infonameorder "RHQBAUEPSMLNIT" ;# must match bit order, lsb first + +# set major test number and description +proc doing {major desc} { + global prefix description testbypassed + + if {$testbypassed != 0} { + puts stdout "!!! bypassed $testbypassed tests in\ + $prefix, `$description'" + } + + set prefix reg-$major + set description "reg $desc" + set testbypassed 0 +} + +# build test number (internal) +proc tno {testid} { + return [join $testid .] +} + +# build description, with possible modifiers (internal) +proc desc {testid} { + global description + + set d $description + if {[llength $testid] > 1} { + set d "([lreplace $testid 0 0]) $d" + } + return $d +} + +# build trailing options and flags argument from a flags string (internal) +proc flags {fl} { + global xflags + + set args [list] + set flags "" + foreach f [split $fl ""] { + switch -exact -- $f { + "i" { lappend args "-nocase" } + "x" { lappend args "-expanded" } + "n" { lappend args "-line" } + "p" { lappend args "-linestop" } + "w" { lappend args "-lineanchor" } + "-" { } + default { append flags $f } + } + } + if {[string compare $flags ""] != 0} { + lappend args -$xflags $flags + } + return $args +} + +# build info-flags list from a flags string (internal) +proc infoflags {fl} { + global infonames infonameorder + + set ret [list] + foreach f [split $infonameorder ""] { + if {[string first $f $fl] >= 0} { + lappend ret $infonames($f) + } + } + return $ret +} + +# compilation error expected +proc e {testid flags re err} { + global prefix ask errorCode + + # Tcl locale stuff doesn't do the ch/xy test fakery yet + if {[string first "+" $flags] >= 0} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] localeRegexp {} {} + return + } + + # if &, test as both ARE and BRE + set amp [string first "&" $flags] + if {$amp >= 0} { + set f [string range $flags 0 [expr $amp - 1]] + append f [string range $flags [expr $amp + 1] end] + e [linsert $testid end ARE] ${f} $re $err + e [linsert $testid end BRE] ${f}b $re $err + return + } + + set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]] + set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]" + test $prefix.[tno $testid] [desc $testid] \ + {testregexp} $run [list 1 REG_$err] +} + +# match failure expected +proc f {testid flags re target args} { + global prefix description ask + + # Tcl locale stuff doesn't do the ch/xy test fakery yet + if {[string first "+" $flags] >= 0} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] localeRegexp {} {} + return + } + + # if &, test as both ARE and BRE + set amp [string first "&" $flags] + if {$amp >= 0} { + set f [string range $flags 0 [expr $amp - 1]] + append f [string range $flags [expr $amp + 1] end] + eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \ + $target] + eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \ + $target] + return + } + + set f [flags $flags] + set infoflags [infoflags $flags] + set ccmd [concat [list testregexp -$ask] $f [list $re]] + set nsub [expr [llength $args] - 1] + if {$nsub == -1} { + # didn't tell us number of subexps + set ccmd "lreplace \[$ccmd\] 0 0" + set info [list $infoflags] + } else { + set info [list $nsub $infoflags] + } + lappend testid "compile" + test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info + + set testid [lreplace $testid end end "execute"] + set ecmd [concat [list testregexp] $f [list $re $target]] + test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0 +} + +# match expected, internal routine that does the work +# parameters like the "real" routines except they don't have "opts", +# which is a possibly-empty list of switches for the regexp match attempt +# The ! flag is used to indicate expected match failure (for REG_EXPECT, +# which wants argument testing even in the event of failure). +proc matchexpected {opts testid flags re target args} { + global prefix description ask regBug + + if {[info exists regBug] && $regBug} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1} + return + } + + # Tcl locale stuff doesn't do the ch/xy test fakery yet + if {[string first "+" $flags] >= 0} { + # This will register as a skipped test + test $prefix.[tno $testid] [desc $testid] localeRegexp {} {} + return + } + + # if &, test as both BRE and ARE + set amp [string first "&" $flags] + if {$amp >= 0} { + set f [string range $flags 0 [expr $amp - 1]] + append f [string range $flags [expr $amp + 1] end] + eval [concat [list matchexpected $opts \ + [linsert $testid end ARE] ${f} $re $target] $args] + eval [concat [list matchexpected $opts \ + [linsert $testid end BRE] ${f}b $re $target] $args] + return + } + + set f [flags $flags] + set infoflags [infoflags $flags] + set ccmd [concat [list testregexp -$ask] $f [list $re]] + set ecmd [concat [list testregexp] $opts $f [list $re $target]] + + set nsub [expr [llength $args] - 1] + set names [list] + set refs "" + for {set i 0} {$i <= $nsub} {incr i} { + if {$i == 0} { + set name match + } else { + set name sub$i + } + lappend names $name + append refs " \$$name" + set $name "" + } + if {[string first "o" $flags] >= 0} { ;# REG_NOSUB kludge + set nsub 0 ;# unsigned value cannot be -1 + } + if {[string first "t" $flags] >= 0} { ;# REG_EXPECT + incr nsub -1 ;# the extra does not count + } + set ecmd [concat $ecmd $names] + set erun "list \[$ecmd\] $refs" + set retcode [list 1] + if {[string first "!" $flags] >= 0} { + set retcode [list 0] + } + set result [concat $retcode $args] + + set info [list $nsub $infoflags] + lappend testid "compile" + test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info + set testid [lreplace $testid end end "execute"] + test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result +} + +# match expected (no missing, empty, or ambiguous submatches) +# m testno flags re target mat submat ... +proc m {args} { + eval matchexpected [linsert $args 0 [list]] +} + +# match expected (full fanciness) +# i testno flags re target mat submat ... +proc i {args} { + eval matchexpected [linsert $args 0 [list "-indices"]] +} + +# partial match expected +# p testno flags re target mat "" ... +# Quirk: number of ""s must be one more than number of subREs. +proc p {args} { + set f [lindex $args 1] ;# add ! flag + set args [lreplace $args 1 1 "!$f"] + eval matchexpected [linsert $args 0 [list "-indices"]] +} + +# test is a knownBug +proc knownBug {args} { + set ::regBug 1 + uplevel #0 $args + set ::regBug 0 +} + + + +# the tests themselves + + + +# support functions and preliminary misc. +# This is sensitive to changes in message wording, but we really have to +# test the code->message expansion at least once. +test reg-0.1 "regexp error reporting" { + list [catch {regexp (*) ign} msg] $msg +} {1 {couldn't compile regular expression pattern: quantifier operand invalid}} + + + +doing 1 "basic sanity checks" +m 1 & abc abc abc +f 2 & abc def +m 3 & abc xyabxabce abc + + + +doing 2 "invalid option combinations" +e 1 qe a INVARG +e 2 qa a INVARG +e 3 qx a INVARG +e 4 qn a INVARG +e 5 ba a INVARG + + + +doing 3 "basic syntax" +i 1 &NS "" a {0 -1} +m 2 NS a| a a +m 3 - a|b a a +m 4 - a|b b b +m 5 NS a||b b b +m 6 & ab ab ab + + + +doing 4 "parentheses" +m 1 - (a)e ae ae a +m 2 o (a)e ae +m 3 b {\(a\)b} ab ab a +m 4 - a((b)c) abc abc bc b +m 5 - a(b)(c) abc abc b c +e 6 - a(b EPAREN +e 7 b {a\(b} EPAREN +# sigh, we blew it on the specs here... someday this will be fixed in POSIX, +# but meanwhile, it's fixed in AREs +m 8 eU a)b a)b a)b +e 9 - a)b EPAREN +e 10 b {a\)b} EPAREN +m 11 P a(?:b)c abc abc +e 12 e a(?:b)c BADRPT +i 13 S a()b ab {0 1} {1 0} +m 14 SP a(?:)b ab ab +i 15 S a(|b)c ac {0 1} {1 0} +m 16 S a(b|)c abc abc b + + + +doing 5 "simple one-char matching" +# general case of brackets done later +m 1 & a.b axb axb +f 2 &n "a.b" "a\nb" +m 3 & {a[bc]d} abd abd +m 4 & {a[bc]d} acd acd +f 5 & {a[bc]d} aed +f 6 & {a[^bc]d} abd +m 7 & {a[^bc]d} aed aed +f 8 &p "a\[^bc]d" "a\nd" + + + +doing 6 "context-dependent syntax" +# plus odds and ends +e 1 - * BADRPT +m 2 b * * * +m 3 b {\(*\)} * * * +e 4 - (*) BADRPT +m 5 b ^* * * +e 6 - ^* BADRPT +f 7 & ^b ^b +m 8 b x^ x^ x^ +f 9 I x^ x +m 10 n "\n^" "x\nb" "\n" +f 11 bS {\(^b\)} ^b +m 12 - (^b) b b b +m 13 & {x$} x x +m 14 bS {\(x$\)} x x x +m 15 - {(x$)} x x x +m 16 b {x$y} "x\$y" "x\$y" +f 17 I {x$y} xy +m 18 n "x\$\n" "x\n" "x\n" +e 19 - + BADRPT +e 20 - ? BADRPT + + + +doing 7 "simple quantifiers" +m 1 &N a* aa aa +i 2 &N a* b {0 -1} +m 3 - a+ aa aa +m 4 - a?b ab ab +m 5 - a?b b b +e 6 - ** BADRPT +m 7 bN ** *** *** +e 8 & a** BADRPT +e 9 & a**b BADRPT +e 10 & *** BADRPT +e 11 - a++ BADRPT +e 12 - a?+ BADRPT +e 13 - a?* BADRPT +e 14 - a+* BADRPT +e 15 - a*+ BADRPT + + + +doing 8 "braces" +m 1 NQ "a{0,1}" "" "" +m 2 NQ "a{0,1}" ac a +e 3 - "a{1,0}" BADBR +e 4 - "a{1,2,3}" BADBR +e 5 - "a{257}" BADBR +e 6 - "a{1000}" BADBR +e 7 - "a{1" EBRACE +e 8 - "a{1n}" BADBR +m 9 BS "a{b" "a\{b" "a\{b" +m 10 BS "a{" "a\{" "a\{" +m 11 bQ "a\\{0,1\\}b" cb b +e 12 b "a\\{0,1" EBRACE +e 13 - "a{0,1\\" BADBR +m 14 Q "a{0}b" ab b +m 15 Q "a{0,0}b" ab b +m 16 Q "a{0,1}b" ab ab +m 17 Q "a{0,2}b" b b +m 18 Q "a{0,2}b" aab aab +m 19 Q "a{0,}b" aab aab +m 20 Q "a{1,1}b" aab ab +m 21 Q "a{1,3}b" aaaab aaab +f 22 Q "a{1,3}b" b +m 23 Q "a{1,}b" aab aab +f 24 Q "a{2,3}b" ab +m 25 Q "a{2,3}b" aaaab aaab +f 26 Q "a{2,}b" ab +m 27 Q "a{2,}b" aaaab aaaab + + + +doing 9 "brackets" +m 1 & {a[bc]} ac ac +m 2 & {a[-]} a- a- +m 3 & {a[[.-.]]} a- a- +m 4 &L {a[[.zero.]]} a0 a0 +m 5 &LM {a[[.zero.]-9]} a2 a2 +m 6 &M {a[0-[.9.]]} a2 a2 +m 7 &+L {a[[=x=]]} ax ax +m 8 &+L {a[[=x=]]} ay ay +f 9 &+L {a[[=x=]]} az +e 10 & {a[0-[=x=]]} ERANGE +m 11 &L {a[[:digit:]]} a0 a0 +e 12 & {a[[:woopsie:]]} ECTYPE +f 13 &L {a[[:digit:]]} ab +e 14 & {a[0-[:digit:]]} ERANGE +m 15 &LP {[[:<:]]a} a a +m 16 &LP {a[[:>:]]} a a +e 17 & {a[[..]]b} ECOLLATE +e 18 & {a[[==]]b} ECOLLATE +e 19 & {a[[::]]b} ECTYPE +e 20 & {a[[.a} EBRACK +e 21 & {a[[=a} EBRACK +e 22 & {a[[:a} EBRACK +e 23 & {a[} EBRACK +e 24 & {a[b} EBRACK +e 25 & {a[b-} EBRACK +e 26 & {a[b-c} EBRACK +m 27 &M {a[b-c]} ab ab +m 28 & {a[b-b]} ab ab +m 29 &M {a[1-2]} a2 a2 +e 30 & {a[c-b]} ERANGE +e 31 & {a[a-b-c]} ERANGE +m 32 &M {a[--?]b} a?b a?b +m 33 & {a[---]b} a-b a-b +m 34 & {a[]b]c} a]c a]c +m 35 EP {a[\]]b} a]b a]b +f 36 bE {a[\]]b} a]b +m 37 bE {a[\]]b} "a\\]b" "a\\]b" +m 38 eE {a[\]]b} "a\\]b" "a\\]b" +m 39 EP {a[\\]b} "a\\b" "a\\b" +m 40 eE {a[\\]b} "a\\b" "a\\b" +m 41 bE {a[\\]b} "a\\b" "a\\b" +e 42 - {a[\Z]b} EESCAPE +m 43 & {a[[b]c} "a\[c" "a\[c" +m 44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \ + "a\u0102\u02ffb" "a\u0102\u02ffb" + + + +doing 10 "anchors and newlines" +m 1 & ^a a a +f 2 &^ ^a a +i 3 &N ^ a {0 -1} +i 4 & {a$} aba {2 2} +f 5 {&$} {a$} a +i 6 &N {$} ab {2 1} +m 7 &n ^a a a +m 8 &n "^a" "b\na" "a" +i 9 &w "^a" "a\na" {0 0} +i 10 &n^ "^a" "a\na" {2 2} +m 11 &n {a$} a a +m 12 &n "a\$" "a\nb" "a" +i 13 &n "a\$" "a\na" {0 0} +i 14 N ^^ a {0 -1} +m 15 b ^^ ^ ^ +i 16 N {$$} a {1 0} +m 17 b {$$} "\$" "\$" +m 18 &N {^$} "" "" +f 19 &N {^$} a +i 20 &nN "^\$" "a\n\nb" {2 1} +m 21 N {$^} "" "" +m 22 b {$^} "\$^" "\$^" +m 23 P {\Aa} a a +m 24 ^P {\Aa} a a +f 25 ^nP {\Aa} "b\na" +m 26 P {a\Z} a a +m 27 {$P} {a\Z} a a +f 28 {$nP} {a\Z} "a\nb" +e 29 - ^* BADRPT +e 30 - {$*} BADRPT +e 31 - {\A*} BADRPT +e 32 - {\Z*} BADRPT + + + +doing 11 "boundary constraints" +m 1 &LP {[[:<:]]a} a a +m 2 &LP {[[:<:]]a} -a a +f 3 &LP {[[:<:]]a} ba +m 4 &LP {a[[:>:]]} a a +m 5 &LP {a[[:>:]]} a- a +f 6 &LP {a[[:>:]]} ab +m 7 bLP {\} a a +f 10 bLP {a\>} ab +m 11 LP {\ya} a a +f 12 LP {\ya} ba +m 13 LP {a\y} a a +f 14 LP {a\y} ab +m 15 LP {a\Y} ab a +f 16 LP {a\Y} a- +f 17 LP {a\Y} a +f 18 LP {-\Y} -a +m 19 LP {-\Y} -% - +f 20 LP {\Y-} a- +e 21 - {[[:<:]]*} BADRPT +e 22 - {[[:>:]]*} BADRPT +e 23 b {\<*} BADRPT +e 24 b {\>*} BADRPT +e 25 - {\y*} BADRPT +e 26 - {\Y*} BADRPT +m 27 LP {\ma} a a +f 28 LP {\ma} ba +m 29 LP {a\M} a a +f 30 LP {a\M} ab +f 31 ILP {\Ma} a +f 32 ILP {a\m} a + + + +doing 12 "character classes" +m 1 LP {a\db} a0b a0b +f 2 LP {a\db} axb +f 3 LP {a\Db} a0b +m 4 LP {a\Db} axb axb +m 5 LP "a\\sb" "a b" "a b" +m 6 LP "a\\sb" "a\tb" "a\tb" +m 7 LP "a\\sb" "a\nb" "a\nb" +f 8 LP {a\sb} axb +m 9 LP {a\Sb} axb axb +f 10 LP "a\\Sb" "a b" +m 11 LP {a\wb} axb axb +f 12 LP {a\wb} a-b +f 13 LP {a\Wb} axb +m 14 LP {a\Wb} a-b a-b +m 15 LP {\y\w+z\y} adze-guz guz +m 16 LPE {a[\d]b} a1b a1b +m 17 LPE "a\[\\s]b" "a b" "a b" +m 18 LPE {a[\w]b} axb axb + + + +doing 13 "escapes" +e 1 & "a\\" EESCAPE +m 2 - {a\]+)>} a +} 1 + +test reg-33.4 {Bug 505048} { + regexp {\A\s*([^b]*)b} ab +} 1 + +test reg-33.5 {Bug 505048} { + regexp {\A\s*[^b]*(b)} ab +} 1 + +test reg-33.6 {Bug 505048} { + regexp {\A(\s*)[^b]*(b)} ab +} 1 + +test reg-33.7 {Bug 505048} { + regexp {\A\s*[^b]*b} ab +} 1 + +test reg-33.8 {Bug 505048} { + regexp -inline {\A\s*[^b]*b} ab +} ab + +test reg-33.9 {Bug 505048} { + regexp -indices -inline {\A\s*[^b]*b} ab +} {{0 1}} + +test reg-33.10 {Bug 840258} { + regsub {(^|\n)+\.*b} \n.b {} tmp +} 1 + +test reg-33.11 {Bug 840258} { + regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \ + "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp +} 1 + +# cleanup +::tcltest::cleanupTests +return diff --git a/src/regex/tests b/src/regex/tests deleted file mode 100644 index e4d928dad6..0000000000 --- a/src/regex/tests +++ /dev/null @@ -1,477 +0,0 @@ -# regular expression test set -# Lines are at least three fields, separated by one or more tabs. "" stands -# for an empty field. First field is an RE. Second field is flags. If -# C flag given, regcomp() is expected to fail, and the third field is the -# error name (minus the leading REG_). -# -# Otherwise it is expected to succeed, and the third field is the string to -# try matching it against. If there is no fourth field, the match is -# expected to fail. If there is a fourth field, it is the substring that -# the RE is expected to match. If there is a fifth field, it is a comma- -# separated list of what the subexpressions should match, with - indicating -# no match for that one. In both the fourth and fifth fields, a (sub)field -# starting with @ indicates that the (sub)expression is expected to match -# a null string followed by the stuff after the @; this provides a way to -# test where null strings match. The character `N' in REs and strings -# is newline, `S' is space, `T' is tab, `Z' is NUL. -# -# The full list of flags: -# - placeholder, does nothing -# b RE is a BRE, not an ERE -# & try it as both an ERE and a BRE -# C regcomp() error expected, third field is error name -# i REG_ICASE -# m ("mundane") REG_NOSPEC -# s REG_NOSUB (not really testable) -# n REG_NEWLINE -# ^ REG_NOTBOL -# $ REG_NOTEOL -# # REG_STARTEND (see below) -# p REG_PEND -# -# For REG_STARTEND, the start/end offsets are those of the substring -# enclosed in (). - -# basics -a & a a -abc & abc abc -abc|de - abc abc -a|b|c - abc a - -# parentheses and perversions thereof -a(b)c - abc abc -a\(b\)c b abc abc -a( C EPAREN -a( b a( a( -a\( - a( a( -a\( bC EPAREN -a\(b bC EPAREN -a(b C EPAREN -a(b b a(b a(b -# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly) -a) - a) a) -) - ) ) -# end gagging (in a just world, those *should* give EPAREN) -a) b a) a) -a\) bC EPAREN -\) bC EPAREN -a()b - ab ab -a\(\)b b ab ab - -# anchoring and REG_NEWLINE -^abc$ & abc abc -a^b - a^b -a^b b a^b a^b -a$b - a$b -a$b b a$b a$b -^ & abc @abc -$ & abc @ -^$ & "" @ -$^ - "" @ -\($\)\(^\) b "" @ -# stop retching, those are legitimate (although disgusting) -^^ - "" @ -$$ - "" @ -b$ & abNc -b$ &n abNc b -^b$ & aNbNc -^b$ &n aNbNc b -^$ &n aNNb @Nb -^$ n abc -^$ n abcN @ -$^ n aNNb @Nb -\($\)\(^\) bn aNNb @Nb -^^ n^ aNNb @Nb -$$ n aNNb @NN -^a ^ a -a$ $ a -^a ^n aNb -^b ^n aNb b -a$ $n bNa -b$ $n bNa b -a*(^b$)c* - b b -a*\(^b$\)c* b b b - -# certain syntax errors and non-errors -| C EMPTY -| b | | -* C BADRPT -* b * * -+ C BADRPT -? C BADRPT -"" &C EMPTY -() - abc @abc -\(\) b abc @abc -a||b C EMPTY -|ab C EMPTY -ab| C EMPTY -(|a)b C EMPTY -(a|)b C EMPTY -(*a) C BADRPT -(+a) C BADRPT -(?a) C BADRPT -({1}a) C BADRPT -\(\{1\}a\) bC BADRPT -(a|*b) C BADRPT -(a|+b) C BADRPT -(a|?b) C BADRPT -(a|{1}b) C BADRPT -^* C BADRPT -^* b * * -^+ C BADRPT -^? C BADRPT -^{1} C BADRPT -^\{1\} bC BADRPT - -# metacharacters, backslashes -a.c & abc abc -a[bc]d & abd abd -a\*c & a*c a*c -a\\b & a\b a\b -a\\\*b & a\*b a\*b -a\bc & abc abc -a\ &C EESCAPE -a\\bc & a\bc a\bc -\{ bC BADRPT -a\[b & a[b a[b -a[b &C EBRACK -# trailing $ is a peculiar special case for the BRE code -a$ & a a -a$ & a$ -a\$ & a -a\$ & a$ a$ -a\\$ & a -a\\$ & a$ -a\\$ & a\$ -a\\$ & a\ a\ - -# back references, ugh -a\(b\)\2c bC ESUBREG -a\(b\1\)c bC ESUBREG -a\(b*\)c\1d b abbcbbd abbcbbd bb -a\(b*\)c\1d b abbcbd -a\(b*\)c\1d b abbcbbbd -^\(.\)\1 b abc -a\([bc]\)\1d b abcdabbd abbd b -a\(\([bc]\)\2\)*d b abbccd abbccd -a\(\([bc]\)\2\)*d b abbcbd -# actually, this next one probably ought to fail, but the spec is unclear -a\(\(b\)*\2\)*d b abbbd abbbd -# here is a case that no NFA implementation does right -\(ab*\)[ab]*\1 b ababaaa ababaaa a -# check out normal matching in the presence of back refs -\(a\)\1bcd b aabcd aabcd -\(a\)\1bc*d b aabcd aabcd -\(a\)\1bc*d b aabd aabd -\(a\)\1bc*d b aabcccd aabcccd -\(a\)\1bc*[ce]d b aabcccd aabcccd -^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd - -# ordinary repetitions -ab*c & abc abc -ab+c - abc abc -ab?c - abc abc -a\(*\)b b a*b a*b -a\(**\)b b ab ab -a\(***\)b bC BADRPT -*a b *a *a -**a b a a -***a bC BADRPT - -# the dreaded bounded repetitions -{ & { { -{abc & {abc {abc -{1 C BADRPT -{1} C BADRPT -a{b & a{b a{b -a{1}b - ab ab -a\{1\}b b ab ab -a{1,}b - ab ab -a\{1,\}b b ab ab -a{1,2}b - aab aab -a\{1,2\}b b aab aab -a{1 C EBRACE -a\{1 bC EBRACE -a{1a C EBRACE -a\{1a bC EBRACE -a{1a} C BADBR -a\{1a\} bC BADBR -a{,2} - a{,2} a{,2} -a\{,2\} bC BADBR -a{,} - a{,} a{,} -a\{,\} bC BADBR -a{1,x} C BADBR -a\{1,x\} bC BADBR -a{1,x C EBRACE -a\{1,x bC EBRACE -a{300} C BADBR -a\{300\} bC BADBR -a{1,0} C BADBR -a\{1,0\} bC BADBR -ab{0,0}c - abcac ac -ab\{0,0\}c b abcac ac -ab{0,1}c - abcac abc -ab\{0,1\}c b abcac abc -ab{0,3}c - abbcac abbc -ab\{0,3\}c b abbcac abbc -ab{1,1}c - acabc abc -ab\{1,1\}c b acabc abc -ab{1,3}c - acabc abc -ab\{1,3\}c b acabc abc -ab{2,2}c - abcabbc abbc -ab\{2,2\}c b abcabbc abbc -ab{2,4}c - abcabbc abbc -ab\{2,4\}c b abcabbc abbc -((a{1,10}){1,10}){1,10} - a a a,a - -# multiple repetitions -a** &C BADRPT -a++ C BADRPT -a?? C BADRPT -a*+ C BADRPT -a*? C BADRPT -a+* C BADRPT -a+? C BADRPT -a?* C BADRPT -a?+ C BADRPT -a{1}{1} C BADRPT -a*{1} C BADRPT -a+{1} C BADRPT -a?{1} C BADRPT -a{1}* C BADRPT -a{1}+ C BADRPT -a{1}? C BADRPT -a*{b} - a{b} a{b} -a\{1\}\{1\} bC BADRPT -a*\{1\} bC BADRPT -a\{1\}* bC BADRPT - -# brackets, and numerous perversions thereof -a[b]c & abc abc -a[ab]c & abc abc -a[^ab]c & adc adc -a[]b]c & a]c a]c -a[[b]c & a[c a[c -a[-b]c & a-c a-c -a[^]b]c & adc adc -a[^-b]c & adc adc -a[b-]c & a-c a-c -a[b &C EBRACK -a[] &C EBRACK -a[1-3]c & a2c a2c -a[3-1]c &C ERANGE -a[1-3-5]c &C ERANGE -a[[.-.]--]c & a-c a-c -a[1- &C ERANGE -a[[. &C EBRACK -a[[.x &C EBRACK -a[[.x. &C EBRACK -a[[.x.] &C EBRACK -a[[.x.]] & ax ax -a[[.x,.]] &C ECOLLATE -a[[.one.]]b & a1b a1b -a[[.notdef.]]b &C ECOLLATE -a[[.].]]b & a]b a]b -a[[:alpha:]]c & abc abc -a[[:notdef:]]c &C ECTYPE -a[[: &C EBRACK -a[[:alpha &C EBRACK -a[[:alpha:] &C EBRACK -a[[:alpha,:] &C ECTYPE -a[[:]:]]b &C ECTYPE -a[[:-:]]b &C ECTYPE -a[[:alph:]] &C ECTYPE -a[[:alphabet:]] &C ECTYPE -[[:alnum:]]+ - -%@a0X- a0X -[[:alpha:]]+ - -%@aX0- aX -[[:blank:]]+ - aSSTb SST -[[:cntrl:]]+ - aNTb NT -[[:digit:]]+ - a019b 019 -[[:graph:]]+ - Sa%bS a%b -[[:lower:]]+ - AabC ab -[[:print:]]+ - NaSbN aSb -[[:punct:]]+ - S%-&T %-& -[[:space:]]+ - aSNTb SNT -[[:upper:]]+ - aBCd BC -[[:xdigit:]]+ - p0f3Cq 0f3C -a[[=b=]]c & abc abc -a[[= &C EBRACK -a[[=b &C EBRACK -a[[=b= &C EBRACK -a[[=b=] &C EBRACK -a[[=b,=]] &C ECOLLATE -a[[=one=]]b & a1b a1b - -# complexities -a(((b)))c - abc abc -a(b|(c))d - abd abd -a(b*|c)d - abbd abbd -# just gotta have one DFA-buster, of course -a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab -# and an inline expansion in case somebody gets tricky -a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab -# and in case somebody just slips in an NFA... -a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights -# fish for anomalies as the number of states passes 32 -12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789 -123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890 -1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901 -12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012 -123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123 -# and one really big one, beyond any plausible word width -1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890 -# fish for problems as brackets go past 8 -[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm -[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo -[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq -[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq - -# subtleties of matching -abc & xabcy abc -a\(b\)?c\1d b acd -aBc i Abc Abc -a[Bc]*d i abBCcd abBCcd -0[[:upper:]]1 &i 0a1 0a1 -0[[:lower:]]1 &i 0A1 0A1 -a[^b]c &i abc -a[^b]c &i aBc -a[^b]c &i adc adc -[a]b[c] - abc abc -[a]b[a] - aba aba -[abc]b[abc] - abc abc -[abc]b[abd] - abd abd -a(b?c)+d - accd accd -(wee|week)(knights|night) - weeknights weeknights -(we|wee|week|frob)(knights|night|day) - weeknights weeknights -a[bc]d - xyzaaabcaababdacd abd -a[ab]c - aaabc abc -abc s abc abc -a* & b @b - -# Let's have some fun -- try to match a C comment. -# first the obvious, which looks okay at first glance... -/\*.*\*/ - /*x*/ /*x*/ -# but... -/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/ -# okay, we must not match */ inside; try to do that... -/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/ -/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/ -# but... -/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/ -# and a still fancier version, which does it right (I think)... -/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/ -/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/ -/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/ -/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/ -/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/ -/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/ - -# subexpressions -.* - abc abc - -a(b)(c)d - abcd abcd b,c -a(((b)))c - abc abc b,b,b -a(b|(c))d - abd abd b,- -a(b*|c|e)d - abbd abbd bb -a(b*|c|e)d - acd acd c -a(b*|c|e)d - ad ad @d -a(b?)c - abc abc b -a(b?)c - ac ac @c -a(b+)c - abc abc b -a(b+)c - abbbc abbbc bbb -a(b*)c - ac ac @c -(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de -# the regression tester only asks for 9 subexpressions -a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j -a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k -a([bc]?)c - abc abc b -a([bc]?)c - ac ac @c -a([bc]+)c - abc abc b -a([bc]+)c - abcc abcc bc -a([bc]+)bc - abcbc abcbc bc -a(bb+|b)b - abb abb b -a(bbb+|bb+|b)b - abb abb b -a(bbb+|bb+|b)b - abbb abbb bb -a(bbb+|bb+|b)bb - abbb abbb b -(.*).* - abcdef abcdef abcdef -(a*)* - bc @b @b - -# do we get the right subexpression when it is used more than once? -a(b|c)*d - ad ad - -a(b|c)*d - abcd abcd c -a(b|c)+d - abd abd b -a(b|c)+d - abcd abcd c -a(b|c?)+d - ad ad @d -a(b|c?)+d - abcd abcd @d -a(b|c){0,0}d - ad ad - -a(b|c){0,1}d - ad ad - -a(b|c){0,1}d - abd abd b -a(b|c){0,2}d - ad ad - -a(b|c){0,2}d - abcd abcd c -a(b|c){0,}d - ad ad - -a(b|c){0,}d - abcd abcd c -a(b|c){1,1}d - abd abd b -a(b|c){1,1}d - acd acd c -a(b|c){1,2}d - abd abd b -a(b|c){1,2}d - abcd abcd c -a(b|c){1,}d - abd abd b -a(b|c){1,}d - abcd abcd c -a(b|c){2,2}d - acbd acbd b -a(b|c){2,2}d - abcd abcd c -a(b|c){2,4}d - abcd abcd c -a(b|c){2,4}d - abcbd abcbd b -a(b|c){2,4}d - abcbcd abcbcd c -a(b|c){2,}d - abcd abcd c -a(b|c){2,}d - abcbd abcbd b -a(b+|((c)*))+d - abd abd @d,@d,- -a(b+|((c)*))+d - abcd abcd @d,@d,- - -# check out the STARTEND option -[abc] &# a(b)c b -[abc] &# a(d)c -[abc] &# a(bc)d b -[abc] &# a(dc)d c -. &# a()c -b.*c &# b(bc)c bc -b.* &# b(bc)c bc -.*c &# b(bc)c bc - -# plain strings, with the NOSPEC flag -abc m abc abc -abc m xabcy abc -abc m xyz -a*b m aba*b a*b -a*b m ab -"" mC EMPTY - -# cases involving NULs -aZb & a a -aZb &p a -aZb &p# (aZb) aZb -aZ*b &p# (ab) ab -a.b &# (aZb) aZb -a.* &# (aZb)c aZb - -# word boundaries (ick) -[[:<:]]a & a a -[[:<:]]a & ba -[[:<:]]a & -a a -a[[:>:]] & a a -a[[:>:]] & ab -a[[:>:]] & a- a -[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc -[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc -[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc -[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc -[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_ -[[:<:]]a_b[[:>:]] & x_a_b - -# past problems, and suspected problems -(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1 -abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop -abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv -(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11 -CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11 -Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz -a?b - ab ab --\{0,1\}[0-9]*$ b -5 -5 -a*a*a*a*a*a*a* & aaaaaa aaaaaa