3 # This file contains a collection of tests for one or more of the Tcl
4 # built-in commands. Sourcing this file into Tcl runs the tests and
5 # generates output for errors. No output means no errors were found.
6 # (Don't panic if you are seeing this as part of the reg distribution
7 # and aren't using Tcl -- reg's own regression tester also knows how
8 # to read this file, ignoring the Tcl-isms.)
10 # Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
14 if {[lsearch [namespace children] ::tcltest] == -1} {
15 package require tcltest 2
16 namespace import -force ::tcltest::*
19 # The regression tests below require the testregexp command; the constraint
20 # set here lets tcltest skip them if this command doesn't exist
22 ::tcltest::testConstraint testregexp \
23 [expr {[info commands testregexp] != {}}]
24 ::tcltest::testConstraint localeRegexp 0
26 # This file uses some custom procedures, defined below, for regexp regression
27 # testing. The name of the procedure indicates the general nature of the test:
29 # e compile error expected
30 # f match failure expected
32 # i successful match with -indices (used in checking things like
33 # nonparticipating subexpressions)
34 # p unsuccessful match with -indices (!!) (used in checking
35 # partial-match reporting)
36 # There is also "doing" which sets up title and major test number for each
36 # group of tests that follows it.
39 # The first 3 arguments are constant: a minor number (which often gets
40 # a letter or two suffixed to it internally), some flags, and the RE itself.
41 # For e, the remaining argument is the name of the compile error expected,
42 # less the leading "REG_". For the rest, the next argument is the string
43 # to try the match against. Remaining arguments are the substring expected
44 # to be matched, and any substrings expected to be matched by subexpressions.
45 # (For f, these arguments are optional, and if present are ignored except
46 # that they indicate how many subexpressions should be present in the RE.)
47 # It is an error for the number of subexpression arguments to be wrong.
48 # Cases involving nonparticipating subexpressions, checking where empty
49 # substrings are located, etc. should be done using i and p.
51 # The flag characters are complex and a bit eclectic. Generally speaking,
52 # lowercase letters are compile options, uppercase are expected re_info
53 # bits, and nonalphabetics are match options, controls for how the test is
54 # run, or testing options. The one small surprise is that AREs are the
55 # default, and you must explicitly request lesser flavors of RE. The flags
56 # are as follows. It is admitted that some are not very mnemonic.
57 # There are some others which are purely debugging tools and are not
58 # useful in this file.
60 # - no-op (placeholder)
61 # + provide fake xy equivalence class and ch collating element
62 # % force small state-set cache in matcher (to test cache replace)
63 # ^ beginning of string is not beginning of line
64 # $ end of string is not end of line
65 # * test is Unicode-specific, needs big character set
67 # & test as both ARE and BRE
70 # a turn advanced-features bit on (error unless ERE already)
71 # q literal string, no metacharacters at all
73 # i case-independent matching
74 # o ("opaque") no subexpression capture
75 # p newlines are half-magic, excluded from . and [^ only
76 # w newlines are half-magic, significant to ^ and $ only
77 # n newlines are fully magic, both effects
78 # x expanded RE syntax
79 # t incomplete-match reporting
81 # A backslash-_a_lphanumeric seen
82 # B ERE/ARE literal-_b_race heuristic used
83 # E backslash (_e_scape) seen within []
84 # H looka_h_ead constraint seen
85 # I _i_mpossible to match
86 # L _l_ocale-specific construct seen
87 # M unportable (_m_achine-specific) construct seen
88 # N RE can match empty (_n_ull) string
89 # P non-_P_OSIX construct seen
90 # Q {} _q_uantifier seen
91 # R back _r_eference seen
92 # S POSIX-un_s_pecified syntax seen
93 # T prefers shortest (_t_iny)
94 # U saw original-POSIX botch: unmatched right paren in ERE (_u_gh)
96 # The one area we can't easily test is memory-allocation failures (which
97 # are hard to provoke on command). Embedded NULs also are not tested at
98 # the moment, but this is a historical accident which should be fixed.
102 # test procedures and related
108 # re_info abbreviation mapping table
109 set infonames(A) "REG_UBSALNUM"
110 set infonames(B) "REG_UBRACES"
111 set infonames(E) "REG_UBBS"
112 set infonames(H) "REG_ULOOKAHEAD"
113 set infonames(I) "REG_UIMPOSSIBLE"
114 set infonames(L) "REG_ULOCALE"
115 set infonames(M) "REG_UUNPORT"
116 set infonames(N) "REG_UEMPTYMATCH"
117 set infonames(P) "REG_UNONPOSIX"
118 set infonames(Q) "REG_UBOUNDS"
119 set infonames(R) "REG_UBACKREF"
120 set infonames(S) "REG_UUNSPEC"
121 set infonames(T) "REG_USHORTEST"
122 set infonames(U) "REG_UPBOTCH"
123 set infonameorder "RHQBAUEPSMLNIT" ;# must match bit order, lsb first
125 # set major test number and description
126 proc doing {major desc} {
127 global prefix description testbypassed
129 if {$testbypassed != 0} {
130 puts stdout "!!! bypassed $testbypassed tests in\
131 $prefix, `$description'"
134 set prefix reg-$major
135 set description "reg $desc"
139 # build test number (internal)
141 return [join $testid .]
144 # build description, with possible modifiers (internal)
149 if {[llength $testid] > 1} {
150 set d "([lreplace $testid 0 0]) $d"
155 # build trailing options and flags argument from a flags string (internal)
161 foreach f [split $fl ""] {
162 switch -exact -- $f {
163 "i" { lappend args "-nocase" }
164 "x" { lappend args "-expanded" }
165 "n" { lappend args "-line" }
166 "p" { lappend args "-linestop" }
167 "w" { lappend args "-lineanchor" }
169 default { append flags $f }
172 if {[string compare $flags ""] != 0} {
173 lappend args -$xflags $flags
178 # build info-flags list from a flags string (internal)
179 proc infoflags {fl} {
180 global infonames infonameorder
183 foreach f [split $infonameorder ""] {
184 if {[string first $f $fl] >= 0} {
185 lappend ret $infonames($f)
191 # compilation error expected
192 proc e {testid flags re err} {
193 global prefix ask errorCode
195 # Tcl locale stuff doesn't do the ch/xy test fakery yet
196 if {[string first "+" $flags] >= 0} {
197 # This will register as a skipped test
198 test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
202 # if &, test as both ARE and BRE
203 set amp [string first "&" $flags]
205 set f [string range $flags 0 [expr $amp - 1]]
206 append f [string range $flags [expr $amp + 1] end]
207 e [linsert $testid end ARE] ${f} $re $err
208 e [linsert $testid end BRE] ${f}b $re $err
212 set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
213 set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
214 test $prefix.[tno $testid] [desc $testid] \
215 {testregexp} $run [list 1 REG_$err]
218 # match failure expected
219 proc f {testid flags re target args} {
220 global prefix description ask
222 # Tcl locale stuff doesn't do the ch/xy test fakery yet
223 if {[string first "+" $flags] >= 0} {
224 # This will register as a skipped test
225 test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
229 # if &, test as both ARE and BRE
230 set amp [string first "&" $flags]
232 set f [string range $flags 0 [expr $amp - 1]]
233 append f [string range $flags [expr $amp + 1] end]
234 eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
236 eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
242 set infoflags [infoflags $flags]
243 set ccmd [concat [list testregexp -$ask] $f [list $re]]
244 set nsub [expr [llength $args] - 1]
246 # didn't tell us number of subexps
247 set ccmd "lreplace \[$ccmd\] 0 0"
248 set info [list $infoflags]
250 set info [list $nsub $infoflags]
252 lappend testid "compile"
253 test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
255 set testid [lreplace $testid end end "execute"]
256 set ecmd [concat [list testregexp] $f [list $re $target]]
257 test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
260 # match expected, internal routine that does the work
261 # parameters are like those of the "real" routines (m, i, p), plus a leading
262 # "opts", a possibly-empty list of switches for the regexp match attempt.
263 # The ! flag is used to indicate expected match failure (for REG_EXPECT,
264 # which wants argument testing even in the event of failure).
265 proc matchexpected {opts testid flags re target args} {
266 global prefix description ask regBug
268 if {[info exists regBug] && $regBug} {
269 # This will register as a skipped test
270 test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
274 # Tcl locale stuff doesn't do the ch/xy test fakery yet
275 if {[string first "+" $flags] >= 0} {
276 # This will register as a skipped test
277 test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
281 # if &, test as both ARE and BRE
282 set amp [string first "&" $flags]
284 set f [string range $flags 0 [expr $amp - 1]]
285 append f [string range $flags [expr $amp + 1] end]
286 eval [concat [list matchexpected $opts \
287 [linsert $testid end ARE] ${f} $re $target] $args]
288 eval [concat [list matchexpected $opts \
289 [linsert $testid end BRE] ${f}b $re $target] $args]
294 set infoflags [infoflags $flags]
295 set ccmd [concat [list testregexp -$ask] $f [list $re]]
296 set ecmd [concat [list testregexp] $opts $f [list $re $target]]
298 set nsub [expr [llength $args] - 1]
301 for {set i 0} {$i <= $nsub} {incr i} {
308 append refs " \$$name"
311 if {[string first "o" $flags] >= 0} { ;# REG_NOSUB kludge
312 set nsub 0 ;# unsigned value cannot be -1
314 if {[string first "t" $flags] >= 0} { ;# REG_EXPECT
315 incr nsub -1 ;# the extra does not count
317 set ecmd [concat $ecmd $names]
318 set erun "list \[$ecmd\] $refs"
320 if {[string first "!" $flags] >= 0} {
323 set result [concat $retcode $args]
325 set info [list $nsub $infoflags]
326 lappend testid "compile"
327 test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
328 set testid [lreplace $testid end end "execute"]
329 test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
332 # match expected (no missing, empty, or ambiguous submatches)
333 # m testno flags re target mat submat ...
335 eval matchexpected [linsert $args 0 [list]]
338 # match expected (full fanciness)
339 # i testno flags re target mat submat ...
341 eval matchexpected [linsert $args 0 [list "-indices"]]
344 # partial match expected
345 # p testno flags re target mat "" ...
346 # Quirk: number of ""s must be one more than number of subREs.
348 set f [lindex $args 1] ;# add ! flag
349 set args [lreplace $args 1 1 "!$f"]
350 eval matchexpected [linsert $args 0 [list "-indices"]]
354 proc knownBug {args} {
362 # the tests themselves
366 # support functions and preliminary misc.
367 # This is sensitive to changes in message wording, but we really have to
368 # test the code->message expansion at least once.
369 test reg-0.1 "regexp error reporting" {
370 list [catch {regexp (*) ign} msg] $msg
371 } {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
375 doing 1 "basic sanity checks"
378 m 3 & abc xyabxabce abc
382 doing 2 "invalid option combinations"
391 doing 3 "basic syntax"
401 doing 4 "parentheses"
404 m 3 b {\(a\)b} ab ab a
405 m 4 - a((b)c) abc abc bc b
406 m 5 - a(b)(c) abc abc b c
409 # sigh, we blew it on the specs here... someday this will be fixed in POSIX,
410 # but meanwhile, it's fixed in AREs
414 m 11 P a(?:b)c abc abc
415 e 12 e a(?:b)c BADRPT
416 i 13 S a()b ab {0 1} {1 0}
418 i 15 S a(|b)c ac {0 1} {1 0}
419 m 16 S a(b|)c abc abc b
423 doing 5 "simple one-char matching"
424 # general case of brackets done later
427 m 3 & {a[bc]d} abd abd
428 m 4 & {a[bc]d} acd acd
431 m 7 & {a[^bc]d} aed aed
432 f 8 &p "a\[^bc]d" "a\nd"
436 doing 6 "context-dependent syntax"
447 m 10 n "\n^" "x\nb" "\n"
451 m 14 bS {\(x$\)} x x x
453 m 16 b {x$y} "x\$y" "x\$y"
455 m 18 n "x\$\n" "x\n" "x\n"
461 doing 7 "simple quantifiers"
481 m 1 NQ "a{0,1}" "" ""
484 e 4 - "a{1,2,3}" BADBR
486 e 6 - "a{1000}" BADBR
489 m 9 BS "a{b" "a\{b" "a\{b"
490 m 10 BS "a{" "a\{" "a\{"
491 m 11 bQ "a\\{0,1\\}b" cb b
492 e 12 b "a\\{0,1" EBRACE
493 e 13 - "a{0,1\\" BADBR
495 m 15 Q "a{0,0}b" ab b
496 m 16 Q "a{0,1}b" ab ab
498 m 18 Q "a{0,2}b" aab aab
499 m 19 Q "a{0,}b" aab aab
500 m 20 Q "a{1,1}b" aab ab
501 m 21 Q "a{1,3}b" aaaab aaab
503 m 23 Q "a{1,}b" aab aab
505 m 25 Q "a{2,3}b" aaaab aaab
507 m 27 Q "a{2,}b" aaaab aaaab
514 m 3 & {a[[.-.]]} a- a-
515 m 4 &L {a[[.zero.]]} a0 a0
516 m 5 &LM {a[[.zero.]-9]} a2 a2
517 m 6 &M {a[0-[.9.]]} a2 a2
518 m 7 &+L {a[[=x=]]} ax ax
519 m 8 &+L {a[[=x=]]} ay ay
520 f 9 &+L {a[[=x=]]} az
521 e 10 & {a[0-[=x=]]} ERANGE
522 m 11 &L {a[[:digit:]]} a0 a0
523 e 12 & {a[[:woopsie:]]} ECTYPE
524 f 13 &L {a[[:digit:]]} ab
525 e 14 & {a[0-[:digit:]]} ERANGE
526 m 15 &LP {[[:<:]]a} a a
527 m 16 &LP {a[[:>:]]} a a
528 e 17 & {a[[..]]b} ECOLLATE
529 e 18 & {a[[==]]b} ECOLLATE
530 e 19 & {a[[::]]b} ECTYPE
531 e 20 & {a[[.a} EBRACK
532 e 21 & {a[[=a} EBRACK
533 e 22 & {a[[:a} EBRACK
537 e 26 & {a[b-c} EBRACK
538 m 27 &M {a[b-c]} ab ab
539 m 28 & {a[b-b]} ab ab
540 m 29 &M {a[1-2]} a2 a2
541 e 30 & {a[c-b]} ERANGE
542 e 31 & {a[a-b-c]} ERANGE
543 m 32 &M {a[--?]b} a?b a?b
544 m 33 & {a[---]b} a-b a-b
545 m 34 & {a[]b]c} a]c a]c
546 m 35 EP {a[\]]b} a]b a]b
548 m 37 bE {a[\]]b} "a\\]b" "a\\]b"
549 m 38 eE {a[\]]b} "a\\]b" "a\\]b"
550 m 39 EP {a[\\]b} "a\\b" "a\\b"
551 m 40 eE {a[\\]b} "a\\b" "a\\b"
552 m 41 bE {a[\\]b} "a\\b" "a\\b"
553 e 42 - {a[\Z]b} EESCAPE
554 m 43 & {a[[b]c} "a\[c" "a\[c"
555 m 44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \
556 "a\u0102\u02ffb" "a\u0102\u02ffb"
560 doing 10 "anchors and newlines"
568 m 8 &n "^a" "b\na" "a"
569 i 9 &w "^a" "a\na" {0 0}
570 i 10 &n^ "^a" "a\na" {2 2}
572 m 12 &n "a\$" "a\nb" "a"
573 i 13 &n "a\$" "a\na" {0 0}
577 m 17 b {$$} "\$" "\$"
580 i 20 &nN "^\$" "a\n\nb" {2 1}
582 m 22 b {$^} "\$^" "\$^"
585 f 25 ^nP {\Aa} "b\na"
588 f 28 {$nP} {a\Z} "a\nb"
596 doing 11 "boundary constraints"
597 m 1 &LP {[[:<:]]a} a a
598 m 2 &LP {[[:<:]]a} -a a
599 f 3 &LP {[[:<:]]a} ba
600 m 4 &LP {a[[:>:]]} a a
601 m 5 &LP {a[[:>:]]} a- a
602 f 6 &LP {a[[:>:]]} ab
617 e 21 - {[[:<:]]*} BADRPT
618 e 22 - {[[:>:]]*} BADRPT
632 doing 12 "character classes"
633 m 1 LP {a\db} a0b a0b
636 m 4 LP {a\Db} axb axb
637 m 5 LP "a\\sb" "a b" "a b"
638 m 6 LP "a\\sb" "a\tb" "a\tb"
639 m 7 LP "a\\sb" "a\nb" "a\nb"
641 m 9 LP {a\Sb} axb axb
642 f 10 LP "a\\Sb" "a b"
643 m 11 LP {a\wb} axb axb
646 m 14 LP {a\Wb} a-b a-b
647 m 15 LP {\y\w+z\y} adze-guz guz
648 m 16 LPE {a[\d]b} a1b a1b
649 m 17 LPE "a\[\\s]b" "a b" "a b"
650 m 18 LPE {a[\w]b} axb axb
658 m 4 bAS {a\wb} awb awb
659 m 5 eAS {a\wb} awb awb
660 m 6 PL "a\\ab" "a\007b" "a\007b"
661 m 7 P "a\\bb" "a\bb" "a\bb"
662 m 8 P {a\Bb} "a\\b" "a\\b"
663 m 9 MP "a\\chb" "a\bb" "a\bb"
664 m 10 MP "a\\cHb" "a\bb" "a\bb"
665 m 11 LMP "a\\e" "a\033" "a\033"
666 m 12 P "a\\fb" "a\fb" "a\fb"
667 m 13 P "a\\nb" "a\nb" "a\nb"
668 m 14 P "a\\rb" "a\rb" "a\rb"
669 m 15 P "a\\tb" "a\tb" "a\tb"
670 m 16 P "a\\u0008x" "a\bx" "a\bx"
671 e 17 - {a\u008x} EESCAPE
672 m 18 P "a\\u00088x" "a\b8x" "a\b8x"
673 m 19 P "a\\U00000008x" "a\bx" "a\bx"
674 e 20 - {a\U0000008x} EESCAPE
675 m 21 P "a\\vb" "a\vb" "a\vb"
676 m 22 MP "a\\x08x" "a\bx" "a\bx"
677 e 23 - {a\xq} EESCAPE
678 m 24 MP "a\\x0008x" "a\bx" "a\bx"
680 m 26 MP "a\\010b" "a\bb" "a\bb"
684 doing 14 "back references"
686 m 1 RP {a(b*)c\1} abbcbb abbcbb bb
687 m 2 RP {a(b*)c\1} ac ac ""
688 f 3 RP {a(b*)c\1} abbcb
689 m 4 RP {a(b*)\1} abbcbb abb b
690 m 5 RP {a(b|bb)\1} abbcbb abb b
691 m 6 RP {a([bc])\1} abb abb b
692 f 7 RP {a([bc])\1} abc
693 m 8 RP {a([bc])\1} abcabb abb b
694 f 9 RP {a([bc])*\1} abc
695 f 10 RP {a([bc])\1} abB
696 m 11 iRP {a([bc])\1} abB abB b
697 m 12 RP {a([bc])\1+} abbb abbb b
698 m 13 QRP "a(\[bc])\\1{3,4}" abbbb abbbb b
699 f 14 QRP "a(\[bc])\\1{3,4}" abbb
700 m 15 RP {a([bc])\1*} abbb abbb b
701 m 16 RP {a([bc])\1*} ab ab b
702 m 17 RP {a([bc])(\1*)} ab ab b ""
703 e 18 - {a((b)\1)} ESUBREG
704 e 19 - {a(b)c\2} ESUBREG
705 m 20 bR {a\(b*\)c\1} abbcbb abbcbb bb
709 doing 15 "octal escapes vs back references"
710 # initial zero is always octal
711 m 1 MP "a\\010b" "a\bb" "a\bb"
712 m 2 MP "a\\0070b" "a\0070b" "a\0070b"
713 m 3 MP "a\\07b" "a\007b" "a\007b"
714 m 4 MP "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c" "abbbbbbbbbb\007c" \
715 "abbbbbbbbbb\007c" "b" "b" "b" "b" "b" "b" \
717 # a single digit is always a backref
719 # otherwise it's a backref only if within range (barf!)
720 m 6 MP "a\\10b" "a\bb" "a\bb"
721 m 7 MP {a\101b} aAb aAb
722 m 8 RP {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c} abbbbbbbbbbbc \
723 abbbbbbbbbbbc b b b b b b b \
725 # but we're fussy about border cases -- guys who want octal should use the zero
726 e 9 - {a((((((((((b\10))))))))))c} ESUBREG
727 # BREs don't have octal, EREs don't have backrefs
728 m 10 MP "a\\12b" "a\nb" "a\nb"
729 e 11 b {a\12b} ESUBREG
730 m 12 eAS {a\12b} a12b a12b
734 doing 16 "expanded syntax"
735 m 1 xP "a b c" "abc" "abc"
736 m 2 xP "a b #oops\nc\td" "abcd" "abcd"
737 m 3 x "a\\ b\\\tc" "a b\tc" "a b\tc"
738 m 4 xP "a b\\#c" "ab#c" "ab#c"
739 m 5 xP "a b\[c d]e" "ab e" "ab e"
740 m 6 xP "a b\[c#d]e" "ab#e" "ab#e"
741 m 7 xP "a b\[c#d]e" "abde" "abde"
742 m 8 xSPB "ab{ d" "ab\{d" "ab\{d"
743 m 9 xPQ "ab{ 1 , 2 }c" "abc" "abc"
747 doing 17 "misc syntax"
748 m 1 P a(?#comment)b ab ab
752 doing 18 "unmatchable REs"
757 doing 19 "case independence"
761 m 4 &iM {a[b-d]} aC aC
766 doing 20 "directors and embedded options"
769 m 3 &P ***=a*b a*b a*b
770 m 4 q ***=a*b ***=a*b ***=a*b
771 m 5 bLP {***:\w+} ab ab
772 m 6 eLP {***:\w+} ab ab
773 e 7 & ***:***=a*b BADRPT
774 m 8 &P ***:(?b)a+b a+b a+b
775 m 9 P (?b)a+b a+b a+b
776 e 10 e {(?b)\w+} BADRPT
777 m 11 bAS {(?b)\w+} (?b)w+ (?b)w+
780 m 14 APS {(?e)\W+} WW WW
782 f 16 P "(?m)a.b" "a\nb"
783 m 17 P "(?m)^b" "a\nb" "b"
784 f 18 P "(?n)a.b" "a\nb"
785 m 19 P "(?n)^b" "a\nb" "b"
786 f 20 P "(?p)a.b" "a\nb"
787 f 21 P "(?p)^b" "a\nb"
788 m 22 P (?q)a+b a+b a+b
789 m 23 nP "(?s)a.b" "a\nb" "a\nb"
790 m 24 xP "(?t)a b" "a b" "a b"
791 m 25 P "(?w)a.b" "a\nb" "a\nb"
792 m 26 P "(?w)^b" "a\nb" "b"
793 m 27 P "(?x)a b" "ab" "ab"
795 m 29 P (?ici)a+ Aa Aa
796 e 30 P (?i)(?q)a+ BADRPT
797 m 31 P (?q)(?i)a+ (?i)a+ (?i)a+
799 m 33 xP "(?q)a b" "a b" "a b"
800 m 34 P "(?qx)a b" "a b" "a b"
806 m 1 - a(b)c abc abc b
807 m 2 P a(?:b)c xabc abc
808 m 3 - a((b))c xabcy abc b b
809 m 4 P a(?:(b))c abcy abc b
810 m 5 P a((?:b))c abc abc b
811 m 6 P a(?:(?:b))c abc abc
812 i 7 Q "a(b){0}c" ac {0 1} {-1 -1}
813 m 8 - a(b)c(d)e abcde abcde b d
814 m 9 - (b)c(d)e bcde bcde b d
815 m 10 - a(b)(d)e abde abde b d
816 m 11 - a(b)c(d) abcd abcd b d
817 m 12 - (ab)(cd) xabcdy abcd ab cd
818 m 13 - a(b)?c xabcy abc b
819 i 14 - a(b)?c xacy {1 2} {-1 -1}
820 m 15 - a(b)?c(d)?e xabcdey abcde b d
821 i 16 - a(b)?c(d)?e xacdey {1 4} {-1 -1} {3 3}
822 i 17 - a(b)?c(d)?e xabcey {1 4} {2 2} {-1 -1}
823 i 18 - a(b)?c(d)?e xacey {1 3} {-1 -1} {-1 -1}
824 m 19 - a(b)*c xabcy abc b
825 i 20 - a(b)*c xabbbcy {1 5} {4 4}
826 i 21 - a(b)*c xacy {1 2} {-1 -1}
827 m 22 - a(b*)c xabbbcy abbbc bbb
828 m 23 - a(b*)c xacy ac ""
830 m 25 - a(b)+c xabcy abc b
831 i 26 - a(b)+c xabbbcy {1 5} {4 4}
832 m 27 - a(b+)c xabbbcy abbbc bbb
833 i 28 Q "a(b){2,3}c" xabbbcy {1 5} {4 4}
834 i 29 Q "a(b){2,3}c" xabbcy {1 4} {3 3}
835 f 30 Q "a(b){2,3}c" xabcy
836 m 31 LP "\\y(\\w+)\\y" "-- abc-" "abc" "abc"
837 m 32 - a((b|c)d+)+ abacdbd acdbd bd b
838 m 33 N (.*).* abc abc abc
839 m 34 N (a*)* bc "" ""
843 doing 22 "multicharacter collating elements"
845 m 1 &+L {a[c]e} ace ace
847 m 3 &+L {a[[.ch.]]} ach ach
848 f 4 &+L {a[[.ch.]]} ace
849 m 5 &+L {a[c[.ch.]]} ac ac
850 m 6 &+L {a[c[.ch.]]} ace ac
851 m 7 &+L {a[c[.ch.]]} ache ach
853 m 9 &+L {a[^c]e} abe abe
854 m 10 &+L {a[^c]e} ache ache
855 f 11 &+L {a[^[.ch.]]} ach
856 m 12 &+L {a[^[.ch.]]} ace ac
857 m 13 &+L {a[^[.ch.]]} ac ac
858 m 14 &+L {a[^[.ch.]]} abe ab
859 f 15 &+L {a[^c[.ch.]]} ach
860 f 16 &+L {a[^c[.ch.]]} ace
861 f 17 &+L {a[^c[.ch.]]} ac
862 m 18 &+L {a[^c[.ch.]]} abe ab
863 m 19 &+L {a[^b]} ac ac
864 m 20 &+L {a[^b]} ace ac
865 m 21 &+L {a[^b]} ach ach
870 doing 23 "lookahead constraints"
871 m 1 HP a(?=b)b* ab ab
873 m 3 HP a(?=b)b*(?=c)c* abc abc
874 f 4 HP a(?=b)b*(?=c)c* ab
882 doing 24 "non-greedy quantifiers"
884 m 2 PT ab+?c abbc abbc
886 m 4 PT ab*?c abbc abbc
889 m 7 PQT "ab{2,4}?" abbbb abb
890 m 8 PQT "ab{2,4}?c" abbbbc abbbbc
891 m 9 - 3z* 123zzzz456 3zzzz
892 m 10 PT 3z*? 123zzzz456 3
893 m 11 - z*4 123zzzz456 zzzz4
894 m 12 PT z*?4 123zzzz456 zzzz4
898 doing 25 "mixed quantifiers"
899 # this is very incomplete as yet
901 m 1 PNT {^(.*?)(a*)$} xyza xyza xyz a
902 m 2 PNT {^(.*?)(a*)$} xyzaa xyzaa xyz aa
903 m 3 PNT {^(.*?)(a*)$} xyz xyz xyz ""
907 doing 26 "tricky cases"
908 # attempts to trick the matcher into accepting a short match
909 m 1 - (week|wee)(night|knights) weeknights weeknights \
911 m 2 RP {a(bc*).*\1} abccbccb abccbccb b
912 m 3 - {a(b.[bc]*)+} abcbd abcbd bd
916 doing 27 "implementation misc."
917 # duplicate arcs are suppressed
918 m 1 P a(?:b|b)c abc abc
919 # make color/subcolor relationship go back and forth
920 m 2 & {[ab][ab][ab]} aba aba
921 m 3 & {[ab][ab][ab][ab][ab][ab][ab]} abababa abababa
925 doing 28 "boundary busters etc."
926 # color-descriptor allocation changes at 10
927 m 1 & abcdefghijkl abcdefghijkl abcdefghijkl
928 # so does arc allocation
929 m 2 P a(?:b|c|d|e|f|g|h|i|j|k|l|m)n agn agn
930 # subexpression tracking also at 10
931 m 3 - a(((((((((((((b)))))))))))))c abc abc b b b b b b b b b b b b b
932 # state-set handling changes slightly at unsigned size (might be 64...)
933 # (also stresses arc allocation)
934 m 4 Q "ab{1,100}c" abbc abbc
935 m 5 Q "ab{1,100}c" abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
936 abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
938 abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
939 abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
940 # force small cache and bust it, several ways
941 m 7 LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh
942 m 8 %LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh
943 m 9 %LP {\w+abcdefghijklmnopqrst} xyzabcdefghijklmnopqrst \
944 xyzabcdefghijklmnopqrst
945 i 10 %LP {\w+(abcdefgh)?} xyz {0 2} {-1 -1}
946 i 11 %LP {\w+(abcdefgh)?} xyzabcdefg {0 9} {-1 -1}
947 i 12 %LP {\w+(abcdefghijklmnopqrst)?} xyzabcdefghijklmnopqrs \
952 doing 29 "incomplete matches"
953 p 1 t def abc {3 2} ""
954 p 2 t bcd abc {1 2} ""
955 p 3 t abc abab {0 3} ""
956 p 4 t abc abdab {3 4} ""
957 i 5 t abc abc {0 2} {0 2}
958 i 6 t abc xyabc {2 4} {2 4}
959 p 7 t abc+ xyab {2 3} ""
960 i 8 t abc+ xyabc {2 4} {2 4}
961 knownBug i 9 t abc+ xyabcd {2 4} {6 5}
962 i 10 t abc+ xyabcdd {2 4} {7 6}
963 p 11 tPT abc+? xyab {2 3} ""
964 # the retain numbers in these two may look wrong, but they aren't
965 i 12 tPT abc+? xyabc {2 4} {5 4}
966 i 13 tPT abc+? xyabcc {2 4} {6 5}
967 i 14 tPT abc+? xyabcd {2 4} {6 5}
968 i 15 tPT abc+? xyabcdd {2 4} {7 6}
969 i 16 t abcd|bc xyabc {3 4} {2 4}
970 p 17 tn .*k "xx\nyyy" {3 5} ""
973 doing 30 "misc. oddities and old bugs"
978 m 5 & ^a*b aaaab aaaab
979 m 6 &M {[0-6][1-2][0-3][0-6][1-6][0-6]} 010010 010010
980 # temporary REG_BOSONLY kludge
983 # back to normal stuff
984 m 9 HLP {(?n)^(?![t#])\S+} "tk\n\n#\n#\nit0" it0
987 # flush any leftover complaints
990 # Tests resulting from bugs reported by users
991 test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
992 set str {2:::DebugWin32}
993 set re {([[:xdigit:]])([[:space:]]*)}
994 list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
995 # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
998 test reg-32.1 {canmatch functionality -- at end} {
1001 # can match at the final d, if '%' follows
1002 set res [testregexp -xflags -- c $pat $line resvar]
1006 test reg-32.2 {canmatch functionality -- at end} {
1009 # can only match after the end of the string
1010 set res [testregexp -xflags -- c $pat $line resvar]
1014 test reg-32.3 {canmatch functionality -- not last char} {
1017 # can only match after the end of the string
1018 set res [testregexp -xflags -- c $pat $line resvar]
1022 test reg-32.3.1 {canmatch functionality -- no match} {
1025 # can match the last char, if followed by x
1026 set res [testregexp -xflags -- c $pat $line resvar]
1030 test reg-32.4 {canmatch functionality -- last char} {knownBug} {
1033 # can match the last char, if followed by x
1034 set res [testregexp -xflags -- c $pat $line resvar]
1038 test reg-32.4.1 {canmatch functionality -- last char} {knownBug} {
1041 # can match the last char, if followed by x
1042 set res [testregexp -xflags -- c $pat $line resvar]
1046 test reg-32.5 {canmatch functionality -- last char} {knownBug} {
1049 # can match the last char, if followed by not-d and x.
1050 set res [testregexp -xflags -- c $pat $line resvar]
1054 test reg-32.6 {canmatch functionality -- last char} {knownBug} {
1055 set pat {[^a]%[^\r\n]*$}
1057 # can match at the final d, if '%' follows
1058 set res [testregexp -xflags -- c $pat $line resvar]
1062 test reg-32.7 {canmatch functionality -- last char} {knownBug} {
1065 # can match at the final d, if '%' follows
1066 set res [testregexp -xflags -- c $pat $line resvar]
1070 test reg-32.8 {canmatch functionality -- last char} {knownBug} {
1073 # can match at the final d, if '%' follows
1074 set res [testregexp -xflags -- c $pat $line resvar]
1078 test reg-32.9 {canmatch functionality -- more complex case} {knownBug} {
1079 set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
1081 # can match at the final d, if '%' follows
1082 set res [testregexp -xflags -- c $pat $line resvar]
1086 # Tests reg-33.*: Checks for bug fixes
1088 test reg-33.1 {Bug 230589} {
1089 regexp {[ ]*(^|[^%])%V} "*%V2" m s
1092 test reg-33.2 {Bug 504785} {
1093 regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
1094 } {bbcos_001_c01.q1la bbcos _001_c01 q1 l}
1096 test reg-33.3 {Bug 505048} {
1097 regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
1100 test reg-33.4 {Bug 505048} {
1101 regexp {\A\s*([^b]*)b} ab
1104 test reg-33.5 {Bug 505048} {
1105 regexp {\A\s*[^b]*(b)} ab
1108 test reg-33.6 {Bug 505048} {
1109 regexp {\A(\s*)[^b]*(b)} ab
1112 test reg-33.7 {Bug 505048} {
1113 regexp {\A\s*[^b]*b} ab
1116 test reg-33.8 {Bug 505048} {
1117 regexp -inline {\A\s*[^b]*b} ab
1120 test reg-33.9 {Bug 505048} {
1121 regexp -indices -inline {\A\s*[^b]*b} ab
1124 test reg-33.10 {Bug 840258} {
1125 regsub {(^|\n)+\.*b} \n.b {} tmp
1128 test reg-33.11 {Bug 840258} {
1129 regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
1130 "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
1134 ::tcltest::cleanupTests