]> git.saurik.com Git - wxWidgets.git/blame_incremental - tests/regex/reg.test
using terminate: terminates prematurely, OnExit is not called anymore
[wxWidgets.git] / tests / regex / reg.test
... / ...
CommitLineData
# reg.test --
#
# This file contains a collection of tests for one or more of the Tcl
# built-in commands.  Sourcing this file into Tcl runs the tests and
# generates output for errors.  No output means no errors were found.
# (Don't panic if you are seeing this as part of the reg distribution
# and aren't using Tcl -- reg's own regression tester also knows how
# to read this file, ignoring the Tcl-isms.)
#
# Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
#
12
# Make the tcltest 2 commands available, unless ::tcltest is already
# visible as a child of the current namespace.
if {[lsearch [namespace children] ::tcltest] < 0} {
    package require tcltest 2
    namespace import -force ::tcltest::*
}

# The table-driven helpers defined below all tag their tests with the
# "testregexp" constraint, which is true only when the testregexp
# command exists.
::tcltest::testConstraint testregexp \
    [expr {[llength [info commands testregexp]] > 0}]
# Tests tagged localeRegexp are unconditionally switched off.
::tcltest::testConstraint localeRegexp 0
24
# This file uses some custom procedures, defined below, for regexp regression
# testing.  The name of the procedure indicates the general nature of the
# test:
#       e       compile error expected
#       f       match failure expected
#       m       successful match
#       i       successful match with -indices (used in checking things like
#               nonparticipating subexpressions)
#       p       unsuccessful match with -indices (!!) (used in checking
#               partial-match reporting)
# There is also "doing" which sets up title and major test number for each
# block of tests.
37
# The first 3 arguments are constant:  a minor number (which often gets
# a letter or two suffixed to it internally), some flags, and the RE itself.
# For e, the remaining argument is the name of the compile error expected,
# less the leading "REG_".  For the rest, the next argument is the string
# to try the match against.  Remaining arguments are the substring expected
# to be matched, and any substrings expected to be matched by subexpressions.
# (For f, these arguments are optional, and if present are ignored except
# that they indicate how many subexpressions should be present in the RE.)
# It is an error for the number of subexpression arguments to be wrong.
# Cases involving nonparticipating subexpressions, checking where empty
# substrings are located, etc. should be done using i and p.
49
# The flag characters are complex and a bit eclectic.  Generally speaking,
# lowercase letters are compile options, uppercase are expected re_info
# bits, and nonalphabetics are match options, controls for how the test is
# run, or testing options.  The one small surprise is that AREs are the
# default, and you must explicitly request lesser flavors of RE.  The flags
# are as follows.  It is admitted that some are not very mnemonic.
# There are some others which are purely debugging tools and are not
# useful in this file.
#
#       -       no-op (placeholder)
#       +       provide fake xy equivalence class and ch collating element
#       %       force small state-set cache in matcher (to test cache replace)
#       ^       beginning of string is not beginning of line
#       $       end of string is not end of line
#       *       test is Unicode-specific, needs big character set
#
#       &       test as both ARE and BRE
#       b       BRE
#       e       ERE
#       a       turn advanced-features bit on (error unless ERE already)
#       q       literal string, no metacharacters at all
#
#       i       case-independent matching
#       o       ("opaque") no subexpression capture
#       p       newlines are half-magic, excluded from . and [^ only
#       w       newlines are half-magic, significant to ^ and $ only
#       n       newlines are fully magic, both effects
#       x       expanded RE syntax
#       t       incomplete-match reporting
#
#       A       backslash-_a_lphanumeric seen
#       B       ERE/ARE literal-_b_race heuristic used
#       E       backslash (_e_scape) seen within []
#       H       looka_h_ead constraint seen
#       I       _i_mpossible to match
#       L       _l_ocale-specific construct seen
#       M       unportable (_m_achine-specific) construct seen
#       N       RE can match empty (_n_ull) string
#       P       non-_P_OSIX construct seen
#       Q       {} _q_uantifier seen
#       R       back _r_eference seen
#       S       POSIX-un_s_pecified syntax seen
#       T       prefers shortest (_t_iny)
#       U       saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
94
# The one area we can't easily test is memory-allocation failures (which
# are hard to provoke on command).  Embedded NULs also are not tested at
# the moment, but this is a historical accident which should be fixed.
98
99
100
101# test procedures and related
102
# Switch names (without the leading dash) that the helpers pass to
# testregexp, and the bypassed-test counter that "doing" reports.
set ask "about"
set xflags "xflags"
set testbypassed 0

# re_info abbreviation mapping table: flag letter -> info bit name
array set infonames {
    A REG_UBSALNUM
    B REG_UBRACES
    E REG_UBBS
    H REG_ULOOKAHEAD
    I REG_UIMPOSSIBLE
    L REG_ULOCALE
    M REG_UUNPORT
    N REG_UEMPTYMATCH
    P REG_UNONPOSIX
    Q REG_UBOUNDS
    R REG_UBACKREF
    S REG_UUNSPEC
    T REG_USHORTEST
    U REG_UPBOTCH
}
set infonameorder "RHQBAUEPSMLNIT"	;# must match bit order, lsb first
123
# doing --
#	Start a new block of tests: set the major test number and the
#	description used for every test registered until the next call.
#	If the bypass counter is non-zero, first report how many tests were
#	bypassed in the block that is ending.
#
# Arguments:
#	major	major test number; the test-name prefix becomes "reg-$major"
#	desc	block title; the description becomes "reg $desc"
proc doing {major desc} {
    if {$::testbypassed != 0} {
        set where "$::prefix, `$::description'"
        puts stdout "!!! bypassed $::testbypassed tests in $where"
    }

    set ::prefix reg-$major
    set ::description "reg $desc"
    set ::testbypassed 0
}
137
# tno -- (internal)
#	Build the dotted test number from a test id list, e.g. {3 ARE}
#	becomes "3.ARE".
proc tno {testid} {
    join $testid "."
}
142
# desc -- (internal)
#	Build the test description.  A plain test id yields the current
#	block description unchanged; when the id carries modifiers after
#	the minor number (e.g. {3 ARE compile}) they are put in front in
#	parentheses.
proc desc {testid} {
    global description

    if {[llength $testid] <= 1} {
        return $description
    }
    set modifiers [lrange $testid 1 end]
    return "($modifiers) $description"
}
153
# flags -- (internal)
#	Turn a flags string into the trailing option words for testregexp.
#	Letters that have a dedicated switch become that switch, "-" is
#	dropped, and every other character is collected, in order, into one
#	string passed after the -$xflags switch.
#
# Arguments:
#	fl	flags string, one character per flag
#
# Results:
#	List of option words (possibly empty).
proc flags {fl} {
    global xflags

    array set switchFor {
        i -nocase
        x -expanded
        n -line
        p -linestop
        w -lineanchor
    }

    set opts [list]
    set leftover ""
    foreach ch [split $fl ""] {
        if {[info exists switchFor($ch)]} {
            lappend opts $switchFor($ch)
        } elseif {![string equal $ch "-"]} {
            append leftover $ch
        }
    }
    if {[string length $leftover] > 0} {
        lappend opts -$xflags $leftover
    }
    return $opts
}
176
# infoflags -- (internal)
#	Build the list of info-bit names expected for a flags string.  The
#	result follows the order of $infonameorder, not the order in which
#	the letters appear in the flags.
proc infoflags {fl} {
    global infonames infonameorder

    set found [list]
    foreach letter [split $infonameorder ""] {
        if {[string first $letter $fl] < 0} {
            continue
        }
        lappend found $infonames($letter)
    }
    return $found
}
189
# e --
#	Register a test that expects the RE to fail to compile.
#
# Arguments:
#	testid	minor test number (a list once modifiers are appended)
#	flags	flags string, see the table at the top of the file
#	re	the regular expression
#	err	expected compile error name, less the leading "REG_"
proc e {testid flags re err} {
    global prefix ask errorCode

    # Tcl locale stuff doesn't do the ch/xy test fakery yet, so a "+"
    # test is registered under the always-false localeRegexp constraint
    # and shows up as skipped.
    if {[string first "+" $flags] >= 0} {
        test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
        return
    }

    # "&" means: run once as ARE and once, with "b" appended, as BRE.
    set ampAt [string first "&" $flags]
    if {$ampAt >= 0} {
        set stripped [string replace $flags $ampAt $ampAt]
        e [linsert $testid end ARE] $stripped $re $err
        e [linsert $testid end BRE] ${stripped}b $re $err
        return
    }

    # The test script yields {catch-status error-name}.
    set compileCmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
    set script "list \[catch \{$compileCmd\}\] \[lindex \$errorCode 1\]"
    test $prefix.[tno $testid] [desc $testid] \
        {testregexp} $script [list 1 REG_$err]
}
216
# f --
#	Register the tests for an RE that must compile but fail to match:
#	a "compile" test checking the testregexp -$ask output, and an
#	"execute" test expecting testregexp to return 0.
#
# Arguments:
#	testid	minor test number (a list once modifiers are appended)
#	flags	flags string, see the table at the top of the file
#	re	the regular expression
#	target	string to match against
#	args	optional; only their count is used: one entry for the match
#		plus one per subexpression the RE should contain
proc f {testid flags re target args} {
    global prefix description ask

    # Tcl locale stuff doesn't do the ch/xy test fakery yet, so a "+"
    # test is registered under the always-false localeRegexp constraint
    # and shows up as skipped.
    if {[string first "+" $flags] >= 0} {
        test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
        return
    }

    # "&" means: run once as ARE and once, with "b" appended, as BRE.
    set ampAt [string first "&" $flags]
    if {$ampAt >= 0} {
        set stripped [string replace $flags $ampAt $ampAt]
        foreach {flavor extra} {ARE "" BRE b} {
            eval [linsert $args 0 f [linsert $testid end $flavor] \
                    $stripped$extra $re $target]
        }
        return
    }

    set switches [flags $flags]
    set reinfo [infoflags $flags]
    set compileCmd [concat [list testregexp -$ask] $switches [list $re]]
    if {[llength $args] == 0} {
        # Caller didn't tell us the number of subexpressions, so drop
        # the first element of the compile result before comparing.
        set compileCmd "lreplace \[$compileCmd\] 0 0"
        set wantInfo [list $reinfo]
    } else {
        set wantInfo [list [expr {[llength $args] - 1}] $reinfo]
    }

    set compileId [linsert $testid end "compile"]
    test $prefix.[tno $compileId] [desc $compileId] {testregexp} \
        $compileCmd $wantInfo

    set executeId [linsert $testid end "execute"]
    set matchCmd [concat [list testregexp] $switches [list $re $target]]
    test $prefix.[tno $executeId] [desc $executeId] {testregexp} $matchCmd 0
}
258
# matchexpected --
#	Internal routine that does the work for m, i and p.  Registers a
#	"compile" test checking the testregexp -$ask output and an
#	"execute" test checking the match result and the captured values.
#
# Arguments:
#	opts	possibly-empty list of switches for the match attempt
#	testid	minor test number (a list once modifiers are appended)
#	flags	flags string; "!" additionally means the match itself is
#		expected to fail (for REG_EXPECT, which wants argument
#		testing even in the event of failure)
#	re	the regular expression
#	target	string to match against
#	args	expected match followed by the expected submatches
proc matchexpected {opts testid flags re target args} {
    global prefix description ask regBug

    # Running under the knownBug wrapper: register a test carrying the
    # knownBug constraint instead of the real one.
    if {[info exists regBug] && $regBug} {
        test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
        return
    }

    # Tcl locale stuff doesn't do the ch/xy test fakery yet, so a "+"
    # test is registered under the always-false localeRegexp constraint
    # and shows up as skipped.
    if {[string first "+" $flags] >= 0} {
        test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
        return
    }

    # "&" means: run once as ARE and once, with "b" appended, as BRE.
    set ampAt [string first "&" $flags]
    if {$ampAt >= 0} {
        set stripped [string replace $flags $ampAt $ampAt]
        eval [linsert $args 0 matchexpected $opts \
                [linsert $testid end ARE] $stripped $re $target]
        eval [linsert $args 0 matchexpected $opts \
                [linsert $testid end BRE] ${stripped}b $re $target]
        return
    }

    set switches [flags $flags]
    set reinfo [infoflags $flags]
    set compileCmd [concat [list testregexp -$ask] $switches [list $re]]
    set matchCmd [concat [list testregexp] $opts $switches [list $re $target]]

    # One result variable per expected value: "match", then sub1, sub2...
    # captureRefs accumulates the matching " $name" references, each
    # with its leading space.
    set captureVars [list]
    set captureRefs ""
    set idx 0
    foreach expectedValue $args {
        if {$idx == 0} {
            set var match
        } else {
            set var sub$idx
        }
        lappend captureVars $var
        append captureRefs " \$$var"
        set $var ""
        incr idx
    }

    set nsub [expr {[llength $args] - 1}]
    if {[string first "o" $flags] >= 0} {
        # REG_NOSUB kludge: unsigned value cannot be -1
        set nsub 0
    }
    if {[string first "t" $flags] >= 0} {
        # REG_EXPECT: the extra does not count
        incr nsub -1
    }

    set matchCmd [concat $matchCmd $captureVars]
    set matchScript "list \[$matchCmd\] $captureRefs"
    if {[string first "!" $flags] >= 0} {
        set expectRc 0
    } else {
        set expectRc 1
    }
    set wanted [concat [list $expectRc] $args]

    set compileId [linsert $testid end "compile"]
    test $prefix.[tno $compileId] [desc $compileId] {testregexp} \
        $compileCmd [list $nsub $reinfo]

    set executeId [linsert $testid end "execute"]
    test $prefix.[tno $executeId] [desc $executeId] {testregexp} \
        $matchScript $wanted
}
330
# m --
#	Match expected (no missing, empty, or ambiguous submatches).
#	Usage: m testno flags re target mat submat ...
#	Hands everything to matchexpected with an empty list of match
#	switches.
proc m {args} {
    eval [linsert $args 0 matchexpected [list]]
}
336
# i --
#	Match expected (full fanciness): like m, but the match attempt is
#	made with -indices, so the expected values are index pairs.
#	Usage: i testno flags re target mat submat ...
proc i {args} {
    eval [linsert $args 0 matchexpected [list "-indices"]]
}
342
# p --
#	Partial match expected: like i, but "!" is put in front of the
#	flags so that matchexpected expects the match itself to fail.
#	Usage: p testno flags re target mat "" ...
#	Quirk: number of ""s must be one more than number of subREs.
proc p {args} {
    set marked "![lindex $args 1]"		;# add ! flag
    set forwarded [lreplace $args 1 1 $marked]
    eval [linsert $forwarded 0 matchexpected [list "-indices"]]
}
351
# knownBug --
#	Run one table command (e.g. "knownBug i 9 t abc+ ...") with the
#	global regBug flag set, so that matchexpected registers the test
#	under the knownBug constraint instead of running it.
#
#	The flag is cleared again even when the wrapped command raises an
#	error.  Otherwise a single failing row would leave regBug at 1 and
#	every later m/i/p row would be turned into a knownBug test too.
#
# Arguments:
#	args	the command and its arguments, evaluated at global level
#
# Results:
#	Returns 0 (the value of the cleared flag).  An error raised by the
#	wrapped command is re-raised with its original message, errorInfo
#	and errorCode.
proc knownBug {args} {
    global errorInfo errorCode

    set ::regBug 1
    set status [catch {uplevel #0 $args} result]
    set ::regBug 0
    if {$status == 1} {
        return -code error -errorinfo $errorInfo -errorcode $errorCode $result
    }
    return 0
}
358
359
360
361# the tests themselves
362
363
364
# support functions and preliminary misc.
# The expected message below is sensitive to changes in message wording,
# but the code->message expansion really has to be tested at least once.
test reg-0.1 "regexp error reporting" {
    set status [catch {regexp (*) ign} msg]
    list $status $msg
} {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
371
372
373
374doing 1 "basic sanity checks"
375m 1 & abc abc abc
376f 2 & abc def
377m 3 & abc xyabxabce abc
378
379
380
381doing 2 "invalid option combinations"
382e 1 qe a INVARG
383e 2 qa a INVARG
384e 3 qx a INVARG
385e 4 qn a INVARG
386e 5 ba a INVARG
387
388
389
390doing 3 "basic syntax"
391i 1 &NS "" a {0 -1}
392m 2 NS a| a a
393m 3 - a|b a a
394m 4 - a|b b b
395m 5 NS a||b b b
396m 6 & ab ab ab
397
398
399
400doing 4 "parentheses"
401m 1 - (a)e ae ae a
402m 2 o (a)e ae
403m 3 b {\(a\)b} ab ab a
404m 4 - a((b)c) abc abc bc b
405m 5 - a(b)(c) abc abc b c
406e 6 - a(b EPAREN
407e 7 b {a\(b} EPAREN
408# sigh, we blew it on the specs here... someday this will be fixed in POSIX,
409# but meanwhile, it's fixed in AREs
410m 8 eU a)b a)b a)b
411e 9 - a)b EPAREN
412e 10 b {a\)b} EPAREN
413m 11 P a(?:b)c abc abc
414e 12 e a(?:b)c BADRPT
415i 13 S a()b ab {0 1} {1 0}
416m 14 SP a(?:)b ab ab
417i 15 S a(|b)c ac {0 1} {1 0}
418m 16 S a(b|)c abc abc b
419
420
421
422doing 5 "simple one-char matching"
423# general case of brackets done later
424m 1 & a.b axb axb
425f 2 &n "a.b" "a\nb"
426m 3 & {a[bc]d} abd abd
427m 4 & {a[bc]d} acd acd
428f 5 & {a[bc]d} aed
429f 6 & {a[^bc]d} abd
430m 7 & {a[^bc]d} aed aed
431f 8 &p "a\[^bc]d" "a\nd"
432
433
434
435doing 6 "context-dependent syntax"
436# plus odds and ends
437e 1 - * BADRPT
438m 2 b * * *
439m 3 b {\(*\)} * * *
440e 4 - (*) BADRPT
441m 5 b ^* * *
442e 6 - ^* BADRPT
443f 7 & ^b ^b
444m 8 b x^ x^ x^
445f 9 I x^ x
446m 10 n "\n^" "x\nb" "\n"
447f 11 bS {\(^b\)} ^b
448m 12 - (^b) b b b
449m 13 & {x$} x x
450m 14 bS {\(x$\)} x x x
451m 15 - {(x$)} x x x
452m 16 b {x$y} "x\$y" "x\$y"
453f 17 I {x$y} xy
454m 18 n "x\$\n" "x\n" "x\n"
455e 19 - + BADRPT
456e 20 - ? BADRPT
457
458
459
460doing 7 "simple quantifiers"
461m 1 &N a* aa aa
462i 2 &N a* b {0 -1}
463m 3 - a+ aa aa
464m 4 - a?b ab ab
465m 5 - a?b b b
466e 6 - ** BADRPT
467m 7 bN ** *** ***
468e 8 & a** BADRPT
469e 9 & a**b BADRPT
470e 10 & *** BADRPT
471e 11 - a++ BADRPT
472e 12 - a?+ BADRPT
473e 13 - a?* BADRPT
474e 14 - a+* BADRPT
475e 15 - a*+ BADRPT
476
477
478
479doing 8 "braces"
480m 1 NQ "a{0,1}" "" ""
481m 2 NQ "a{0,1}" ac a
482e 3 - "a{1,0}" BADBR
483e 4 - "a{1,2,3}" BADBR
484e 5 - "a{257}" BADBR
485e 6 - "a{1000}" BADBR
486e 7 - "a{1" EBRACE
487e 8 - "a{1n}" BADBR
488m 9 BS "a{b" "a\{b" "a\{b"
489m 10 BS "a{" "a\{" "a\{"
490m 11 bQ "a\\{0,1\\}b" cb b
491e 12 b "a\\{0,1" EBRACE
492e 13 - "a{0,1\\" BADBR
493m 14 Q "a{0}b" ab b
494m 15 Q "a{0,0}b" ab b
495m 16 Q "a{0,1}b" ab ab
496m 17 Q "a{0,2}b" b b
497m 18 Q "a{0,2}b" aab aab
498m 19 Q "a{0,}b" aab aab
499m 20 Q "a{1,1}b" aab ab
500m 21 Q "a{1,3}b" aaaab aaab
501f 22 Q "a{1,3}b" b
502m 23 Q "a{1,}b" aab aab
503f 24 Q "a{2,3}b" ab
504m 25 Q "a{2,3}b" aaaab aaab
505f 26 Q "a{2,}b" ab
506m 27 Q "a{2,}b" aaaab aaaab
507
508
509
510doing 9 "brackets"
511m 1 & {a[bc]} ac ac
512m 2 & {a[-]} a- a-
513m 3 & {a[[.-.]]} a- a-
514m 4 &L {a[[.zero.]]} a0 a0
515m 5 &LM {a[[.zero.]-9]} a2 a2
516m 6 &M {a[0-[.9.]]} a2 a2
517m 7 &+L {a[[=x=]]} ax ax
518m 8 &+L {a[[=x=]]} ay ay
519f 9 &+L {a[[=x=]]} az
520e 10 & {a[0-[=x=]]} ERANGE
521m 11 &L {a[[:digit:]]} a0 a0
522e 12 & {a[[:woopsie:]]} ECTYPE
523f 13 &L {a[[:digit:]]} ab
524e 14 & {a[0-[:digit:]]} ERANGE
525m 15 &LP {[[:<:]]a} a a
526m 16 &LP {a[[:>:]]} a a
527e 17 & {a[[..]]b} ECOLLATE
528e 18 & {a[[==]]b} ECOLLATE
529e 19 & {a[[::]]b} ECTYPE
530e 20 & {a[[.a} EBRACK
531e 21 & {a[[=a} EBRACK
532e 22 & {a[[:a} EBRACK
533e 23 & {a[} EBRACK
534e 24 & {a[b} EBRACK
535e 25 & {a[b-} EBRACK
536e 26 & {a[b-c} EBRACK
537m 27 &M {a[b-c]} ab ab
538m 28 & {a[b-b]} ab ab
539m 29 &M {a[1-2]} a2 a2
540e 30 & {a[c-b]} ERANGE
541e 31 & {a[a-b-c]} ERANGE
542m 32 &M {a[--?]b} a?b a?b
543m 33 & {a[---]b} a-b a-b
544m 34 & {a[]b]c} a]c a]c
545m 35 EP {a[\]]b} a]b a]b
546f 36 bE {a[\]]b} a]b
547m 37 bE {a[\]]b} "a\\]b" "a\\]b"
548m 38 eE {a[\]]b} "a\\]b" "a\\]b"
549m 39 EP {a[\\]b} "a\\b" "a\\b"
550m 40 eE {a[\\]b} "a\\b" "a\\b"
551m 41 bE {a[\\]b} "a\\b" "a\\b"
552e 42 - {a[\Z]b} EESCAPE
553m 43 & {a[[b]c} "a\[c" "a\[c"
554m 44 EMP* {a[\u00fe-\u0507][\u00ff-\u0300]b} \
555 "a\u0102\u02ffb" "a\u0102\u02ffb"
556
557
558
559doing 10 "anchors and newlines"
560m 1 & ^a a a
561f 2 &^ ^a a
562i 3 &N ^ a {0 -1}
563i 4 & {a$} aba {2 2}
564f 5 {&$} {a$} a
565i 6 &N {$} ab {2 1}
566m 7 &n ^a a a
567m 8 &n "^a" "b\na" "a"
568i 9 &w "^a" "a\na" {0 0}
569i 10 &n^ "^a" "a\na" {2 2}
570m 11 &n {a$} a a
571m 12 &n "a\$" "a\nb" "a"
572i 13 &n "a\$" "a\na" {0 0}
573i 14 N ^^ a {0 -1}
574m 15 b ^^ ^ ^
575i 16 N {$$} a {1 0}
576m 17 b {$$} "\$" "\$"
577m 18 &N {^$} "" ""
578f 19 &N {^$} a
579i 20 &nN "^\$" "a\n\nb" {2 1}
580m 21 N {$^} "" ""
581m 22 b {$^} "\$^" "\$^"
582m 23 P {\Aa} a a
583m 24 ^P {\Aa} a a
584f 25 ^nP {\Aa} "b\na"
585m 26 P {a\Z} a a
586m 27 {$P} {a\Z} a a
587f 28 {$nP} {a\Z} "a\nb"
588e 29 - ^* BADRPT
589e 30 - {$*} BADRPT
590e 31 - {\A*} BADRPT
591e 32 - {\Z*} BADRPT
592
593
594
595doing 11 "boundary constraints"
596m 1 &LP {[[:<:]]a} a a
597m 2 &LP {[[:<:]]a} -a a
598f 3 &LP {[[:<:]]a} ba
599m 4 &LP {a[[:>:]]} a a
600m 5 &LP {a[[:>:]]} a- a
601f 6 &LP {a[[:>:]]} ab
602m 7 bLP {\<a} a a
603f 8 bLP {\<a} ba
604m 9 bLP {a\>} a a
605f 10 bLP {a\>} ab
606m 11 LP {\ya} a a
607f 12 LP {\ya} ba
608m 13 LP {a\y} a a
609f 14 LP {a\y} ab
610m 15 LP {a\Y} ab a
611f 16 LP {a\Y} a-
612f 17 LP {a\Y} a
613f 18 LP {-\Y} -a
614m 19 LP {-\Y} -% -
615f 20 LP {\Y-} a-
616e 21 - {[[:<:]]*} BADRPT
617e 22 - {[[:>:]]*} BADRPT
618e 23 b {\<*} BADRPT
619e 24 b {\>*} BADRPT
620e 25 - {\y*} BADRPT
621e 26 - {\Y*} BADRPT
622m 27 LP {\ma} a a
623f 28 LP {\ma} ba
624m 29 LP {a\M} a a
625f 30 LP {a\M} ab
626f 31 ILP {\Ma} a
627f 32 ILP {a\m} a
628
629
630
631doing 12 "character classes"
632m 1 LP {a\db} a0b a0b
633f 2 LP {a\db} axb
634f 3 LP {a\Db} a0b
635m 4 LP {a\Db} axb axb
636m 5 LP "a\\sb" "a b" "a b"
637m 6 LP "a\\sb" "a\tb" "a\tb"
638m 7 LP "a\\sb" "a\nb" "a\nb"
639f 8 LP {a\sb} axb
640m 9 LP {a\Sb} axb axb
641f 10 LP "a\\Sb" "a b"
642m 11 LP {a\wb} axb axb
643f 12 LP {a\wb} a-b
644f 13 LP {a\Wb} axb
645m 14 LP {a\Wb} a-b a-b
646m 15 LP {\y\w+z\y} adze-guz guz
647m 16 LPE {a[\d]b} a1b a1b
648m 17 LPE "a\[\\s]b" "a b" "a b"
649m 18 LPE {a[\w]b} axb axb
650
651
652
653doing 13 "escapes"
654e 1 & "a\\" EESCAPE
655m 2 - {a\<b} a<b a<b
656m 3 e {a\<b} a<b a<b
657m 4 bAS {a\wb} awb awb
658m 5 eAS {a\wb} awb awb
659m 6 PL "a\\ab" "a\007b" "a\007b"
660m 7 P "a\\bb" "a\bb" "a\bb"
661m 8 P {a\Bb} "a\\b" "a\\b"
662m 9 MP "a\\chb" "a\bb" "a\bb"
663m 10 MP "a\\cHb" "a\bb" "a\bb"
664m 11 LMP "a\\e" "a\033" "a\033"
665m 12 P "a\\fb" "a\fb" "a\fb"
666m 13 P "a\\nb" "a\nb" "a\nb"
667m 14 P "a\\rb" "a\rb" "a\rb"
668m 15 P "a\\tb" "a\tb" "a\tb"
669m 16 P "a\\u0008x" "a\bx" "a\bx"
670e 17 - {a\u008x} EESCAPE
671m 18 P "a\\u00088x" "a\b8x" "a\b8x"
672m 19 P "a\\U00000008x" "a\bx" "a\bx"
673e 20 - {a\U0000008x} EESCAPE
674m 21 P "a\\vb" "a\vb" "a\vb"
675m 22 MP "a\\x08x" "a\bx" "a\bx"
676e 23 - {a\xq} EESCAPE
677m 24 MP "a\\x0008x" "a\bx" "a\bx"
678e 25 - {a\z} EESCAPE
679m 26 MP "a\\010b" "a\bb" "a\bb"
680
681
682
683doing 14 "back references"
684# ugh
685m 1 RP {a(b*)c\1} abbcbb abbcbb bb
686m 2 RP {a(b*)c\1} ac ac ""
687f 3 RP {a(b*)c\1} abbcb
688m 4 RP {a(b*)\1} abbcbb abb b
689m 5 RP {a(b|bb)\1} abbcbb abb b
690m 6 RP {a([bc])\1} abb abb b
691f 7 RP {a([bc])\1} abc
692m 8 RP {a([bc])\1} abcabb abb b
693f 9 RP {a([bc])*\1} abc
694f 10 RP {a([bc])\1} abB
695m 11 iRP {a([bc])\1} abB abB b
696m 12 RP {a([bc])\1+} abbb abbb b
697m 13 QRP "a(\[bc])\\1{3,4}" abbbb abbbb b
698f 14 QRP "a(\[bc])\\1{3,4}" abbb
699m 15 RP {a([bc])\1*} abbb abbb b
700m 16 RP {a([bc])\1*} ab ab b
701m 17 RP {a([bc])(\1*)} ab ab b ""
702e 18 - {a((b)\1)} ESUBREG
703e 19 - {a(b)c\2} ESUBREG
704m 20 bR {a\(b*\)c\1} abbcbb abbcbb bb
705
706
707
708doing 15 "octal escapes vs back references"
709# initial zero is always octal
710m 1 MP "a\\010b" "a\bb" "a\bb"
711m 2 MP "a\\0070b" "a\0070b" "a\0070b"
712m 3 MP "a\\07b" "a\007b" "a\007b"
713m 4 MP "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c" "abbbbbbbbbb\007c" \
714 "abbbbbbbbbb\007c" "b" "b" "b" "b" "b" "b" \
715 "b" "b" "b" "b"
716# a single digit is always a backref
717e 5 - {a\7b} ESUBREG
718# otherwise it's a backref only if within range (barf!)
719m 6 MP "a\\10b" "a\bb" "a\bb"
720m 7 MP {a\101b} aAb aAb
721m 8 RP {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c} abbbbbbbbbbbc \
722 abbbbbbbbbbbc b b b b b b b \
723 b b b
724# but we're fussy about border cases -- guys who want octal should use the zero
725e 9 - {a((((((((((b\10))))))))))c} ESUBREG
726# BREs don't have octal, EREs don't have backrefs
727m 10 MP "a\\12b" "a\nb" "a\nb"
728e 11 b {a\12b} ESUBREG
729m 12 eAS {a\12b} a12b a12b
730
731
732
733doing 16 "expanded syntax"
734m 1 xP "a b c" "abc" "abc"
735m 2 xP "a b #oops\nc\td" "abcd" "abcd"
736m 3 x "a\\ b\\\tc" "a b\tc" "a b\tc"
737m 4 xP "a b\\#c" "ab#c" "ab#c"
738m 5 xP "a b\[c d]e" "ab e" "ab e"
739m 6 xP "a b\[c#d]e" "ab#e" "ab#e"
740m 7 xP "a b\[c#d]e" "abde" "abde"
741m 8 xSPB "ab{ d" "ab\{d" "ab\{d"
742m 9 xPQ "ab{ 1 , 2 }c" "abc" "abc"
743
744
745
746doing 17 "misc syntax"
747m 1 P a(?#comment)b ab ab
748
749
750
751doing 18 "unmatchable REs"
752f 1 I a^b ab
753
754
755
756doing 19 "case independence"
757m 1 &i ab Ab Ab
758m 2 &i {a[bc]} aC aC
759f 3 &i {a[^bc]} aB
760m 4 &iM {a[b-d]} aC aC
761f 5 &iM {a[^b-d]} aC
762
763
764
765doing 20 "directors and embedded options"
766e 1 & ***? BADPAT
767m 2 q ***? ***? ***?
768m 3 &P ***=a*b a*b a*b
769m 4 q ***=a*b ***=a*b ***=a*b
770m 5 bLP {***:\w+} ab ab
771m 6 eLP {***:\w+} ab ab
772e 7 & ***:***=a*b BADRPT
773m 8 &P ***:(?b)a+b a+b a+b
774m 9 P (?b)a+b a+b a+b
775e 10 e {(?b)\w+} BADRPT
776m 11 bAS {(?b)\w+} (?b)w+ (?b)w+
777m 12 iP (?c)a a a
778f 13 iP (?c)a A
779m 14 APS {(?e)\W+} WW WW
780m 15 P (?i)a+ Aa Aa
781f 16 P "(?m)a.b" "a\nb"
782m 17 P "(?m)^b" "a\nb" "b"
783f 18 P "(?n)a.b" "a\nb"
784m 19 P "(?n)^b" "a\nb" "b"
785f 20 P "(?p)a.b" "a\nb"
786f 21 P "(?p)^b" "a\nb"
787m 22 P (?q)a+b a+b a+b
788m 23 nP "(?s)a.b" "a\nb" "a\nb"
789m 24 xP "(?t)a b" "a b" "a b"
790m 25 P "(?w)a.b" "a\nb" "a\nb"
791m 26 P "(?w)^b" "a\nb" "b"
792m 27 P "(?x)a b" "ab" "ab"
793e 28 - (?z)ab BADOPT
794m 29 P (?ici)a+ Aa Aa
795e 30 P (?i)(?q)a+ BADRPT
796m 31 P (?q)(?i)a+ (?i)a+ (?i)a+
797m 32 P (?qe)a+ a a
798m 33 xP "(?q)a b" "a b" "a b"
799m 34 P "(?qx)a b" "a b" "a b"
800m 35 P (?qi)ab Ab Ab
801
802
803
804doing 21 "capturing"
805m 1 - a(b)c abc abc b
806m 2 P a(?:b)c xabc abc
807m 3 - a((b))c xabcy abc b b
808m 4 P a(?:(b))c abcy abc b
809m 5 P a((?:b))c abc abc b
810m 6 P a(?:(?:b))c abc abc
811i 7 Q "a(b){0}c" ac {0 1} {-1 -1}
812m 8 - a(b)c(d)e abcde abcde b d
813m 9 - (b)c(d)e bcde bcde b d
814m 10 - a(b)(d)e abde abde b d
815m 11 - a(b)c(d) abcd abcd b d
816m 12 - (ab)(cd) xabcdy abcd ab cd
817m 13 - a(b)?c xabcy abc b
818i 14 - a(b)?c xacy {1 2} {-1 -1}
819m 15 - a(b)?c(d)?e xabcdey abcde b d
820i 16 - a(b)?c(d)?e xacdey {1 4} {-1 -1} {3 3}
821i 17 - a(b)?c(d)?e xabcey {1 4} {2 2} {-1 -1}
822i 18 - a(b)?c(d)?e xacey {1 3} {-1 -1} {-1 -1}
823m 19 - a(b)*c xabcy abc b
824i 20 - a(b)*c xabbbcy {1 5} {4 4}
825i 21 - a(b)*c xacy {1 2} {-1 -1}
826m 22 - a(b*)c xabbbcy abbbc bbb
827m 23 - a(b*)c xacy ac ""
828f 24 - a(b)+c xacy
829m 25 - a(b)+c xabcy abc b
830i 26 - a(b)+c xabbbcy {1 5} {4 4}
831m 27 - a(b+)c xabbbcy abbbc bbb
832i 28 Q "a(b){2,3}c" xabbbcy {1 5} {4 4}
833i 29 Q "a(b){2,3}c" xabbcy {1 4} {3 3}
834f 30 Q "a(b){2,3}c" xabcy
835m 31 LP "\\y(\\w+)\\y" "-- abc-" "abc" "abc"
836m 32 - a((b|c)d+)+ abacdbd acdbd bd b
837m 33 N (.*).* abc abc abc
838m 34 N (a*)* bc "" ""
839
840
841
842doing 22 "multicharacter collating elements"
843# again ugh
844m 1 &+L {a[c]e} ace ace
845f 2 &+IL {a[c]h} ach
846m 3 &+L {a[[.ch.]]} ach ach
847f 4 &+L {a[[.ch.]]} ace
848m 5 &+L {a[c[.ch.]]} ac ac
849m 6 &+L {a[c[.ch.]]} ace ac
850m 7 &+L {a[c[.ch.]]} ache ach
851f 8 &+L {a[^c]e} ace
852m 9 &+L {a[^c]e} abe abe
853m 10 &+L {a[^c]e} ache ache
854f 11 &+L {a[^[.ch.]]} ach
855m 12 &+L {a[^[.ch.]]} ace ac
856m 13 &+L {a[^[.ch.]]} ac ac
857m 14 &+L {a[^[.ch.]]} abe ab
858f 15 &+L {a[^c[.ch.]]} ach
859f 16 &+L {a[^c[.ch.]]} ace
860f 17 &+L {a[^c[.ch.]]} ac
861m 18 &+L {a[^c[.ch.]]} abe ab
862m 19 &+L {a[^b]} ac ac
863m 20 &+L {a[^b]} ace ac
864m 21 &+L {a[^b]} ach ach
865f 22 &+L {a[^b]} abe
866
867
868
869doing 23 "lookahead constraints"
870m 1 HP a(?=b)b* ab ab
871f 2 HP a(?=b)b* a
872m 3 HP a(?=b)b*(?=c)c* abc abc
873f 4 HP a(?=b)b*(?=c)c* ab
874f 5 HP a(?!b)b* ab
875m 6 HP a(?!b)b* a a
876m 7 HP (?=b)b b b
877f 8 HP (?=b)b a
878
879
880
881doing 24 "non-greedy quantifiers"
882m 1 PT ab+? abb ab
883m 2 PT ab+?c abbc abbc
884m 3 PT ab*? abb a
885m 4 PT ab*?c abbc abbc
886m 5 PT ab?? ab a
887m 6 PT ab??c abc abc
888m 7 PQT "ab{2,4}?" abbbb abb
889m 8 PQT "ab{2,4}?c" abbbbc abbbbc
890m 9 - 3z* 123zzzz456 3zzzz
891m 10 PT 3z*? 123zzzz456 3
892m 11 - z*4 123zzzz456 zzzz4
893m 12 PT z*?4 123zzzz456 zzzz4
894
895
896
897doing 25 "mixed quantifiers"
898# this is very incomplete as yet
899# should include |
900m 1 PNT {^(.*?)(a*)$} xyza xyza xyz a
901m 2 PNT {^(.*?)(a*)$} xyzaa xyzaa xyz aa
902m 3 PNT {^(.*?)(a*)$} xyz xyz xyz ""
903
904
905
906doing 26 "tricky cases"
907# attempts to trick the matcher into accepting a short match
908m 1 - (week|wee)(night|knights) weeknights weeknights \
909 wee knights
910m 2 RP {a(bc*).*\1} abccbccb abccbccb b
911m 3 - {a(b.[bc]*)+} abcbd abcbd bd
912
913
914
915doing 27 "implementation misc."
916# duplicate arcs are suppressed
917m 1 P a(?:b|b)c abc abc
918# make color/subcolor relationship go back and forth
919m 2 & {[ab][ab][ab]} aba aba
920m 3 & {[ab][ab][ab][ab][ab][ab][ab]} abababa abababa
921
922
923
924doing 28 "boundary busters etc."
925# color-descriptor allocation changes at 10
926m 1 & abcdefghijkl abcdefghijkl abcdefghijkl
927# so does arc allocation
928m 2 P a(?:b|c|d|e|f|g|h|i|j|k|l|m)n agn agn
929# subexpression tracking also at 10
930m 3 - a(((((((((((((b)))))))))))))c abc abc b b b b b b b b b b b b b
931# state-set handling changes slightly at unsigned size (might be 64...)
932# (also stresses arc allocation)
933m 4 Q "ab{1,100}c" abbc abbc
934m 5 Q "ab{1,100}c" abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
935 abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
936m 6 Q "ab{1,100}c" \
937 abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
938 abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
939# force small cache and bust it, several ways
940m 7 LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh
941m 8 %LP {\w+abcdefgh} xyzabcdefgh xyzabcdefgh
942m 9 %LP {\w+abcdefghijklmnopqrst} xyzabcdefghijklmnopqrst \
943 xyzabcdefghijklmnopqrst
944i 10 %LP {\w+(abcdefgh)?} xyz {0 2} {-1 -1}
945i 11 %LP {\w+(abcdefgh)?} xyzabcdefg {0 9} {-1 -1}
946i 12 %LP {\w+(abcdefghijklmnopqrst)?} xyzabcdefghijklmnopqrs \
947 {0 21} {-1 -1}
948
949
950
951doing 29 "incomplete matches"
952p 1 t def abc {3 2} ""
953p 2 t bcd abc {1 2} ""
954p 3 t abc abab {0 3} ""
955p 4 t abc abdab {3 4} ""
956i 5 t abc abc {0 2} {0 2}
957i 6 t abc xyabc {2 4} {2 4}
958p 7 t abc+ xyab {2 3} ""
959i 8 t abc+ xyabc {2 4} {2 4}
960knownBug i 9 t abc+ xyabcd {2 4} {6 5}
961i 10 t abc+ xyabcdd {2 4} {7 6}
962p 11 tPT abc+? xyab {2 3} ""
963# the retain numbers in these two may look wrong, but they aren't
964i 12 tPT abc+? xyabc {2 4} {5 4}
965i 13 tPT abc+? xyabcc {2 4} {6 5}
966i 14 tPT abc+? xyabcd {2 4} {6 5}
967i 15 tPT abc+? xyabcdd {2 4} {7 6}
968i 16 t abcd|bc xyabc {3 4} {2 4}
969p 17 tn .*k "xx\nyyy" {3 5} ""
970
971
972doing 30 "misc. oddities and old bugs"
973e 1 & *** BADRPT
974m 2 N a?b* abb abb
975m 3 N a?b* bb bb
976m 4 & a*b aab aab
977m 5 & ^a*b aaaab aaaab
978m 6 &M {[0-6][1-2][0-3][0-6][1-6][0-6]} 010010 010010
979# temporary REG_BOSONLY kludge
980m 7 s abc abcd abc
981f 8 s abc xabcd
982# back to normal stuff
983m 9 HLP {(?n)^(?![t#])\S+} "tk\n\n#\n#\nit0" it0
984
985
986# flush any leftover complaints
987doing 0 "flush"
988
# Tests resulting from bugs reported by users
test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
    set str {2:::DebugWin32}
    set re {([[:xdigit:]])([[:space:]]*)}
    set matched [regexp $re $str match xdigit spaces]
    # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
    list $matched $match $xdigit $spaces
} {1 2 2 {}}
996
# Tests reg-32.*: "canmatch" reporting.  Each test runs testregexp with
# the "c" xflag and returns the match result followed by the value stored
# in resvar.  Judging from the expected values, resvar is the index at
# which a match could still begin if more text followed, or -1 if none
# could.
#
# All of them call testregexp, so all of them carry the testregexp
# constraint and are skipped, not failed, when that command is missing.

test reg-32.1 {canmatch functionality -- at end} {testregexp} {
    set pat {blah}
    set line "asd asd"
    # nothing in the line can begin a match; the expected index 7 is the
    # length of the line, i.e. a match could only start past its end
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 7}

test reg-32.2 {canmatch functionality -- at end} {testregexp} {
    set pat {s%$}
    set line "asd asd"
    # can only match after the end of the string
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 7}

test reg-32.3 {canmatch functionality -- not last char} {testregexp} {
    set pat {[^d]%$}
    set line "asd asd"
    # can only match after the end of the string
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 7}

test reg-32.3.1 {canmatch functionality -- no match} {testregexp} {
    set pat {\Zx}
    set line "asd asd"
    # an x is required after the end-of-string anchor, so the expected
    # result is -1: no match is possible whatever text follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 -1}

test reg-32.4 {canmatch functionality -- last char} {knownBug testregexp} {
    set pat {.x}
    set line "asd asd"
    # can match the last char, if followed by x
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.4.1 {canmatch functionality -- last char} {knownBug testregexp} {
    set pat {.x$}
    set line "asd asd"
    # can match the last char, if followed by x
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.5 {canmatch functionality -- last char} {knownBug testregexp} {
    set pat {.[^d]x$}
    set line "asd asd"
    # can match the last char, if followed by not-d and x.
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.6 {canmatch functionality -- last char} {knownBug testregexp} {
    set pat {[^a]%[^\r\n]*$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.7 {canmatch functionality -- last char} {knownBug testregexp} {
    set pat {[^a]%$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.8 {canmatch functionality -- last char} {knownBug testregexp} {
    set pat {[^x]%$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.9 {canmatch functionality -- more complex case} {knownBug testregexp} {
    set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}
1084
# Tests reg-33.*: Checks for bug fixes.  These use the plain regexp and
# regsub commands, so they carry no constraint; each test is labelled
# with the number of the bug report it guards against.

test reg-33.1 {Bug 230589} {
    regexp {[ ]*(^|[^%])%V} "*%V2" m s
} {1}

test reg-33.2 {Bug 504785} {
    regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
} {bbcos_001_c01.q1la bbcos _001_c01 q1 l}

test reg-33.3 {Bug 505048} {
    regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
} {1}

test reg-33.4 {Bug 505048} {
    regexp {\A\s*([^b]*)b} ab
} {1}

test reg-33.5 {Bug 505048} {
    regexp {\A\s*[^b]*(b)} ab
} {1}

test reg-33.6 {Bug 505048} {
    regexp {\A(\s*)[^b]*(b)} ab
} {1}

test reg-33.7 {Bug 505048} {
    regexp {\A\s*[^b]*b} ab
} {1}

test reg-33.8 {Bug 505048} {
    regexp -inline {\A\s*[^b]*b} ab
} {ab}

test reg-33.9 {Bug 505048} {
    regexp -indices -inline {\A\s*[^b]*b} ab
} {{0 1}}

test reg-33.10 {Bug 840258} {
    regsub {(^|\n)+\.*b} \n.b {} tmp
} {1}

test reg-33.11 {Bug 840258} {
    regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
        "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
} {1}
1131
1132# cleanup
1133::tcltest::cleanupTests
1134return