3 # This file contains a collection of tests for one or more of the Tcl
 
   4 # built-in commands.  Sourcing this file into Tcl runs the tests and
 
   5 # generates output for errors.  No output means no errors were found.
 
   6 # (Don't panic if you are seeing this as part of the reg distribution
 
   7 # and aren't using Tcl -- reg's own regression tester also knows how
 
   8 # to read this file, ignoring the Tcl-isms.)
 
  10 # Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
 
  14 if {[lsearch [namespace children] ::tcltest] == -1} {
 
  15     package require tcltest 2
 
  16     namespace import -force ::tcltest::*
 
  19 # All tests require the testregexp command; the testregexp constraint
 
  20 # set below makes tcltest skip them when the command doesn't exist
 
  22 ::tcltest::testConstraint testregexp \
 
  23         [expr {[info commands testregexp] != {}}]
 
     # localeRegexp is pinned to 0, so any test that names it as its
     # constraint (the "+" flag paths in e, f and matchexpected) is skipped.
  24 ::tcltest::testConstraint localeRegexp 0
 
  26 # This file uses some custom procedures, defined below, for regexp regression
 
  27 # testing.  The name of the procedure indicates the general nature of the
 
  29 #       e       compile error expected
 
  30 #       f       match failure expected
 
  32 #       i       successful match with -indices (used in checking things like
 
  33 #               nonparticipating subexpressions)
 
  34 #       p       unsuccessful match with -indices (!!) (used in checking
 
  35 #               partial-match reporting)
 
  36 # There is also "doing" which sets up title and major test number for each
 
  39 # The first 3 arguments are constant:  a minor number (which often gets
 
  40 # a letter or two suffixed to it internally), some flags, and the RE itself.
 
  41 # For e, the remaining argument is the name of the compile error expected,
 
  42 # less the leading "REG_".  For the rest, the next argument is the string
 
  43 # to try the match against.  Remaining arguments are the substring expected
 
  44 # to be matched, and any substrings expected to be matched by subexpressions.
 
  45 # (For f, these arguments are optional, and if present are ignored except
 
  46 # that they indicate how many subexpressions should be present in the RE.)
 
  47 # It is an error for the number of subexpression arguments to be wrong.
 
  48 # Cases involving nonparticipating subexpressions, checking where empty
 
  49 # substrings are located, etc. should be done using i and p.
 
  51 # The flag characters are complex and a bit eclectic.  Generally speaking, 
 
  52 # lowercase letters are compile options, uppercase are expected re_info
 
  53 # bits, and nonalphabetics are match options, controls for how the test is 
 
  54 # run, or testing options.  The one small surprise is that AREs are the
 
  55 # default, and you must explicitly request lesser flavors of RE.  The flags
 
  56 # are as follows.  It is admitted that some are not very mnemonic.
 
  57 # There are some others which are purely debugging tools and are not
 
  58 # useful in this file.
 
  60 #       -       no-op (placeholder)
 
  61 #       +       provide fake xy equivalence class and ch collating element
 
  62 #       %       force small state-set cache in matcher (to test cache replace)
 
  63 #       ^       beginning of string is not beginning of line
 
  64 #       $       end of string is not end of line
 
  65 #       *       test is Unicode-specific, needs big character set
 
  67 #       &       test as both ARE and BRE
 
  70 #       a       turn advanced-features bit on (error unless ERE already)
 
  71 #       q       literal string, no metacharacters at all
 
  73 #       i       case-independent matching
 
  74 #       o       ("opaque") no subexpression capture
 
  75 #       p       newlines are half-magic, excluded from . and [^ only
 
  76 #       w       newlines are half-magic, significant to ^ and $ only
 
  77 #       n       newlines are fully magic, both effects
 
  78 #       x       expanded RE syntax
 
  79 #       t       incomplete-match reporting
 
  81 #       A       backslash-_a_lphanumeric seen
 
  82 #       B       ERE/ARE literal-_b_race heuristic used
 
  83 #       E       backslash (_e_scape) seen within []
 
  84 #       H       looka_h_ead constraint seen
 
  85 #       I       _i_mpossible to match
 
  86 #       L       _l_ocale-specific construct seen
 
  87 #       M       unportable (_m_achine-specific) construct seen
 
  88 #       N       RE can match empty (_n_ull) string
 
  89 #       P       non-_P_OSIX construct seen
 
  90 #       Q       {} _q_uantifier seen
 
  91 #       R       back _r_eference seen
 
  92 #       S       POSIX-un_s_pecified syntax seen
 
  93 #       T       prefers shortest (_t_iny)
 
  94 #       U       saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)
 
  96 # The one area we can't easily test is memory-allocation failures (which
 
  97 # are hard to provoke on command).  Embedded NULs also are not tested at
 
  98 # the moment, but this is a historical accident which should be fixed.
 
 102 # test procedures and related
 
 108 # re_info abbreviation mapping table
     # Each key is one of the uppercase flag letters used in the test tables;
     # the value is the name infoflags emits for that letter when building
     # the expected info list for a compile test.
 
 109 set infonames(A) "REG_UBSALNUM"
 
 110 set infonames(B) "REG_UBRACES"
 
 111 set infonames(E) "REG_UBBS"
 
 112 set infonames(H) "REG_ULOOKAHEAD"
 
 113 set infonames(I) "REG_UIMPOSSIBLE"
 
 114 set infonames(L) "REG_ULOCALE"
 
 115 set infonames(M) "REG_UUNPORT"
 
 116 set infonames(N) "REG_UEMPTYMATCH"
 
 117 set infonames(P) "REG_UNONPOSIX"
 
 118 set infonames(Q) "REG_UBOUNDS"
 
 119 set infonames(R) "REG_UBACKREF"
 
 120 set infonames(S) "REG_UUNSPEC"
 
 121 set infonames(T) "REG_USHORTEST"
 
 122 set infonames(U) "REG_UPBOTCH"
 
     # infoflags iterates over this string character by character, so it
     # fixes the order of the names in the list that infoflags builds.
 123 set infonameorder "RHQBAUEPSMLNIT"      ;# must match bit order, lsb first
 
 125 # set major test number and description
 
 126 proc doing {major desc} {
 
 127         global prefix description testbypassed
 
 129         if {$testbypassed != 0} {
 
 130                 puts stdout "!!! bypassed $testbypassed tests in\
 
 131                                          $prefix, `$description'"
 
 134         set prefix reg-$major
 
 135         set description "reg $desc"
 
 139 # build test number (internal)
 
 141         return [join $testid .]
 
 144 # build description, with possible modifiers (internal)
 
 149         if {[llength $testid] > 1} {
 
 150                 set d "([lreplace $testid 0 0]) $d"
 
 155 # build trailing options and flags argument from a flags string (internal)
 
 161         foreach f [split $fl ""] {
 
 162                 switch -exact -- $f {
 
 163                 "i" { lappend args "-nocase" }
 
 164                 "x" { lappend args "-expanded" }
 
 165                 "n" { lappend args "-line" }
 
 166                 "p" { lappend args "-linestop" }
 
 167                 "w" { lappend args "-lineanchor" }
 
 169                 default { append flags $f }
 
 172         if {[string compare $flags ""] != 0} {
 
 173                 lappend args -$xflags $flags
 
 178 # build info-flags list from a flags string (internal)
 
 179 proc infoflags {fl} {
 
 180         global infonames infonameorder
 
 183         foreach f [split $infonameorder ""] {
 
 184                 if {[string first $f $fl] >= 0} {
 
 185                         lappend ret $infonames($f)
 
 191 # compilation error expected
 
 192 proc e {testid flags re err} {
 
 193         global prefix ask errorCode
 
 195         # Tcl locale stuff doesn't do the ch/xy test fakery yet
 
 196         if {[string first "+" $flags] >= 0} {
 
 197             # This will register as a skipped test
 
 198             test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
 
 202         # if &, test as both ARE and BRE
 
 203         set amp [string first "&" $flags]
 
 205                 set f [string range $flags 0 [expr $amp - 1]]
 
 206                 append f [string range $flags [expr $amp + 1] end]
 
 207                 e [linsert $testid end ARE] ${f} $re $err
 
 208                 e [linsert $testid end BRE] ${f}b $re $err
 
 212         set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
 
 213         set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
 
 214         test $prefix.[tno $testid] [desc $testid] \
 
 215                 {testregexp} $run [list 1 REG_$err]
 
 218 # match failure expected
 
 219 proc f {testid flags re target args} {
 
 220         global prefix description ask
 
 222         # Tcl locale stuff doesn't do the ch/xy test fakery yet
 
 223         if {[string first "+" $flags] >= 0} {
 
 224             # This will register as a skipped test
 
 225             test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
 
 229         # if &, test as both ARE and BRE
 
 230         set amp [string first "&" $flags]
 
 232                 set f [string range $flags 0 [expr $amp - 1]]
 
 233                 append f [string range $flags [expr $amp + 1] end]
 
 234                 eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
 
 236                 eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
 
 242         set infoflags [infoflags $flags]
 
 243         set ccmd [concat [list testregexp -$ask] $f [list $re]]
 
 244         set nsub [expr [llength $args] - 1]
 
 246                 # didn't tell us number of subexps
 
 247                 set ccmd "lreplace \[$ccmd\] 0 0"
 
 248                 set info [list $infoflags]
 
 250                 set info [list $nsub $infoflags]
 
 252         lappend testid "compile"
 
 253         test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
 
 255         set testid [lreplace $testid end end "execute"]
 
 256         set ecmd [concat [list testregexp] $f [list $re $target]]
 
 257         test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
 
 260 # match expected, internal routine that does the work
 
 261 # parameters are those of the "real" routines (m, i, p) preceded by "opts",
 
 262 #  which is a possibly-empty list of switches for the regexp match attempt
 
 263 # The ! flag is used to indicate expected match failure (for REG_EXPECT,
 
 264 #  which wants argument testing even in the event of failure).
 
 265 proc matchexpected {opts testid flags re target args} {
 
 266         global prefix description ask regBug
 
 268     if {[info exists regBug] && $regBug} {
 
 269         # This will register as a skipped test
 
 270         test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
 
 274         # Tcl locale stuff doesn't do the ch/xy test fakery yet
 
 275         if {[string first "+" $flags] >= 0} {
 
 276             # This will register as a skipped test
 
 277             test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
 
 281         # if &, test as both ARE and BRE
 
 282         set amp [string first "&" $flags]
 
 284                 set f [string range $flags 0 [expr $amp - 1]]
 
 285                 append f [string range $flags [expr $amp + 1] end]
 
 286                 eval [concat [list matchexpected $opts \
 
 287                         [linsert $testid end ARE] ${f} $re $target] $args]
 
 288                 eval [concat [list matchexpected $opts \
 
 289                         [linsert $testid end BRE] ${f}b $re $target] $args]
 
 294         set infoflags [infoflags $flags]
 
 295         set ccmd [concat [list testregexp -$ask] $f [list $re]]
 
 296         set ecmd [concat [list testregexp] $opts $f [list $re $target]]
 
 298         set nsub [expr [llength $args] - 1]
 
 301         for {set i 0} {$i <= $nsub} {incr i} {
 
 308                 append refs " \$$name"
 
 311         if {[string first "o" $flags] >= 0} {   ;# REG_NOSUB kludge
 
 312                 set nsub 0              ;# unsigned value cannot be -1
 
 314         if {[string first "t" $flags] >= 0} {   ;# REG_EXPECT
 
 315                 incr nsub -1            ;# the extra does not count
 
 317         set ecmd [concat $ecmd $names]
 
 318         set erun "list \[$ecmd\] $refs"
 
 320         if {[string first "!" $flags] >= 0} {
 
 323         set result [concat $retcode $args]
 
 325         set info [list $nsub $infoflags]
 
 326         lappend testid "compile"
 
 327         test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
 
 328         set testid [lreplace $testid end end "execute"]
 
 329         test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
 
 332 # match expected (no missing, empty, or ambiguous submatches)
 
 333 # m testno flags re target mat submat ...
 
 335         eval matchexpected [linsert $args 0 [list]]
 
 338 # match expected (full fanciness)
 
 339 # i testno flags re target mat submat ...
 
 341         eval matchexpected [linsert $args 0 [list "-indices"]]
 
 344 # partial match expected
 
 345 # p testno flags re target mat "" ...
 
 346 # Quirk:  number of ""s must be one more than number of subREs.
 
 348         set f [lindex $args 1]                  ;# add ! flag
 
 349         set args [lreplace $args 1 1 "!$f"]
 
 350         eval matchexpected [linsert $args 0 [list "-indices"]]
 
 354 proc knownBug {args} {
 
 362 # the tests themselves
 
 366 # support functions and preliminary misc.
 
 367 # This is sensitive to changes in message wording, but we really have to
 
 368 # test the code->message expansion at least once.
 
 369 test reg-0.1 "regexp error reporting" {
 
             # (*) is expected to be rejected at compile time; the script
             # returns catch's status together with the error message.
 370         list [catch {regexp (*) ign} msg] $msg
 
 371 } {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
 
 375 doing 1 "basic sanity checks"
 
 378 m  3    &       abc             xyabxabce       abc
 
 382 doing 2 "invalid option combinations"
 
 391 doing 3 "basic syntax"
 
 401 doing 4 "parentheses"
 
 404 m  3    b       {\(a\)b}        ab      ab      a
 
 405 m  4    -       a((b)c)         abc     abc     bc      b
 
 406 m  5    -       a(b)(c)         abc     abc     b       c
 
 409 # sigh, we blew it on the specs here... someday this will be fixed in POSIX,
 
 410 #  but meanwhile, it's fixed in AREs
 
 414 m 11    P       a(?:b)c         abc     abc
 
 415 e 12    e       a(?:b)c         BADRPT
 
 416 i 13    S       a()b            ab      {0 1}   {1 0}
 
 418 i 15    S       a(|b)c          ac      {0 1}   {1 0}
 
 419 m 16    S       a(b|)c          abc     abc     b
 
 423 doing 5 "simple one-char matching"
 
 424 # general case of brackets done later
 
 427 m  3    &       {a[bc]d}        abd     abd
 
 428 m  4    &       {a[bc]d}        acd     acd
 
 431 m  7    &       {a[^bc]d}       aed     aed
 
 432 f  8    &p      "a\[^bc]d"      "a\nd"
 
 436 doing 6 "context-dependent syntax"
 
 447 m 10    n       "\n^"           "x\nb"  "\n"
 
 451 m 14    bS      {\(x$\)}        x       x       x
 
 453 m 16    b       {x$y}           "x\$y"  "x\$y"
 
 455 m 18    n       "x\$\n"         "x\n"   "x\n"
 
 461 doing 7 "simple quantifiers"
 
 481 m  1    NQ      "a{0,1}"        ""      ""
 
 484 e  4    -       "a{1,2,3}"      BADBR
 
 486 e  6    -       "a{1000}"       BADBR
 
 489 m  9    BS      "a{b"           "a\{b"  "a\{b"
 
 490 m 10    BS      "a{"            "a\{"   "a\{"
 
 491 m 11    bQ      "a\\{0,1\\}b"   cb      b
 
 492 e 12    b       "a\\{0,1"       EBRACE
 
 493 e 13    -       "a{0,1\\"       BADBR
 
 495 m 15    Q       "a{0,0}b"       ab      b
 
 496 m 16    Q       "a{0,1}b"       ab      ab
 
 498 m 18    Q       "a{0,2}b"       aab     aab
 
 499 m 19    Q       "a{0,}b"        aab     aab
 
 500 m 20    Q       "a{1,1}b"       aab     ab
 
 501 m 21    Q       "a{1,3}b"       aaaab   aaab
 
 503 m 23    Q       "a{1,}b"        aab     aab
 
 505 m 25    Q       "a{2,3}b"       aaaab   aaab
 
 507 m 27    Q       "a{2,}b"        aaaab   aaaab
 
 514 m  3    &       {a[[.-.]]}      a-      a-
 
 515 m  4    &L      {a[[.zero.]]}   a0      a0
 
 516 m  5    &LM     {a[[.zero.]-9]} a2      a2
 
 517 m  6    &M      {a[0-[.9.]]}    a2      a2
 
 518 m  7    &+L     {a[[=x=]]}      ax      ax
 
 519 m  8    &+L     {a[[=x=]]}      ay      ay
 
 520 f  9    &+L     {a[[=x=]]}      az
 
 521 e 10    &       {a[0-[=x=]]}    ERANGE
 
 522 m 11    &L      {a[[:digit:]]}  a0      a0
 
 523 e 12    &       {a[[:woopsie:]]}        ECTYPE
 
 524 f 13    &L      {a[[:digit:]]}  ab
 
 525 e 14    &       {a[0-[:digit:]]}        ERANGE
 
 526 m 15    &LP     {[[:<:]]a}      a       a
 
 527 m 16    &LP     {a[[:>:]]}      a       a
 
 528 e 17    &       {a[[..]]b}      ECOLLATE
 
 529 e 18    &       {a[[==]]b}      ECOLLATE
 
 530 e 19    &       {a[[::]]b}      ECTYPE
 
 531 e 20    &       {a[[.a}         EBRACK
 
 532 e 21    &       {a[[=a}         EBRACK
 
 533 e 22    &       {a[[:a}         EBRACK
 
 537 e 26    &       {a[b-c}         EBRACK
 
 538 m 27    &M      {a[b-c]}        ab      ab
 
 539 m 28    &       {a[b-b]}        ab      ab
 
 540 m 29    &M      {a[1-2]}        a2      a2
 
 541 e 30    &       {a[c-b]}        ERANGE
 
 542 e 31    &       {a[a-b-c]}      ERANGE
 
 543 m 32    &M      {a[--?]b}       a?b     a?b
 
 544 m 33    &       {a[---]b}       a-b     a-b
 
 545 m 34    &       {a[]b]c}        a]c     a]c
 
 546 m 35    EP      {a[\]]b}        a]b     a]b
 
 548 m 37    bE      {a[\]]b}        "a\\]b" "a\\]b"
 
 549 m 38    eE      {a[\]]b}        "a\\]b" "a\\]b"
 
 550 m 39    EP      {a[\\]b}        "a\\b"  "a\\b"
 
 551 m 40    eE      {a[\\]b}        "a\\b"  "a\\b"
 
 552 m 41    bE      {a[\\]b}        "a\\b"  "a\\b"
 
 553 e 42    -       {a[\Z]b}        EESCAPE
 
 554 m 43    &       {a[[b]c}        "a\[c"  "a\[c"
 
 555 m 44    EMP*    {a[\u00fe-\u0507][\u00ff-\u0300]b} \
 
 556                         "a\u0102\u02ffb"        "a\u0102\u02ffb"
 
 560 doing 10 "anchors and newlines"
 
 568 m  8    &n      "^a"            "b\na"  "a"
 
 569 i  9    &w      "^a"            "a\na"  {0 0}
 
 570 i 10    &n^     "^a"            "a\na"  {2 2}
 
 572 m 12    &n      "a\$"           "a\nb"  "a"
 
 573 i 13    &n      "a\$"           "a\na"  {0 0}
 
 577 m 17    b       {$$}            "\$"    "\$"
 
 580 i 20    &nN     "^\$"           "a\n\nb"        {2 1}
 
 582 m 22    b       {$^}            "\$^"   "\$^"
 
 585 f 25    ^nP     {\Aa}           "b\na"
 
 588 f 28    {$nP}   {a\Z}           "a\nb"
 
 596 doing 11 "boundary constraints"
 
 597 m  1    &LP     {[[:<:]]a}      a       a
 
 598 m  2    &LP     {[[:<:]]a}      -a      a
 
 599 f  3    &LP     {[[:<:]]a}      ba
 
 600 m  4    &LP     {a[[:>:]]}      a       a
 
 601 m  5    &LP     {a[[:>:]]}      a-      a
 
 602 f  6    &LP     {a[[:>:]]}      ab
 
 617 e 21    -       {[[:<:]]*}      BADRPT
 
 618 e 22    -       {[[:>:]]*}      BADRPT
 
 632 doing 12 "character classes"
 
 633 m  1    LP      {a\db}          a0b     a0b
 
 636 m  4    LP      {a\Db}          axb     axb
 
 637 m  5    LP      "a\\sb"         "a b"   "a b"
 
 638 m  6    LP      "a\\sb"         "a\tb"  "a\tb"
 
 639 m  7    LP      "a\\sb"         "a\nb"  "a\nb"
 
 641 m  9    LP      {a\Sb}          axb     axb
 
 642 f 10    LP      "a\\Sb"         "a b"
 
 643 m 11    LP      {a\wb}          axb     axb
 
 646 m 14    LP      {a\Wb}          a-b     a-b
 
 647 m 15    LP      {\y\w+z\y}      adze-guz        guz
 
 648 m 16    LPE     {a[\d]b}        a1b     a1b
 
 649 m 17    LPE     "a\[\\s]b"      "a b"   "a b"
 
 650 m 18    LPE     {a[\w]b}        axb     axb
 
 658 m  4    bAS     {a\wb}          awb     awb
 
 659 m  5    eAS     {a\wb}          awb     awb
 
 660 m  6    PL      "a\\ab"         "a\007b"        "a\007b"
 
 661 m  7    P       "a\\bb"         "a\bb"  "a\bb"
 
 662 m  8    P       {a\Bb}          "a\\b"  "a\\b"
 
 663 m  9    MP      "a\\chb"        "a\bb"  "a\bb"
 
 664 m 10    MP      "a\\cHb"        "a\bb"  "a\bb"
 
 665 m 11    LMP     "a\\e"          "a\033" "a\033"
 
 666 m 12    P       "a\\fb"         "a\fb"  "a\fb"
 
 667 m 13    P       "a\\nb"         "a\nb"  "a\nb"
 
 668 m 14    P       "a\\rb"         "a\rb"  "a\rb"
 
 669 m 15    P       "a\\tb"         "a\tb"  "a\tb"
 
 670 m 16    P       "a\\u0008x"     "a\bx"  "a\bx"
 
 671 e 17    -       {a\u008x}       EESCAPE
 
 672 m 18    P       "a\\u00088x"    "a\b8x" "a\b8x"
 
 673 m 19    P       "a\\U00000008x" "a\bx"  "a\bx"
 
 674 e 20    -       {a\U0000008x}   EESCAPE
 
 675 m 21    P       "a\\vb"         "a\vb"  "a\vb"
 
 676 m 22    MP      "a\\x08x"       "a\bx"  "a\bx"
 
 677 e 23    -       {a\xq}          EESCAPE
 
 678 m 24    MP      "a\\x0008x"     "a\bx"  "a\bx"
 
 680 m 26    MP      "a\\010b"       "a\bb"  "a\bb"
 
 684 doing 14 "back references"
 
 686 m  1    RP      {a(b*)c\1}      abbcbb  abbcbb  bb
 
 687 m  2    RP      {a(b*)c\1}      ac      ac      ""
 
 688 f  3    RP      {a(b*)c\1}      abbcb
 
 689 m  4    RP      {a(b*)\1}       abbcbb  abb     b
 
 690 m  5    RP      {a(b|bb)\1}     abbcbb  abb     b
 
 691 m  6    RP      {a([bc])\1}     abb     abb     b
 
 692 f  7    RP      {a([bc])\1}     abc
 
 693 m  8    RP      {a([bc])\1}     abcabb  abb     b
 
 694 f  9    RP      {a([bc])*\1}    abc
 
 695 f 10    RP      {a([bc])\1}     abB
 
 696 m 11    iRP     {a([bc])\1}     abB     abB     b
 
 697 m 12    RP      {a([bc])\1+}    abbb    abbb    b
 
 698 m 13    QRP     "a(\[bc])\\1{3,4}"      abbbb   abbbb   b
 
 699 f 14    QRP     "a(\[bc])\\1{3,4}"      abbb
 
 700 m 15    RP      {a([bc])\1*}    abbb    abbb    b
 
 701 m 16    RP      {a([bc])\1*}    ab      ab      b
 
 702 m 17    RP      {a([bc])(\1*)}  ab      ab      b       ""
 
 703 e 18    -       {a((b)\1)}      ESUBREG
 
 704 e 19    -       {a(b)c\2}       ESUBREG
 
 705 m 20    bR      {a\(b*\)c\1}    abbcbb  abbcbb  bb
 
 709 doing 15 "octal escapes vs back references"
 
 710 # initial zero is always octal
 
 711 m  1    MP      "a\\010b"       "a\bb"  "a\bb"
 
 712 m  2    MP      "a\\0070b"      "a\0070b"       "a\0070b"
 
 713 m  3    MP      "a\\07b"        "a\007b"        "a\007b"
 
 714 m  4    MP      "a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c"  "abbbbbbbbbb\007c" \
 
 715         "abbbbbbbbbb\007c"      "b"     "b"     "b"     "b"     "b"     "b" \
 
 717 # a single digit is always a backref
 
 719 # otherwise it's a backref only if within range (barf!)
 
 720 m  6    MP      "a\\10b"        "a\bb"  "a\bb"
 
 721 m  7    MP      {a\101b}        aAb     aAb
 
 722 m  8    RP      {a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c}   abbbbbbbbbbbc \
 
 723         abbbbbbbbbbbc   b       b       b       b       b       b       b \
 
 725 # but we're fussy about border cases -- guys who want octal should use the zero
 
 726 e  9    -       {a((((((((((b\10))))))))))c}    ESUBREG
 
 727 # BREs don't have octal, EREs don't have backrefs
 
 728 m 10    MP      "a\\12b"        "a\nb"  "a\nb"
 
 729 e 11    b       {a\12b}         ESUBREG
 
 730 m 12    eAS     {a\12b}         a12b    a12b
 
 734 doing 16 "expanded syntax"
 
 735 m  1    xP      "a b c"         "abc"   "abc"
 
 736 m  2    xP      "a b #oops\nc\td"       "abcd"  "abcd"
 
 737 m  3    x       "a\\ b\\\tc"    "a b\tc"        "a b\tc"
 
 738 m  4    xP      "a b\\#c"       "ab#c"  "ab#c"
 
 739 m  5    xP      "a b\[c d]e"    "ab e"  "ab e"
 
 740 m  6    xP      "a b\[c#d]e"    "ab#e"  "ab#e"
 
 741 m  7    xP      "a b\[c#d]e"    "abde"  "abde"
 
 742 m  8    xSPB    "ab{ d"         "ab\{d" "ab\{d"
 
 743 m  9    xPQ     "ab{ 1 , 2 }c"  "abc"   "abc"
 
 747 doing 17 "misc syntax"
 
 748 m  1    P       a(?#comment)b   ab      ab
 
 752 doing 18 "unmatchable REs"
 
 757 doing 19 "case independence"
 
 761 m  4    &iM     {a[b-d]}        aC      aC
 
 766 doing 20 "directors and embedded options"
 
 769 m  3    &P      ***=a*b         a*b     a*b
 
 770 m  4    q       ***=a*b         ***=a*b ***=a*b
 
 771 m  5    bLP     {***:\w+}       ab      ab
 
 772 m  6    eLP     {***:\w+}       ab      ab
 
 773 e  7    &       ***:***=a*b     BADRPT
 
 774 m  8    &P      ***:(?b)a+b     a+b     a+b
 
 775 m  9    P       (?b)a+b         a+b     a+b
 
 776 e 10    e       {(?b)\w+}       BADRPT
 
 777 m 11    bAS     {(?b)\w+}       (?b)w+  (?b)w+
 
 780 m 14    APS     {(?e)\W+}       WW      WW
 
 782 f 16    P       "(?m)a.b"       "a\nb"
 
 783 m 17    P       "(?m)^b"        "a\nb"  "b"
 
 784 f 18    P       "(?n)a.b"       "a\nb"
 
 785 m 19    P       "(?n)^b"        "a\nb"  "b"
 
 786 f 20    P       "(?p)a.b"       "a\nb"
 
 787 f 21    P       "(?p)^b"        "a\nb"
 
 788 m 22    P       (?q)a+b         a+b     a+b
 
 789 m 23    nP      "(?s)a.b"       "a\nb"  "a\nb"
 
 790 m 24    xP      "(?t)a b"       "a b"   "a b"
 
 791 m 25    P       "(?w)a.b"       "a\nb"  "a\nb"
 
 792 m 26    P       "(?w)^b"        "a\nb"  "b"
 
 793 m 27    P       "(?x)a b"       "ab"    "ab"
 
 795 m 29    P       (?ici)a+        Aa      Aa
 
 796 e 30    P       (?i)(?q)a+      BADRPT
 
 797 m 31    P       (?q)(?i)a+      (?i)a+  (?i)a+
 
 799 m 33    xP      "(?q)a b"       "a b"   "a b"
 
 800 m 34    P       "(?qx)a b"      "a b"   "a b"
 
 806 m  1    -       a(b)c           abc     abc     b
 
 807 m  2    P       a(?:b)c         xabc    abc
 
 808 m  3    -       a((b))c         xabcy   abc     b       b
 
 809 m  4    P       a(?:(b))c       abcy    abc     b
 
 810 m  5    P       a((?:b))c       abc     abc     b
 
 811 m  6    P       a(?:(?:b))c     abc     abc
 
 812 i  7    Q       "a(b){0}c"      ac      {0 1}   {-1 -1}
 
 813 m  8    -       a(b)c(d)e       abcde   abcde   b       d
 
 814 m  9    -       (b)c(d)e        bcde    bcde    b       d
 
 815 m 10    -       a(b)(d)e        abde    abde    b       d
 
 816 m 11    -       a(b)c(d)        abcd    abcd    b       d
 
 817 m 12    -       (ab)(cd)        xabcdy  abcd    ab      cd
 
 818 m 13    -       a(b)?c          xabcy   abc     b
 
 819 i 14    -       a(b)?c          xacy    {1 2}   {-1 -1}
 
 820 m 15    -       a(b)?c(d)?e     xabcdey abcde   b       d
 
 821 i 16    -       a(b)?c(d)?e     xacdey  {1 4}   {-1 -1} {3 3}
 
 822 i 17    -       a(b)?c(d)?e     xabcey  {1 4}   {2 2}   {-1 -1}
 
 823 i 18    -       a(b)?c(d)?e     xacey   {1 3}   {-1 -1} {-1 -1}
 
 824 m 19    -       a(b)*c          xabcy   abc     b
 
 825 i 20    -       a(b)*c          xabbbcy {1 5}   {4 4}
 
 826 i 21    -       a(b)*c          xacy    {1 2}   {-1 -1}
 
 827 m 22    -       a(b*)c          xabbbcy abbbc   bbb
 
 828 m 23    -       a(b*)c          xacy    ac      ""
 
 830 m 25    -       a(b)+c          xabcy   abc     b
 
 831 i 26    -       a(b)+c          xabbbcy {1 5}   {4 4}
 
 832 m 27    -       a(b+)c          xabbbcy abbbc   bbb
 
 833 i 28    Q       "a(b){2,3}c"    xabbbcy {1 5}   {4 4}
 
 834 i 29    Q       "a(b){2,3}c"    xabbcy  {1 4}   {3 3}
 
 835 f 30    Q       "a(b){2,3}c"    xabcy
 
 836 m 31    LP      "\\y(\\w+)\\y"  "-- abc-"       "abc"   "abc"
 
 837 m 32    -       a((b|c)d+)+     abacdbd acdbd   bd      b
 
 838 m 33    N       (.*).*          abc     abc     abc
 
 839 m 34    N       (a*)*           bc      ""      ""
 
 843 doing 22 "multicharacter collating elements"
 
 845 m  1    &+L     {a[c]e}         ace     ace
 
 847 m  3    &+L     {a[[.ch.]]}     ach     ach
 
 848 f  4    &+L     {a[[.ch.]]}     ace
 
 849 m  5    &+L     {a[c[.ch.]]}    ac      ac
 
 850 m  6    &+L     {a[c[.ch.]]}    ace     ac
 
 851 m  7    &+L     {a[c[.ch.]]}    ache    ach
 
 853 m  9    &+L     {a[^c]e}        abe     abe
 
 854 m 10    &+L     {a[^c]e}        ache    ache
 
 855 f 11    &+L     {a[^[.ch.]]}    ach
 
 856 m 12    &+L     {a[^[.ch.]]}    ace     ac
 
 857 m 13    &+L     {a[^[.ch.]]}    ac      ac
 
 858 m 14    &+L     {a[^[.ch.]]}    abe     ab
 
 859 f 15    &+L     {a[^c[.ch.]]}   ach
 
 860 f 16    &+L     {a[^c[.ch.]]}   ace
 
 861 f 17    &+L     {a[^c[.ch.]]}   ac
 
 862 m 18    &+L     {a[^c[.ch.]]}   abe     ab
 
 863 m 19    &+L     {a[^b]}         ac      ac
 
 864 m 20    &+L     {a[^b]}         ace     ac
 
 865 m 21    &+L     {a[^b]}         ach     ach
 
 870 doing 23 "lookahead constraints"
 
 871 m  1    HP      a(?=b)b*        ab      ab
 
 873 m  3    HP      a(?=b)b*(?=c)c* abc     abc
 
 874 f  4    HP      a(?=b)b*(?=c)c* ab
 
 882 doing 24 "non-greedy quantifiers"
 
 884 m  2    PT      ab+?c           abbc    abbc
 
 886 m  4    PT      ab*?c           abbc    abbc
 
 889 m  7    PQT     "ab{2,4}?"      abbbb   abb
 
 890 m  8    PQT     "ab{2,4}?c"     abbbbc  abbbbc
 
 891 m  9    -       3z*             123zzzz456      3zzzz
 
 892 m 10    PT      3z*?            123zzzz456      3
 
 893 m 11    -       z*4             123zzzz456      zzzz4
 
 894 m 12    PT      z*?4            123zzzz456      zzzz4
 
 898 doing 25 "mixed quantifiers"
 
 899 # this is very incomplete as yet
 
 901 m  1    PNT     {^(.*?)(a*)$}   xyza    xyza    xyz     a
 
 902 m  2    PNT     {^(.*?)(a*)$}   xyzaa   xyzaa   xyz     aa
 
 903 m  3    PNT     {^(.*?)(a*)$}   xyz     xyz     xyz     ""
 
 907 doing 26 "tricky cases"
 
 908 # attempts to trick the matcher into accepting a short match
 
 909 m  1    -       (week|wee)(night|knights)       weeknights      weeknights \
 
 911 m  2    RP      {a(bc*).*\1}    abccbccb        abccbccb        b
 
 912 m  3    -       {a(b.[bc]*)+}   abcbd   abcbd   bd
 
 916 doing 27 "implementation misc."
 
 917 # duplicate arcs are suppressed
 
 918 m  1    P       a(?:b|b)c       abc     abc
 
 919 # make color/subcolor relationship go back and forth
 
 920 m  2    &       {[ab][ab][ab]}  aba     aba
 
 921 m  3    &       {[ab][ab][ab][ab][ab][ab][ab]}  abababa abababa
 
 925 doing 28 "boundary busters etc."
 
 926 # color-descriptor allocation changes at 10
 
 927 m  1    &       abcdefghijkl    abcdefghijkl    abcdefghijkl
 
 928 # so does arc allocation
 
 929 m  2    P       a(?:b|c|d|e|f|g|h|i|j|k|l|m)n   agn     agn
 
 930 # subexpression tracking also at 10
 
 931 m  3    -       a(((((((((((((b)))))))))))))c   abc     abc     b       b       b       b       b       b       b       b       b       b       b       b       b
 
 932 # state-set handling changes slightly at unsigned size (might be 64...)
 
 933 # (also stresses arc allocation)
 
 934 m  4    Q       "ab{1,100}c"    abbc    abbc
 
 935 m  5    Q       "ab{1,100}c"    abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
 
 936         abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
 
 938         abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
 
 939         abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
 
 940 # force small cache and bust it, several ways
 
 941 m  7    LP      {\w+abcdefgh}   xyzabcdefgh     xyzabcdefgh
 
 942 m  8    %LP     {\w+abcdefgh}   xyzabcdefgh     xyzabcdefgh
 
 943 m  9    %LP     {\w+abcdefghijklmnopqrst}       xyzabcdefghijklmnopqrst \
 
 944         xyzabcdefghijklmnopqrst
 
 945 i 10    %LP     {\w+(abcdefgh)?}        xyz     {0 2}   {-1 -1}
 
 946 i 11    %LP     {\w+(abcdefgh)?}        xyzabcdefg      {0 9}   {-1 -1}
 
 947 i 12    %LP     {\w+(abcdefghijklmnopqrst)?}    xyzabcdefghijklmnopqrs \
 
 952 doing 29 "incomplete matches"
 
     # Per the file header: p lines expect the match to fail but still check the
     # indices that are reported (partial-match reporting); i lines expect a
     # successful match checked with -indices.
     # Columns: minor test number, flags, RE, subject string, expected results.
     # NOTE(review): the last column seems to be what the comment further down
     # calls the retain numbers (left empty on p lines); its exact meaning
     # presumably depends on the t flag -- confirm against the flag
     # documentation.
 953 p  1    t       def             abc     {3 2}   ""
 
 954 p  2    t       bcd             abc     {1 2}   ""
 
 955 p  3    t       abc             abab    {0 3}   ""
 
 956 p  4    t       abc             abdab   {3 4}   ""
 
 957 i  5    t       abc             abc     {0 2}   {0 2}
 
 958 i  6    t       abc             xyabc   {2 4}   {2 4}
 
 959 p  7    t       abc+            xyab    {2 3}   ""
 
 960 i  8    t       abc+            xyabc   {2 4}   {2 4}
 
     # NOTE(review): knownBug prefixes the whole test command on the next line;
     # presumably it marks test 9 as an expected failure -- check the knownBug
     # helper before relying on this.
 961 knownBug i  9   t       abc+            xyabcd  {2 4}   {6 5}
 
 962 i  10   t       abc+            xyabcdd {2 4}   {7 6}
 
 963 p  11   tPT     abc+?           xyab    {2 3}   ""
 
 964 # the retain numbers in these two may look wrong, but they aren't
 
 965 i  12   tPT     abc+?           xyabc   {2 4}   {5 4}
 
 966 i  13   tPT     abc+?           xyabcc  {2 4}   {6 5}
 
 967 i  14   tPT     abc+?           xyabcd  {2 4}   {6 5}
 
 968 i  15   tPT     abc+?           xyabcdd {2 4}   {7 6}
 
 969 i  16   t       abcd|bc         xyabc   {3 4}   {2 4}
 
 970 p  17   tn      .*k             "xx\nyyy"       {3 5}   ""
 
 973 doing 30 "misc. oddities and old bugs"
 
 978 m  5    &       ^a*b            aaaab   aaaab
 
 979 m  6    &M      {[0-6][1-2][0-3][0-6][1-6][0-6]}        010010  010010
 
 980 # temporary REG_BOSONLY kludge
 
 983 # back to normal stuff
 
     # Test 9: with (?n) the ^ anchors at each line start, and the negative
     # lookahead rejects lines beginning with t or #. \S+ cannot match the
     # empty line, so only the final line, it0, is expected to match.
 984 m  9    HLP     {(?n)^(?![t#])\S+}      "tk\n\n#\n#\nit0"       it0
 
 987 # flush any leftover complaints
 
 990 # Tests resulting from bugs reported by users
 
 991 test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
 
         # The subject contains no whitespace, so the leftmost match is just
         # the leading 2: xdigit should capture 2 and spaces should be empty.
 992     set str {2:::DebugWin32}
 
 993     set re {([[:xdigit:]])([[:space:]]*)}
 
         # Result list: match flag, whole match, then the two captures.
 994     list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
 
 995     # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
 
 998 test reg-32.1 {canmatch functionality -- at end} testregexp {
 
         # Constrained on testregexp (set up at the top of the file) because
         # the body calls that command; when it is absent the test is skipped
         # instead of failing with an invalid-command error.
         # NOTE(review): c is the -xflags value; judging by the test name it
         # requests can-match checking -- confirm against testregexp.
1001     # can match at the final d, if '%' follows
 
1002     set res [testregexp -xflags -- c $pat $line resvar]
 
1006 test reg-32.2 {canmatch functionality -- at end} testregexp {
 
         # Constrained on testregexp (set up at the top of the file) because
         # the body calls that command; when it is absent the test is skipped
         # instead of failing with an invalid-command error.
1009     # can only match after the end of the string
 
1010     set res [testregexp -xflags -- c $pat $line resvar] 
 
1014 test reg-32.3 {canmatch functionality -- not last char} testregexp {
 
         # Constrained on testregexp (set up at the top of the file) because
         # the body calls that command; when it is absent the test is skipped
         # instead of failing with an invalid-command error.
1017     # can only match after the end of the string
 
1018     set res [testregexp -xflags -- c $pat $line resvar]
 
1022 test reg-32.3.1 {canmatch functionality -- no match} testregexp {
 
         # Constrained on testregexp (set up at the top of the file) because
         # the body calls that command; when it is absent the test is skipped
         # instead of failing with an invalid-command error.
1025     # can match the last char, if followed by x
 
1026     set res [testregexp -xflags -- c $pat $line resvar]
 
1030 test reg-32.4 {canmatch functionality -- last char} {knownBug testregexp} {
 
         # Both constraints must hold: knownBug keeps this test skipped unless
         # it is explicitly enabled, and testregexp (set up at the top of the
         # file) covers builds where the command the body calls is absent.
1033     # can match the last char, if followed by x
 
1034     set res [testregexp -xflags -- c $pat $line resvar]
 
1038 test reg-32.4.1 {canmatch functionality -- last char} {knownBug testregexp} {
 
         # Both constraints must hold: knownBug keeps this test skipped unless
         # it is explicitly enabled, and testregexp (set up at the top of the
         # file) covers builds where the command the body calls is absent.
1041     # can match the last char, if followed by x
 
1042     set res [testregexp -xflags -- c $pat $line resvar]
 
1046 test reg-32.5 {canmatch functionality -- last char} {knownBug testregexp} {
 
         # Both constraints must hold: knownBug keeps this test skipped unless
         # it is explicitly enabled, and testregexp (set up at the top of the
         # file) covers builds where the command the body calls is absent.
1049     # can match the last char, if followed by not-d and x.
 
1050     set res [testregexp -xflags -- c $pat $line resvar]
 
1054 test reg-32.6 {canmatch functionality -- last char} {knownBug testregexp} {
 
         # Both constraints must hold: knownBug keeps this test skipped unless
         # it is explicitly enabled, and testregexp (set up at the top of the
         # file) covers builds where the command the body calls is absent.
1055     set pat {[^a]%[^\r\n]*$}
 
1057     # can match at the final d, if '%' follows
 
1058     set res [testregexp -xflags -- c $pat $line resvar]
 
1062 test reg-32.7 {canmatch functionality -- last char} {knownBug testregexp} {
 
         # Both constraints must hold: knownBug keeps this test skipped unless
         # it is explicitly enabled, and testregexp (set up at the top of the
         # file) covers builds where the command the body calls is absent.
1065     # can match at the final d, if '%' follows
 
1066     set res [testregexp -xflags -- c $pat $line resvar]
 
1070 test reg-32.8 {canmatch functionality -- last char} {knownBug testregexp} {
 
         # Both constraints must hold: knownBug keeps this test skipped unless
         # it is explicitly enabled, and testregexp (set up at the top of the
         # file) covers builds where the command the body calls is absent.
1073     # can match at the final d, if '%' follows
 
1074     set res [testregexp -xflags -- c $pat $line resvar]
 
1078 test reg-32.9 {canmatch functionality -- more complex case} {knownBug testregexp} {
 
         # Both constraints must hold: knownBug keeps this test skipped unless
         # it is explicitly enabled, and testregexp (set up at the top of the
         # file) covers builds where the command the body calls is absent.
1079     set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
 
1081     # can match at the final d, if '%' follows
 
1082     set res [testregexp -xflags -- c $pat $line resvar]
 
1086 # Tests reg-33.*: Checks for bug fixes
 
1088 test reg-33.1 {Bug 230589} {
 
         # m receives the whole match and s the single capture group.
1089     regexp {[ ]*(^|[^%])%V} "*%V2" m s
 
1092 test reg-33.2 {Bug 504785} {
 
         # -inline returns the whole match followed by the four captures,
         # which is the list given as the expected result.
1093     regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
 
1094 } {bbcos_001_c01.q1la bbcos _001_c01 q1 l}
 
1096 test reg-33.3 {Bug 505048} {
 
         # All tests citing Bug 505048 start with \A\s* followed by a negated
         # bracket expression; 33.4 through 33.9 run that shape against ab and
         # vary only the capture groups and the result form (plain, -inline,
         # -indices -inline).
1097     regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
 
1100 test reg-33.4 {Bug 505048} {
 
1101     regexp {\A\s*([^b]*)b} ab
 
1104 test reg-33.5 {Bug 505048} {
 
1105     regexp {\A\s*[^b]*(b)} ab
 
1108 test reg-33.6 {Bug 505048} {
 
1109     regexp {\A(\s*)[^b]*(b)} ab
 
1112 test reg-33.7 {Bug 505048} {
 
1113     regexp {\A\s*[^b]*b} ab
 
1116 test reg-33.8 {Bug 505048} {
 
1117     regexp -inline {\A\s*[^b]*b} ab
 
1120 test reg-33.9 {Bug 505048} {
 
1121     regexp -indices -inline {\A\s*[^b]*b} ab
 
1124 test reg-33.10 {Bug 840258} {
 
         # With a variable name given, regsub stores the substituted string in
         # tmp and returns the number of replacements made.
1125     regsub {(^|\n)+\.*b} \n.b {} tmp
 
1128 test reg-33.11 {Bug 840258} {
 
         # Same call form as reg-33.10, applied to a subject with CR/LF line
         # endings.
1129     regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
 
1130             "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
 
     # Standard tcltest epilogue: summarizes results and cleans up after the
     # tests in this file.
1134 ::tcltest::cleanupTests