[wxWidgets.git] / tests / regex / regex.pl

#!/usr/bin/env perl
#############################################################################
# Name:        regex.pl
# Purpose:     Generate test code for wxRegEx from 'reg.test'
# Author:      Mike Wetherell
# RCS-ID:      $Id$
# Copyright:   (c) Mike Wetherell
# Licence:     wxWidgets licence
#############################################################################

#
# Notes:
#   See './regex.pl -h' for usage
#
#   Output at the moment is C++ using the cppunit testing framework. The
#   language/framework specifics are separated, with the following 5
#   subs as an interface: 'begin_output', 'begin_section', 'write_test',
#   'end_section' and 'end_output'. So for a different language/framework,
#   implement 5 new similar subs.
# 
#   I've avoided using 'use encoding "UTF-8"', since this wasn't available
#   in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
#   earler than perl 5.6.0 aren't going to work.
#

use strict;
use warnings;
use File::Basename;
#use encoding "UTF-8";  # enable in the future when perl 5.6.x is just a memory

# if 0 output is wide characters, if 1 output is utf8 encoded
my $utf = 1;

# quote a parameter (C++ helper)
#
sub quotecxx {
    my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
                "\n" => "n", "\r" => "r", "\t" => "t",
                "\013" => "v", '"' => '"', "\\" => "\\" );

    # working around lack of 'use encoding'
    if (!$utf) {
        $_ = pack "U0C*", unpack "C*", $_;
        use utf8;
    }

    s/[\000-\037"\\\177-\x{ffff}]/
        if ($esc{$&}) {
            "\\$esc{$&}";
        } elsif (ord($&) > 0x9f && !$utf) {
            sprintf "\\u%04x", ord($&);
        } else {
            sprintf "\\%03o", ord($&);
        }
    /ge;

    # working around lack of 'use encoding'
    if (!$utf) {
        no utf8;
        $_ = pack "C*", unpack "C*", $_;
    }

    return ($utf ? '"' : 'L"') . $_ . '"'
}

# start writing the output code (C++ interface)
#
sub begin_output {
    my ($from, $instructions) = @_;

    # embed it in the comment
    $from = "\n$from";
    $from =~ s/^(?:   )?/ * /mg;

    # $instructions contains information about the flags etc.
    if ($instructions) {
        $instructions = "\n$instructions";
        $instructions =~ s/^(?:   )?/ * /mg;
    }

    my $u = $utf ? " (UTF-8 encoded)" : "";

    print <<EOT;
/*
 * Test data for wxRegEx$u
$from$instructions */

EOT
}

my @classes;

# start a new section (C++ interface)
#
sub begin_section {
    my ($id, $title) = @_;
    my $class = "regextest_$id";
    $class =~ s/\W/_/g;
    push @classes, [$id, $class];

    print <<EOT;

/*
 * $id $title
 */

class $class : public RegExTestSuite
{
public:
    $class() : RegExTestSuite("regex.$id") { }
    static Test *suite();
};

Test *$class\::suite()
{
    RegExTestSuite *suite = new $class;

EOT
}

# output a test line (C++ interface)
#
sub write_test {
    my @args = @_;
    $_ = quotecxx for @args;
    print "    suite->add(" . (join ', ', @args) . ", NULL);\n"; 
}

# end a section (C++ interface)
#
sub end_section {
    my ($id, $class) = @{$classes[$#classes]};

    print <<EOT;

    return suite;
}

CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");

EOT
}

# finish off the output (C++ interface)
#
sub end_output {
    print <<EOT;

/*
 * A suite containing all the above suites
 */

class regextest : public TestSuite
{
public:
    regextest() : TestSuite("regex") { }
    static Test *suite();
};

Test *regextest::suite()
{
    TestSuite *suite = new regextest;

EOT
    print "    suite->addTest(".$_->[1]."::suite());\n" for @classes;

    print <<EOT;

    return suite;
}

CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
EOT
}

# Parse a tcl string. Handles curly quoting and double quoting.
#
sub parsetcl {
    my ($curly, $quote);
    # recursively defined expression that can parse balanced braces
    # warning: uses experimental features of perl, see perlop(1)
    $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
    $quote = qr/"(?:\\"|[^"])*"/;
    my @tokens = shift =~ /($curly|$quote|\S+)/g;

    # now remove braces/quotes and unescape any escapes
    for (@tokens) {
        if (s/^{(.*)}$/$1/) {
            # for curly quoting, only unescape \{ and \}
            s/\\([{}])/$1/g;
        } else {
            s/^"(.*)"$/$1/;

            # unescape any escapes
            my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
                        "n" => "\n", "r" => "\r", "t" => "\t",
                        "v" => "\013" );
            my $x = qr/[[:xdigit:]]/;

            s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
                if ($1 =~ m{^([0-7]+)}) {
                    chr(oct($1));
                } elsif ($1 =~ m{^x($x+)}) {
                    pack("C0U", hex($1) & 0xff);
                } elsif ($1 =~ m{^u($x+)}) {
                    pack("C0U", hex($1));
                } elsif ($esc{$1}) {
                    $esc{$1};
                } else {
                    $1;
                }
            /ge;
        }
    }

    return @tokens;
}

# helpers which keep track of whether begin_section has been called, so that
# end_section can be called when appropriate
#
my @doing = ("0", "");
my $in_section = 0;

sub handle_doing {
    end_section if $in_section;
    $in_section = 0;
    @doing = @_;
}

sub handle_test {
    begin_section(@doing) if !$in_section;
    $in_section = 1;
    write_test @_;
}

sub handle_end {
    end_section if $in_section;
    $in_section = 0;
    end_output;
}

# 'main' - start by parsing the command lines options.
#
my $badoption = !@ARGV;
my $utfdefault = $utf;
my $outputname;

for (my $i = 0; $i < @ARGV; ) {
    if ($ARGV[$i] !~ m{^-.}) {
        $i++;
        next;
    }

    if ($ARGV[$i] eq '--') {
        splice @ARGV, $i, 1;
        last;
    }

    if ($ARGV[$i] =~ s{^-(.*)o(.*)$}{-$1}i) {       # -o : output file
        $outputname = $2 || splice @ARGV, $i + 1, 1;
    }

    for (split //, substr($ARGV[$i], 1)) {
        if (/u/i) {                                 # -u : utf-8 output
            $utf = 1;
        } elsif (/w/i) {                            # -w : wide char output
            $utf = 0;
        } else {
            $badoption = 1;
        }
    }

    splice @ARGV, $i, 1;
}

# Display help
#
if ($badoption) {
    my $prog = basename $0;
    my ($w, $u) = (" (default)", "          ");
    ($w, $u) = ($u, $w) if $utfdefault;
    
    print <<EOT;
Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
Generate test code for wxRegEx from 'reg.test'
Example: $prog -o regex.inc reg.test wxreg.test 

 -w$w   Output will be wide characters.
 -u$u   Output will be UTF-8 encoded.

Input files should be in UTF-8. If no input files are specified input is
read from stdin. If no output file is specified output is written to stdout.
See the comments in reg.test for details of the input file format.
EOT
    exit 0;
}

# Open the output file
#
open STDOUT, ">$outputname" if $outputname;

# Read in the files and initially parse just the comments for copyright
# information and instructions on the tests
#
my @input;                  # slurped input files stripped of comments
my $files = "";             # copyright info from the input comments
my $instructions = "";      # test instructions from the input comments

do {
    my $inputname = basename $ARGV[0] if @ARGV;

    # slurp input
    undef $/;
    my $in = <>;

    # remove escaped newlines
    $in =~ s/(?<!\\)\\\n//g;

    # record the copyrights of the input files
    for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
        s/[\s:]+/ /g;
        $files .= "  ";
        $files .= $inputname . ": " if $inputname && $inputname ne '-';
        $files .= "$_\n";
    }

    # Parse the comments for instructions on the tests, which look like this:
    #    i    successful match with -indices (used in checking things like
    #         nonparticipating subexpressions)
    if (!$instructions) {
        my $sp = qr{\t|   +};                   # tab or three or more spaces
        my @instructions = $in =~
            /\n(
                (?:
                    \#$sp\S?$sp\S[^\n]+\n       # instruction line
                    (?:\#$sp$sp\S[^\n]+\n)*     # continuation lines (if any)
                )+
            )/gx;

        if (@instructions) {
            $instructions[0] = "Test types:\n$instructions[0]";
            if (@instructions > 1) {
                $instructions[1] = "Flag characters:\n$instructions[1]";
            }
            $instructions = join "\n", @instructions;
            $instructions =~ s/^#([^\t]?)/ $1/mg;
        }
    }

    # @input is the input of all files (stipped of comments)
    $in =~ s/^#.*$//mg;
    push @input, $in;

} while $ARGV[0];

# Make a string naming the generator, the input files and copyright info
#
my $from = "Generated " . localtime() . " by " . basename $0;
$from =~ s/[\s]+/ /g;
if ($files) {
    if ($files =~ /:/) {
        $from .= " from the following files:";
    } else {
        $from .= " from work with the following copyright:";
    }
}
$from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g);  # word-wrap
$from .= "\n$files" if $files;

# Now start to print the code
#
begin_output $from, $instructions;

# numbers for 'extra' sections
my $extra = 1;

for (@input)
{
    # Print the main tests
    #
    # Test lines look like this:
    # m  3  b       {\(a\)b}        ab      ab      a
    # 
    # Also looks for heading lines, e.g.:
    # doing 4 "parentheses"
    #
    for (split "\n") {
        if (/^doing\s+(\S+)\s+(\S.*)/) {
            handle_doing parsetcl "$1 $2";
        } elsif (/^[efimp]\s/) {
            handle_test parsetcl $_;
        }
    }

    # Extra tests
    #
    # The expression below matches something like this:
    #   test reg-33.8 {Bug 505048} {
    #       regexp -inline {\A\s*[^b]*b} ab
    #   } ab
    #   
    # The three subexpressions then return these parts: 
    #   $extras[$i]     = '{Bug 505048}',
    #   $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
    #   $extras[$i + 2] = 'ab'
    #
    my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n       # line 1
                  \s*regexp\s+([^\n]+)\n                # line 2
                  \}\s*(\S[^\n]*)/gx;                   # line 3

    handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;

    for (my $i = 0; $i < @extras; $i += 3) {
        my $id = $extras[$i];

        # further parse the middle line into options and the rest (i.e. $args)
        my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;

        my @args = parsetcl $args;
        $#args = 1;     # only want the first two

        # now handle the options
        my $test    = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
        my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';

        # get them all in the right order and print
        unshift @args, $test, parsetcl($id), $results ? '-' : 'o';
        push @args, parsetcl(parsetcl($results)) if $results;
        handle_test @args;
    }
}

# finish
#
handle_end;
Commit	Line	Data
d7204bba	1	#!/usr/bin/env perl
e70833fb VS	2	#############################################################################
	3	# Name: regex.pl
	4	# Purpose: Generate test code for wxRegEx from 'reg.test'
	5	# Author: Mike Wetherell
	6	# RCS-ID: $Id$
	7	# Copyright: (c) Mike Wetherell
	8	# Licence: wxWidgets licence
	9	#############################################################################
	10
	11	#
	12	# Notes:
	13	# See './regex.pl -h' for usage
	14	#
	15	# Output at the moment is C++ using the cppunit testing framework. The
	16	# language/framework specifics are separated, with the following 5
	17	# subs as an interface: 'begin_output', 'begin_section', 'write_test',
	18	# 'end_section' and 'end_output'. So for a different language/framework,
	19	# implement 5 new similar subs.
	20	#
	21	# I've avoided using 'use encoding "UTF-8"', since this wasn't available
	22	# in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
	23	# earler than perl 5.6.0 aren't going to work.
	24	#
	25
	26	use strict;
d7204bba	27	use warnings;
e70833fb VS	28	use File::Basename;
	29	#use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
	30
	31	# if 0 output is wide characters, if 1 output is utf8 encoded
	32	my $utf = 1;
	33
	34	# quote a parameter (C++ helper)
	35	#
	36	sub quotecxx {
	37	my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
	38	"\n" => "n", "\r" => "r", "\t" => "t",
	39	"\013" => "v", '"' => '"', "\\" => "\\" );
	40
	41	# working around lack of 'use encoding'
a7ea35b0 MW	42	if (!$utf) {
	43	$_ = pack "U0C", unpack "C", $_;
	44	use utf8;
	45	}
e70833fb VS	46
	47	s/[\000-\037"\\\177-\x{ffff}]/
	48	if ($esc{$&}) {
	49	"\\$esc{$&}";
a7ea35b0 MW	50	} elsif (ord($&) > 0x9f && !$utf) {
a7ea35b0 MW	51	sprintf "\\u%04x", ord($&);
e70833fb VS	52	} else {
	53	sprintf "\\%03o", ord($&);
	54	}
	55	/ge;
	56
	57	# working around lack of 'use encoding'
a7ea35b0 MW	58	if (!$utf) {
	59	no utf8;
	60	$_ = pack "C", unpack "C", $_;
	61	}
e70833fb VS	62
	63	return ($utf ? '"' : 'L"') . $_ . '"'
	64	}
	65
	66	# start writing the output code (C++ interface)
	67	#
	68	sub begin_output {
	69	my ($from, $instructions) = @_;
	70
	71	# embed it in the comment
	72	$from = "\n$from";
	73	$from =~ s/^(?: )?/ * /mg;
	74
	75	# $instructions contains information about the flags etc.
	76	if ($instructions) {
	77	$instructions = "\n$instructions";
	78	$instructions =~ s/^(?: )?/ * /mg;
	79	}
	80
	81	my $u = $utf ? " (UTF-8 encoded)" : "";
	82
	83	print <<EOT;
	84	/*
	85	* Test data for wxRegEx$u
	86	$from$instructions */
	87
	88	EOT
	89	}
	90
	91	my @classes;
	92
	93	# start a new section (C++ interface)
	94	#
	95	sub begin_section {
	96	my ($id, $title) = @_;
	97	my $class = "regextest_$id";
	98	$class =~ s/\W/_/g;
	99	push @classes, [$id, $class];
	100
	101	print <<EOT;
	102
	103	/*
	104	* $id $title
	105	*/
	106
	107	class $class : public RegExTestSuite
	108	{
	109	public:
	110	$class() : RegExTestSuite("regex.$id") { }
	111	static Test *suite();
	112	};
	113
	114	Test *$class\::suite()
	115	{
	116	RegExTestSuite *suite = new $class;
	117
	118	EOT
	119	}
	120
	121	# output a test line (C++ interface)
	122	#
	123	sub write_test {
	124	my @args = @_;
	125	$_ = quotecxx for @args;
126	print " suite->add(" . (join ', ', @args) . ", NULL);\n";
127	}
128
129	# end a section (C++ interface)
130	#
131	sub end_section {
132	my ($id, $class) = @{$classes[$#classes]};
133
134	print <<EOT;
135
136	return suite;
137	}
138
139	CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
140
141	EOT
142	}
143
144	# finish off the output (C++ interface)
145	#
146	sub end_output {
147	print <<EOT;
148
149	/*
150	* A suite containing all the above suites
151	*/
152
153	class regextest : public TestSuite
154	{
155	public:
156	regextest() : TestSuite("regex") { }
157	static Test *suite();
158	};
159
160	Test *regextest::suite()
161	{
162	TestSuite *suite = new regextest;
163
164	EOT
165	print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
166
167	print <<EOT;
168
169	return suite;
170	}
171
172	CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
173	CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
174	EOT
175	}
176
177	# Parse a tcl string. Handles curly quoting and double quoting.
178	#
179	sub parsetcl {
180	my ($curly, $quote);
181	# recursively defined expression that can parse balanced braces
182	# warning: uses experimental features of perl, see perlop(1)
183	$curly = qr/\{(?:(?>(?:\\[{}]\|[^{}])+)\|(??{$curly}))*\}/;
184	$quote = qr/"(?:\\"\|[^"])*"/;
185	my @tokens = shift =~ /($curly\|$quote\|\S+)/g;
186
187	# now remove braces/quotes and unescape any escapes
188	for (@tokens) {
189	if (s/^{(.*)}$/$1/) {
190	# for curly quoting, only unescape \{ and \}
191	s/\\([{}])/$1/g;
192	} else {
193	s/^"(.*)"$/$1/;
194
195	# unescape any escapes
196	my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
197	"n" => "\n", "r" => "\r", "t" => "\t",
198	"v" => "\013" );
199	my $x = qr/[[:xdigit:]]/;
200
201	s/\\([0-7]{1,3}\|x$x+\|u$x{1,4}\|.)/
202	if ($1 =~ m{^([0-7]+)}) {
203	chr(oct($1));
204	} elsif ($1 =~ m{^x($x+)}) {
205	pack("C0U", hex($1) & 0xff);
206	} elsif ($1 =~ m{^u($x+)}) {
207	pack("C0U", hex($1));
208	} elsif ($esc{$1}) {
209	$esc{$1};
210	} else {
211	$1;
212	}
213	/ge;
214	}
215	}
216
217	return @tokens;
218	}
219
220	# helpers which keep track of whether begin_section has been called, so that
221	# end_section can be called when appropriate
222	#
223	my @doing = ("0", "");
224	my $in_section = 0;
225
226	sub handle_doing {
227	end_section if $in_section;
228	$in_section = 0;
229	@doing = @_;
230	}
231
232	sub handle_test {
233	begin_section(@doing) if !$in_section;
234	$in_section = 1;
235	write_test @_;
236	}
237
238	sub handle_end {
239	end_section if $in_section;
240	$in_section = 0;
241	end_output;
242	}
243
244	# 'main' - start by parsing the command lines options.
245	#
246	my $badoption = !@ARGV;
247	my $utfdefault = $utf;
248	my $outputname;
249
250	for (my $i = 0; $i < @ARGV; ) {
251	if ($ARGV[$i] !~ m{^-.}) {
252	$i++;
253	next;
254	}
255
256	if ($ARGV[$i] eq '--') {
257	splice @ARGV, $i, 1;
258	last;
259	}
260
261	if ($ARGV[$i] =~ s{^-(.)o(.)$}{-$1}i) { # -o : output file
262	$outputname = $2 \|\| splice @ARGV, $i + 1, 1;
263	}
264
265	for (split //, substr($ARGV[$i], 1)) {
266	if (/u/i) { # -u : utf-8 output
267	$utf = 1;
268	} elsif (/w/i) { # -w : wide char output
269	$utf = 0;
270	} else {
271	$badoption = 1;
272	}
273	}
274
275	splice @ARGV, $i, 1;
276	}
277
278	# Display help
279	#
280	if ($badoption) {
281	my $prog = basename $0;
282	my ($w, $u) = (" (default)", " ");
283	($w, $u) = ($u, $w) if $utfdefault;
284
285	print <<EOT;
286	Usage: $prog [-u\|-w] [-o OUTPUT] [FILE...]
287	Generate test code for wxRegEx from 'reg.test'
288	Example: $prog -o regex.inc reg.test wxreg.test
289
290	-w$w Output will be wide characters.
291	-u$u Output will be UTF-8 encoded.
292
293	Input files should be in UTF-8. If no input files are specified input is
294	read from stdin. If no output file is specified output is written to stdout.
d7204bba	295	See the comments in reg.test for details of the input file format.
e70833fb VS	296	EOT
	297	exit 0;
	298	}
	299
	300	# Open the output file
	301	#
	302	open STDOUT, ">$outputname" if $outputname;
	303
	304	# Read in the files and initially parse just the comments for copyright
	305	# information and instructions on the tests
	306	#
	307	my @input; # slurped input files stripped of comments
	308	my $files = ""; # copyright info from the input comments
	309	my $instructions = ""; # test instructions from the input comments
	310
	311	do {
	312	my $inputname = basename $ARGV[0] if @ARGV;
	313
	314	# slurp input
	315	undef $/;
	316	my $in = <>;
	317
	318	# remove escaped newlines
	319	$in =~ s/(?<!\\)\\\n//g;
	320
	321	# record the copyrights of the input files
	322	for ($in =~ /^#[\t ](.copyright.*)$/mig) {
	323	s/[\s:]+/ /g;
	324	$files .= " ";
	325	$files .= $inputname . ": " if $inputname && $inputname ne '-';
	326	$files .= "$_\n";
	327	}
	328
	329	# Parse the comments for instructions on the tests, which look like this:
	330	# i successful match with -indices (used in checking things like
	331	# nonparticipating subexpressions)
	332	if (!$instructions) {
	333	my $sp = qr{\t\| +}; # tab or three or more spaces
	334	my @instructions = $in =~
	335	/\n(
	336	(?:
	337	\#$sp\S?$sp\S[^\n]+\n # instruction line
	338	(?:\#$sp$sp\S[^\n]+\n)* # continuation lines (if any)
	339	)+
	340	)/gx;
	341
	342	if (@instructions) {
	343	$instructions[0] = "Test types:\n$instructions[0]";
	344	if (@instructions > 1) {
	345	$instructions[1] = "Flag characters:\n$instructions[1]";
	346	}
	347	$instructions = join "\n", @instructions;
	348	$instructions =~ s/^#([^\t]?)/ $1/mg;
	349	}
	350	}
	351
	352	# @input is the input of all files (stipped of comments)
	353	$in =~ s/^#.*$//mg;
	354	push @input, $in;
	355
	356	} while $ARGV[0];
	357
	358	# Make a string naming the generator, the input files and copyright info
	359	#
360	my $from = "Generated " . localtime() . " by " . basename $0;
361	$from =~ s/[\s]+/ /g;
362	if ($files) {
363	if ($files =~ /:/) {
364	$from .= " from the following files:";
365	} else {
366	$from .= " from work with the following copyright:";
367	}
368	}
369	$from = join("\n", $from =~ /(.{0,76}(?:\s\|$))/g); # word-wrap
370	$from .= "\n$files" if $files;
371
372	# Now start to print the code
373	#
374	begin_output $from, $instructions;
375
376	# numbers for 'extra' sections
377	my $extra = 1;
378
379	for (@input)
380	{
381	# Print the main tests
382	#
383	# Test lines look like this:
384	# m 3 b {\(a\)b} ab ab a
385	#
386	# Also looks for heading lines, e.g.:
387	# doing 4 "parentheses"
388	#
389	for (split "\n") {
390	if (/^doing\s+(\S+)\s+(\S.*)/) {
391	handle_doing parsetcl "$1 $2";
392	} elsif (/^[efimp]\s/) {
393	handle_test parsetcl $_;
394	}
395	}
396
397	# Extra tests
398	#
399	# The expression below matches something like this:
400	# test reg-33.8 {Bug 505048} {
401	# regexp -inline {\A\s[^b]b} ab
402	# } ab
403	#
404	# The three subexpressions then return these parts:
405	# $extras[$i] = '{Bug 505048}',
406	# $extras[$i + 1] = '-inline {\A\s[^b]b} ab'
407	# $extras[$i + 2] = 'ab'
408	#
409	my @extras = /\ntest\s+\S+\s(\{.?\})\s*\{\n # line 1
410	\s*regexp\s+([^\n]+)\n # line 2
411	\}\s(\S[^\n])/gx; # line 3
412
413	handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
414
415	for (my $i = 0; $i < @extras; $i += 3) {
416	my $id = $extras[$i];
417
418	# further parse the middle line into options and the rest (i.e. $args)
419	my ($opts, $args) = $extras[$i + 1] =~ /^\s((?:-\S+\s+))([^\s-].*)/;
420
421	my @args = parsetcl $args;
422	$#args = 1; # only want the first two
423
424	# now handle the options
425	my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
426	my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
427
428	# get them all in the right order and print
bc10103e	429	unshift @args, $test, parsetcl($id), $results ? '-' : 'o';
e70833fb VS	430	push @args, parsetcl(parsetcl($results)) if $results;
	431	handle_test @args;
	432	}
	433	}
	434
	435	# finish
	436	#
	437	handle_end;