]>
git.saurik.com Git - wxWidgets.git/blob - tests/regex/regex.pl
2 #############################################################################
4 # Purpose: Generate test code for wxRegEx from 'reg.test'
5 # Author: Mike Wetherell
6 # Copyright: (c) Mike Wetherell
7 # Licence: wxWindows licence
8 #############################################################################
12 # See './regex.pl -h' for usage
14 # Output at the moment is C++ using the cppunit testing framework. The
15 # language/framework specifics are separated, with the following 5
16 # subs as an interface: 'begin_output', 'begin_section', 'write_test',
17 # 'end_section' and 'end_output'. So for a different language/framework,
18 # implement 5 new similar subs.
20 # I've avoided using 'use encoding "UTF-8"', since this wasn't available
21 # in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
22 # earler than perl 5.6.0 aren't going to work.
28 #use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
30 # if 0 output is wide characters, if 1 output is utf8 encoded
33 # quote a parameter (C++ helper)
36 my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
37 "\n" => "n", "\r" => "r", "\t" => "t",
38 "\013" => "v", '"' => '"', "\\" => "\\" );
40 # working around lack of 'use encoding'
42 $_ = pack "U0C*", unpack "C*", $_;
46 s
/[\000-\037"\\\177-\x{ffff}]/
49 } elsif (ord($&) > 0x9f && !$utf) {
50 sprintf "\\u%04x", ord($&);
52 sprintf "\\%03o", ord($&);
56 # working around lack of 'use encoding'
59 $_ = pack "C*", unpack "C*", $_;
62 return ($utf ? '"' : 'L"') . $_ . '"'
65 # start writing the output code (C++ interface)
68 my ($from, $instructions) = @_;
70 # embed it in the comment
72 $from =~ s/^(?: )?/ * /mg;
74 # $instructions contains information about the flags etc.
76 $instructions = "\n$instructions";
77 $instructions =~ s/^(?: )?/ * /mg;
80 my $u = $utf ? " (UTF-8 encoded)" : "";
84 * Test data for wxRegEx$u
92 # start a new section (C++ interface)
95 my ($id, $title) = @_;
96 my $class = "regextest_$id";
98 push @classes, [$id, $class];
106 class $class : public RegExTestSuite
109 $class() : RegExTestSuite("regex.$id") { }
110 static Test *suite();
113 Test *$class\::suite()
115 RegExTestSuite *suite = new $class;
120 # output a test line (C++ interface)
124 $_ = quotecxx
for @args;
125 print " suite->add(" . (join ', ', @args) . ", NULL);\n";
128 # end a section (C++ interface)
131 my ($id, $class) = @{$classes[$#classes]};
138 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
143 # finish off the output (C++ interface)
149 * A suite containing all the above suites
152 class regextest : public TestSuite
155 regextest() : TestSuite("regex") { }
156 static Test *suite();
159 Test *regextest::suite()
161 TestSuite *suite = new regextest;
164 print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
171 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
172 CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
176 # Parse a tcl string. Handles curly quoting and double quoting.
180 # recursively defined expression that can parse balanced braces
181 # warning: uses experimental features of perl, see perlop(1)
182 $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
183 $quote = qr/"(?:\\"|[^"])*"/;
184 my @tokens = shift =~ /($curly|$quote|\S+)/g;
186 # now remove braces/quotes and unescape any escapes
188 if (s/^{(.*)}$/$1/) {
189 # for curly quoting, only unescape \{ and \}
194 # unescape any escapes
195 my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
196 "n" => "\n", "r" => "\r", "t" => "\t",
198 my $x = qr/[[:xdigit:]]/;
200 s
/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
201 if ($1 =~ m{^([0-7]+)}) {
203 } elsif ($1 =~ m{^x($x+)}) {
204 pack("C0U", hex($1) & 0xff);
205 } elsif ($1 =~ m{^u($x+)}) {
206 pack("C0U", hex($1));
219 # helpers which keep track of whether begin_section has been called, so that
220 # end_section can be called when appropriate
222 my @doing = ("0", "");
226 end_section
if $in_section;
232 begin_section
(@doing) if !$in_section;
238 end_section
if $in_section;
243 # 'main' - start by parsing the command lines options.
245 my $badoption = !@ARGV;
246 my $utfdefault = $utf;
249 for (my $i = 0; $i < @ARGV; ) {
250 if ($ARGV[$i] !~ m{^-.}) {
255 if ($ARGV[$i] eq '--') {
260 if ($ARGV[$i] =~ s{^-(.*)o(.*)$}{-$1}i) { # -o : output file
261 $outputname = $2 || splice @ARGV, $i + 1, 1;
264 for (split //, substr($ARGV[$i], 1)) {
265 if (/u/i) { # -u : utf-8 output
267 } elsif (/w/i) { # -w : wide char output
280 my $prog = basename
$0;
281 my ($w, $u) = (" (default)", " ");
282 ($w, $u) = ($u, $w) if $utfdefault;
285 Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
286 Generate test code for wxRegEx from 'reg.test'
287 Example: $prog -o regex.inc reg.test wxreg.test
289 -w$w Output will be wide characters.
290 -u$u Output will be UTF-8 encoded.
292 Input files should be in UTF-8. If no input files are specified input is
293 read from stdin. If no output file is specified output is written to stdout.
294 See the comments in reg.test for details of the input file format.
299 # Open the output file
301 open STDOUT
, ">$outputname" if $outputname;
303 # Read in the files and initially parse just the comments for copyright
304 # information and instructions on the tests
306 my @input; # slurped input files stripped of comments
307 my $files = ""; # copyright info from the input comments
308 my $instructions = ""; # test instructions from the input comments
311 my $inputname = basename
$ARGV[0] if @ARGV;
317 # remove escaped newlines
318 $in =~ s/(?<!\\)\\\n//g;
320 # record the copyrights of the input files
321 for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
324 $files .= $inputname . ": " if $inputname && $inputname ne '-';
328 # Parse the comments for instructions on the tests, which look like this:
329 # i successful match with -indices (used in checking things like
330 # nonparticipating subexpressions)
331 if (!$instructions) {
332 my $sp = qr{\t| +}; # tab or three or more spaces
333 my @instructions = $in =~
336 \#
$sp\S
?$sp\S
[^\n]+\n # instruction line
337 (?:\#
$sp$sp\S
[^\n]+\n)* # continuation lines (if any)
342 $instructions[0] = "Test types:\n$instructions[0]";
343 if (@instructions > 1) {
344 $instructions[1] = "Flag characters:\n$instructions[1]";
346 $instructions = join "\n", @instructions;
347 $instructions =~ s/^#([^\t]?)/ $1/mg;
351 # @input is the input of all files (stipped of comments)
357 # Make a string naming the generator, the input files and copyright info
359 my $from = "Generated " . localtime() . " by " . basename
$0;
360 $from =~ s/[\s]+/ /g;
363 $from .= " from the following files:";
365 $from .= " from work with the following copyright:";
368 $from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g); # word-wrap
369 $from .= "\n$files" if $files;
371 # Now start to print the code
373 begin_output
$from, $instructions;
375 # numbers for 'extra' sections
380 # Print the main tests
382 # Test lines look like this:
383 # m 3 b {\(a\)b} ab ab a
385 # Also looks for heading lines, e.g.:
386 # doing 4 "parentheses"
389 if (/^doing\s+(\S+)\s+(\S.*)/) {
390 handle_doing parsetcl
"$1 $2";
391 } elsif (/^[efimp]\s/) {
392 handle_test parsetcl
$_;
398 # The expression below matches something like this:
399 # test reg-33.8 {Bug 505048} {
400 # regexp -inline {\A\s*[^b]*b} ab
403 # The three subexpressions then return these parts:
404 # $extras[$i] = '{Bug 505048}',
405 # $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
406 # $extras[$i + 2] = 'ab'
408 my @extras = /\ntest\s
+\S
+\s
*(\
{.*?\
})\s
*\
{\n # line 1
409 \s
*regexp\s
+([^\n]+)\n # line 2
410 \
}\s
*(\S
[^\n]*)/gx
; # line 3
412 handle_doing
"extra_" . $extra++, "checks for bug fixes" if @extras;
414 for (my $i = 0; $i < @extras; $i += 3) {
415 my $id = $extras[$i];
417 # further parse the middle line into options and the rest (i.e. $args)
418 my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
420 my @args = parsetcl
$args;
421 $#args = 1; # only want the first two
423 # now handle the options
424 my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
425 my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
427 # get them all in the right order and print
428 unshift @args, $test, parsetcl
($id), $results ? '-' : 'o';
429 push @args, parsetcl
(parsetcl
($results)) if $results;