]>
git.saurik.com Git - wxWidgets.git/blob - tests/regex/regex.pl
2 #############################################################################
4 # Purpose: Generate test code for wxRegEx from 'reg.test'
5 # Author: Mike Wetherell
7 # Copyright: (c) Mike Wetherell
8 # Licence: wxWidgets licence
9 #############################################################################
13 # See './regex.pl -h' for usage
15 # Output at the moment is C++ using the cppunit testing framework. The
16 # language/framework specifics are separated, with the following 5
17 # subs as an interface: 'begin_output', 'begin_section', 'write_test',
18 # 'end_section' and 'end_output'. So for a different language/framework,
19 # implement 5 new similar subs.
21 # I've avoided using 'use encoding "UTF-8"', since this wasn't available
22 # in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
23 # earler than perl 5.6.0 aren't going to work.
29 #use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
31 # if 0 output is wide characters, if 1 output is utf8 encoded
34 # quote a parameter (C++ helper)
37 my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
38 "\n" => "n", "\r" => "r", "\t" => "t",
39 "\013" => "v", '"' => '"', "\\" => "\\" );
41 # working around lack of 'use encoding'
42 $_ = pack "U0C*", unpack "C*", $_;
45 s
/[\000-\037"\\\177-\x{ffff}]/
48 } elsif (ord($&) > 0x9f) {
52 sprintf "\\u%04x", ord($&);
55 sprintf "\\%03o", ord($&);
59 # working around lack of 'use encoding'
61 $_ = pack "C*", unpack "C*", $_;
63 return ($utf ? '"' : 'L"') . $_ . '"'
66 # start writing the output code (C++ interface)
69 my ($from, $instructions) = @_;
71 # embed it in the comment
73 $from =~ s/^(?: )?/ * /mg;
75 # $instructions contains information about the flags etc.
77 $instructions = "\n$instructions";
78 $instructions =~ s/^(?: )?/ * /mg;
81 my $u = $utf ? " (UTF-8 encoded)" : "";
85 * Test data for wxRegEx$u
93 # start a new section (C++ interface)
96 my ($id, $title) = @_;
97 my $class = "regextest_$id";
99 push @classes, [$id, $class];
107 class $class : public RegExTestSuite
110 $class() : RegExTestSuite("regex.$id") { }
111 static Test *suite();
114 Test *$class\::suite()
116 RegExTestSuite *suite = new $class;
121 # output a test line (C++ interface)
125 $_ = quotecxx
for @args;
126 print " suite->add(" . (join ', ', @args) . ", NULL);\n";
129 # end a section (C++ interface)
132 my ($id, $class) = @{$classes[$#classes]};
139 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
144 # finish off the output (C++ interface)
150 * A suite containing all the above suites
153 class regextest : public TestSuite
156 regextest() : TestSuite("regex") { }
157 static Test *suite();
160 Test *regextest::suite()
162 TestSuite *suite = new regextest;
165 print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
172 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
173 CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
177 # Parse a tcl string. Handles curly quoting and double quoting.
181 # recursively defined expression that can parse balanced braces
182 # warning: uses experimental features of perl, see perlop(1)
183 $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
184 $quote = qr/"(?:\\"|[^"])*"/;
185 my @tokens = shift =~ /($curly|$quote|\S+)/g;
187 # now remove braces/quotes and unescape any escapes
189 if (s/^{(.*)}$/$1/) {
190 # for curly quoting, only unescape \{ and \}
195 # unescape any escapes
196 my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
197 "n" => "\n", "r" => "\r", "t" => "\t",
199 my $x = qr/[[:xdigit:]]/;
201 s
/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
202 if ($1 =~ m{^([0-7]+)}) {
204 } elsif ($1 =~ m{^x($x+)}) {
205 pack("C0U", hex($1) & 0xff);
206 } elsif ($1 =~ m{^u($x+)}) {
207 pack("C0U", hex($1));
220 # helpers which keep track of whether begin_section has been called, so that
221 # end_section can be called when appropriate
223 my @doing = ("0", "");
227 end_section
if $in_section;
233 begin_section
(@doing) if !$in_section;
239 end_section
if $in_section;
244 # 'main' - start by parsing the command lines options.
246 my $badoption = !@ARGV;
247 my $utfdefault = $utf;
250 for (my $i = 0; $i < @ARGV; ) {
251 if ($ARGV[$i] !~ m{^-.}) {
256 if ($ARGV[$i] eq '--') {
261 if ($ARGV[$i] =~ s{^-(.*)o(.*)$}{-$1}i) { # -o : output file
262 $outputname = $2 || splice @ARGV, $i + 1, 1;
265 for (split //, substr($ARGV[$i], 1)) {
266 if (/u/i) { # -u : utf-8 output
268 } elsif (/w/i) { # -w : wide char output
281 my $prog = basename
$0;
282 my ($w, $u) = (" (default)", " ");
283 ($w, $u) = ($u, $w) if $utfdefault;
286 Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
287 Generate test code for wxRegEx from 'reg.test'
288 Example: $prog -o regex.inc reg.test wxreg.test
290 -w$w Output will be wide characters.
291 -u$u Output will be UTF-8 encoded.
293 Input files should be in UTF-8. If no input files are specified input is
294 read from stdin. If no output file is specified output is written to stdout.
295 See the comments in reg.test for details of the input file format.
300 # Open the output file
302 open STDOUT
, ">$outputname" if $outputname;
304 # Read in the files and initially parse just the comments for copyright
305 # information and instructions on the tests
307 my @input; # slurped input files stripped of comments
308 my $files = ""; # copyright info from the input comments
309 my $instructions = ""; # test instructions from the input comments
312 my $inputname = basename
$ARGV[0] if @ARGV;
318 # remove escaped newlines
319 $in =~ s/(?<!\\)\\\n//g;
321 # record the copyrights of the input files
322 for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
325 $files .= $inputname . ": " if $inputname && $inputname ne '-';
329 # Parse the comments for instructions on the tests, which look like this:
330 # i successful match with -indices (used in checking things like
331 # nonparticipating subexpressions)
332 if (!$instructions) {
333 my $sp = qr{\t| +}; # tab or three or more spaces
334 my @instructions = $in =~
337 \#
$sp\S
?$sp\S
[^\n]+\n # instruction line
338 (?:\#
$sp$sp\S
[^\n]+\n)* # continuation lines (if any)
343 $instructions[0] = "Test types:\n$instructions[0]";
344 if (@instructions > 1) {
345 $instructions[1] = "Flag characters:\n$instructions[1]";
347 $instructions = join "\n", @instructions;
348 $instructions =~ s/^#([^\t]?)/ $1/mg;
352 # @input is the input of all files (stipped of comments)
358 # Make a string naming the generator, the input files and copyright info
360 my $from = "Generated " . localtime() . " by " . basename
$0;
361 $from =~ s/[\s]+/ /g;
364 $from .= " from the following files:";
366 $from .= " from work with the following copyright:";
369 $from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g); # word-wrap
370 $from .= "\n$files" if $files;
372 # Now start to print the code
374 begin_output
$from, $instructions;
376 # numbers for 'extra' sections
381 # Print the main tests
383 # Test lines look like this:
384 # m 3 b {\(a\)b} ab ab a
386 # Also looks for heading lines, e.g.:
387 # doing 4 "parentheses"
390 if (/^doing\s+(\S+)\s+(\S.*)/) {
391 handle_doing parsetcl
"$1 $2";
392 } elsif (/^[efimp]\s/) {
393 handle_test parsetcl
$_;
399 # The expression below matches something like this:
400 # test reg-33.8 {Bug 505048} {
401 # regexp -inline {\A\s*[^b]*b} ab
404 # The three subexpressions then return these parts:
405 # $extras[$i] = '{Bug 505048}',
406 # $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
407 # $extras[$i + 2] = 'ab'
409 my @extras = /\ntest\s
+\S
+\s
*(\
{.*?\
})\s
*\
{\n # line 1
410 \s
*regexp\s
+([^\n]+)\n # line 2
411 \
}\s
*(\S
[^\n]*)/gx
; # line 3
413 handle_doing
"extra_" . $extra++, "checks for bug fixes" if @extras;
415 for (my $i = 0; $i < @extras; $i += 3) {
416 my $id = $extras[$i];
418 # further parse the middle line into options and the rest (i.e. $args)
419 my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
421 my @args = parsetcl
$args;
422 $#args = 1; # only want the first two
424 # now handle the options
425 my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
426 my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
428 # get them all in the right order and print
429 unshift @args, $test, parsetcl
($id), '-';
430 push @args, parsetcl
(parsetcl
($results)) if $results;