]>
git.saurik.com Git - wxWidgets.git/blob - tests/regex/regex.pl
2 #############################################################################
4 # Purpose: Generate test code for wxRegEx from 'reg.test'
5 # Author: Mike Wetherell
7 # Copyright: (c) Mike Wetherell
8 # Licence: wxWidgets licence
9 #############################################################################
13 # See './regex.pl -h' for usage
15 # Output at the moment is C++ using the cppunit testing framework. The
16 # language/framework specifics are separated, with the following 5
17 # subs as an interface: 'begin_output', 'begin_section', 'write_test',
18 # 'end_section' and 'end_output'. So for a different language/framework,
19 # implement 5 new similar subs.
21 # I've avoided using 'use encoding "UTF-8"', since this wasn't available
22 # in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
23 # earler than perl 5.6.0 aren't going to work.
28 #use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
30 # if 0 output is wide characters, if 1 output is utf8 encoded
33 # quote a parameter (C++ helper)
36 my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
37 "\n" => "n", "\r" => "r", "\t" => "t",
38 "\013" => "v", '"' => '"', "\\" => "\\" );
40 # working around lack of 'use encoding'
41 $_ = pack "U0C*", unpack "C*", $_;
44 s
/[\000-\037"\\\177-\x{ffff}]/
47 } elsif (ord($&) > 0x9f) {
51 sprintf "\\u%04x", ord($&);
54 sprintf "\\%03o", ord($&);
58 # working around lack of 'use encoding'
60 $_ = pack "C*", unpack "C*", $_;
62 return ($utf ? '"' : 'L"') . $_ . '"'
65 # start writing the output code (C++ interface)
68 my ($from, $instructions) = @_;
70 # embed it in the comment
72 $from =~ s/^(?: )?/ * /mg;
74 # $instructions contains information about the flags etc.
76 $instructions = "\n$instructions";
77 $instructions =~ s/^(?: )?/ * /mg;
80 my $u = $utf ? " (UTF-8 encoded)" : "";
84 * Test data for wxRegEx$u
92 # start a new section (C++ interface)
95 my ($id, $title) = @_;
96 my $class = "regextest_$id";
98 push @classes, [$id, $class];
106 class $class : public RegExTestSuite
109 $class() : RegExTestSuite("regex.$id") { }
110 static Test *suite();
113 Test *$class\::suite()
115 RegExTestSuite *suite = new $class;
120 # output a test line (C++ interface)
124 $_ = quotecxx
for @args;
125 print " suite->add(" . (join ', ', @args) . ", NULL);\n";
128 # end a section (C++ interface)
131 my ($id, $class) = @{$classes[$#classes]};
138 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
143 # finish off the output (C++ interface)
149 * A suite containing all the above suites
152 class regextest : public TestSuite
155 regextest() : TestSuite("regex") { }
156 static Test *suite();
159 Test *regextest::suite()
161 TestSuite *suite = new regextest;
164 print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
171 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
172 CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
176 # Parse a tcl string. Handles curly quoting and double quoting.
180 # recursively defined expression that can parse balanced braces
181 # warning: uses experimental features of perl, see perlop(1)
182 $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
183 $quote = qr/"(?:\\"|[^"])*"/;
184 my @tokens = shift =~ /($curly|$quote|\S+)/g;
186 # now remove braces/quotes and unescape any escapes
188 if (s/^{(.*)}$/$1/) {
189 # for curly quoting, only unescape \{ and \}
194 # unescape any escapes
195 my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
196 "n" => "\n", "r" => "\r", "t" => "\t",
198 my $x = qr/[[:xdigit:]]/;
200 s
/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
201 if ($1 =~ m{^([0-7]+)}) {
203 } elsif ($1 =~ m{^x($x+)}) {
204 pack("C0U", hex($1) & 0xff);
205 } elsif ($1 =~ m{^u($x+)}) {
206 pack("C0U", hex($1));
219 # helpers which keep track of whether begin_section has been called, so that
220 # end_section can be called when appropriate
222 my @doing = ("0", "");
226 end_section
if $in_section;
232 begin_section
(@doing) if !$in_section;
238 end_section
if $in_section;
243 # 'main' - start by parsing the command lines options.
245 my $badoption = !@ARGV;
246 my $utfdefault = $utf;
249 for (my $i = 0; $i < @ARGV; ) {
250 if ($ARGV[$i] !~ m{^-.}) {
255 if ($ARGV[$i] eq '--') {
260 if ($ARGV[$i] =~ s{^-(.*)o(.*)$}{-$1}i) { # -o : output file
261 $outputname = $2 || splice @ARGV, $i + 1, 1;
264 for (split //, substr($ARGV[$i], 1)) {
265 if (/u/i) { # -u : utf-8 output
267 } elsif (/w/i) { # -w : wide char output
280 my $prog = basename
$0;
281 my ($w, $u) = (" (default)", " ");
282 ($w, $u) = ($u, $w) if $utfdefault;
285 Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
286 Generate test code for wxRegEx from 'reg.test'
287 Example: $prog -o regex.inc reg.test wxreg.test
289 -w$w Output will be wide characters.
290 -u$u Output will be UTF-8 encoded.
292 Input files should be in UTF-8. If no input files are specified input is
293 read from stdin. If no output file is specified output is written to stdout.
294 See the comments in reg.test (in src/regex) for details of the input file
300 # Open the output file
302 open STDOUT
, ">$outputname" if $outputname;
304 # Read in the files and initially parse just the comments for copyright
305 # information and instructions on the tests
307 my @input; # slurped input files stripped of comments
308 my $files = ""; # copyright info from the input comments
309 my $instructions = ""; # test instructions from the input comments
312 my $inputname = basename
$ARGV[0] if @ARGV;
318 # remove escaped newlines
319 $in =~ s/(?<!\\)\\\n//g;
321 # record the copyrights of the input files
322 for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
325 $files .= $inputname . ": " if $inputname && $inputname ne '-';
329 # Parse the comments for instructions on the tests, which look like this:
330 # i successful match with -indices (used in checking things like
331 # nonparticipating subexpressions)
332 if (!$instructions) {
333 my $sp = qr{\t| +}; # tab or three or more spaces
334 my @instructions = $in =~
337 \#
$sp\S
?$sp\S
[^\n]+\n # instruction line
338 (?:\#
$sp$sp\S
[^\n]+\n)* # continuation lines (if any)
343 $instructions[0] = "Test types:\n$instructions[0]";
344 if (@instructions > 1) {
345 $instructions[1] = "Flag characters:\n$instructions[1]";
347 $instructions = join "\n", @instructions;
348 $instructions =~ s/^#([^\t]?)/ $1/mg;
352 # @input is the input of all files (stipped of comments)
358 # Make a string naming the generator, the input files and copyright info
360 my $from = "Generated " . localtime() . " by " . basename
$0;
361 $from =~ s/[\s]+/ /g;
364 $from .= " from the following files:";
366 $from .= " from work with the following copyright:";
369 $from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g); # word-wrap
370 $from .= "\n$files" if $files;
372 # Now start to print the code
374 begin_output
$from, $instructions;
376 # numbers for 'extra' sections
381 # Print the main tests
383 # Test lines look like this:
384 # m 3 b {\(a\)b} ab ab a
386 # Also looks for heading lines, e.g.:
387 # doing 4 "parentheses"
390 if (/^doing\s+(\S+)\s+(\S.*)/) {
391 handle_doing parsetcl
"$1 $2";
392 } elsif (/^[efimp]\s/) {
393 handle_test parsetcl
$_;
399 # The expression below matches something like this:
400 # test reg-33.8 {Bug 505048} {
401 # regexp -inline {\A\s*[^b]*b} ab
404 # The three subexpressions then return these parts:
405 # $extras[$i] = '{Bug 505048}',
406 # $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
407 # $extras[$i + 2] = 'ab'
409 my @extras = /\ntest\s
+\S
+\s
*(\
{.*?\
})\s
*\
{\n # line 1
410 \s
*regexp\s
+([^\n]+)\n # line 2
411 \
}\s
*(\S
[^\n]*)/gx
; # line 3
413 handle_doing
"extra_" . $extra++, "checks for bug fixes" if @extras;
415 for (my $i = 0; $i < @extras; $i += 3) {
416 my $id = $extras[$i];
418 # further parse the middle line into options and the rest (i.e. $args)
419 my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
421 my @args = parsetcl
$args;
422 $#args = 1; # only want the first two
424 # now handle the options
425 my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
426 my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
428 # get them all in the right order and print
429 unshift @args, $test, parsetcl
($id), '-';
430 push @args, parsetcl
(parsetcl
($results)) if $results;