]> git.saurik.com Git - wxWidgets.git/blame - tests/regex/regex.pl
Use wxFindWindowAtPoint() for hit testing in wxPopupTransientWindow.
[wxWidgets.git] / tests / regex / regex.pl
CommitLineData
d7204bba 1#!/usr/bin/env perl
e70833fb
VS
2#############################################################################
3# Name: regex.pl
4# Purpose: Generate test code for wxRegEx from 'reg.test'
5# Author: Mike Wetherell
e70833fb 6# Copyright: (c) Mike Wetherell
526954c5 7# Licence: wxWindows licence
e70833fb
VS
8#############################################################################
9
10#
11# Notes:
12# See './regex.pl -h' for usage
13#
14# Output at the moment is C++ using the cppunit testing framework. The
15# language/framework specifics are separated, with the following 5
16# subs as an interface: 'begin_output', 'begin_section', 'write_test',
17# 'end_section' and 'end_output'. So for a different language/framework,
18# implement 5 new similar subs.
19#
20# I've avoided using 'use encoding "UTF-8"', since this wasn't available
21# in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
22# earler than perl 5.6.0 aren't going to work.
23#
24
25use strict;
d7204bba 26use warnings;
e70833fb
VS
27use File::Basename;
28#use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
29
30# if 0 output is wide characters, if 1 output is utf8 encoded
31my $utf = 1;
32
33# quote a parameter (C++ helper)
34#
35sub quotecxx {
36 my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
37 "\n" => "n", "\r" => "r", "\t" => "t",
38 "\013" => "v", '"' => '"', "\\" => "\\" );
39
40 # working around lack of 'use encoding'
a7ea35b0
MW
41 if (!$utf) {
42 $_ = pack "U0C*", unpack "C*", $_;
43 use utf8;
44 }
e70833fb
VS
45
46 s/[\000-\037"\\\177-\x{ffff}]/
47 if ($esc{$&}) {
48 "\\$esc{$&}";
a7ea35b0
MW
49 } elsif (ord($&) > 0x9f && !$utf) {
50 sprintf "\\u%04x", ord($&);
e70833fb
VS
51 } else {
52 sprintf "\\%03o", ord($&);
53 }
54 /ge;
55
56 # working around lack of 'use encoding'
a7ea35b0
MW
57 if (!$utf) {
58 no utf8;
59 $_ = pack "C*", unpack "C*", $_;
60 }
e70833fb
VS
61
62 return ($utf ? '"' : 'L"') . $_ . '"'
63}
64
65# start writing the output code (C++ interface)
66#
67sub begin_output {
68 my ($from, $instructions) = @_;
69
70 # embed it in the comment
71 $from = "\n$from";
72 $from =~ s/^(?: )?/ * /mg;
73
74 # $instructions contains information about the flags etc.
75 if ($instructions) {
76 $instructions = "\n$instructions";
77 $instructions =~ s/^(?: )?/ * /mg;
78 }
79
80 my $u = $utf ? " (UTF-8 encoded)" : "";
81
82 print <<EOT;
83/*
84 * Test data for wxRegEx$u
85$from$instructions */
86
87EOT
88}
89
90my @classes;
91
92# start a new section (C++ interface)
93#
94sub begin_section {
95 my ($id, $title) = @_;
96 my $class = "regextest_$id";
97 $class =~ s/\W/_/g;
98 push @classes, [$id, $class];
99
100 print <<EOT;
101
102/*
103 * $id $title
104 */
105
106class $class : public RegExTestSuite
107{
108public:
109 $class() : RegExTestSuite("regex.$id") { }
110 static Test *suite();
111};
112
113Test *$class\::suite()
114{
115 RegExTestSuite *suite = new $class;
116
117EOT
118}
119
120# output a test line (C++ interface)
121#
122sub write_test {
123 my @args = @_;
124 $_ = quotecxx for @args;
125 print " suite->add(" . (join ', ', @args) . ", NULL);\n";
126}
127
128# end a section (C++ interface)
129#
130sub end_section {
131 my ($id, $class) = @{$classes[$#classes]};
132
133 print <<EOT;
134
135 return suite;
136}
137
138CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
139
140EOT
141}
142
143# finish off the output (C++ interface)
144#
145sub end_output {
146 print <<EOT;
147
148/*
149 * A suite containing all the above suites
150 */
151
152class regextest : public TestSuite
153{
154public:
155 regextest() : TestSuite("regex") { }
156 static Test *suite();
157};
158
159Test *regextest::suite()
160{
161 TestSuite *suite = new regextest;
162
163EOT
164 print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
165
166 print <<EOT;
167
168 return suite;
169}
170
171CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
172CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
173EOT
174}
175
176# Parse a tcl string. Handles curly quoting and double quoting.
177#
178sub parsetcl {
179 my ($curly, $quote);
180 # recursively defined expression that can parse balanced braces
181 # warning: uses experimental features of perl, see perlop(1)
182 $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
183 $quote = qr/"(?:\\"|[^"])*"/;
184 my @tokens = shift =~ /($curly|$quote|\S+)/g;
185
186 # now remove braces/quotes and unescape any escapes
187 for (@tokens) {
188 if (s/^{(.*)}$/$1/) {
189 # for curly quoting, only unescape \{ and \}
190 s/\\([{}])/$1/g;
191 } else {
192 s/^"(.*)"$/$1/;
193
194 # unescape any escapes
195 my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
196 "n" => "\n", "r" => "\r", "t" => "\t",
197 "v" => "\013" );
198 my $x = qr/[[:xdigit:]]/;
199
200 s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
201 if ($1 =~ m{^([0-7]+)}) {
202 chr(oct($1));
203 } elsif ($1 =~ m{^x($x+)}) {
204 pack("C0U", hex($1) & 0xff);
205 } elsif ($1 =~ m{^u($x+)}) {
206 pack("C0U", hex($1));
207 } elsif ($esc{$1}) {
208 $esc{$1};
209 } else {
210 $1;
211 }
212 /ge;
213 }
214 }
215
216 return @tokens;
217}
218
219# helpers which keep track of whether begin_section has been called, so that
220# end_section can be called when appropriate
221#
222my @doing = ("0", "");
223my $in_section = 0;
224
225sub handle_doing {
226 end_section if $in_section;
227 $in_section = 0;
228 @doing = @_;
229}
230
231sub handle_test {
232 begin_section(@doing) if !$in_section;
233 $in_section = 1;
234 write_test @_;
235}
236
237sub handle_end {
238 end_section if $in_section;
239 $in_section = 0;
240 end_output;
241}
242
243# 'main' - start by parsing the command lines options.
244#
245my $badoption = !@ARGV;
246my $utfdefault = $utf;
247my $outputname;
248
249for (my $i = 0; $i < @ARGV; ) {
250 if ($ARGV[$i] !~ m{^-.}) {
251 $i++;
252 next;
253 }
254
255 if ($ARGV[$i] eq '--') {
256 splice @ARGV, $i, 1;
257 last;
258 }
259
260 if ($ARGV[$i] =~ s{^-(.*)o(.*)$}{-$1}i) { # -o : output file
261 $outputname = $2 || splice @ARGV, $i + 1, 1;
262 }
263
264 for (split //, substr($ARGV[$i], 1)) {
265 if (/u/i) { # -u : utf-8 output
266 $utf = 1;
267 } elsif (/w/i) { # -w : wide char output
268 $utf = 0;
269 } else {
270 $badoption = 1;
271 }
272 }
273
274 splice @ARGV, $i, 1;
275}
276
277# Display help
278#
279if ($badoption) {
280 my $prog = basename $0;
281 my ($w, $u) = (" (default)", " ");
282 ($w, $u) = ($u, $w) if $utfdefault;
283
284 print <<EOT;
285Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
286Generate test code for wxRegEx from 'reg.test'
287Example: $prog -o regex.inc reg.test wxreg.test
288
289 -w$w Output will be wide characters.
290 -u$u Output will be UTF-8 encoded.
291
292Input files should be in UTF-8. If no input files are specified input is
293read from stdin. If no output file is specified output is written to stdout.
d7204bba 294See the comments in reg.test for details of the input file format.
e70833fb
VS
295EOT
296 exit 0;
297}
298
299# Open the output file
300#
301open STDOUT, ">$outputname" if $outputname;
302
303# Read in the files and initially parse just the comments for copyright
304# information and instructions on the tests
305#
306my @input; # slurped input files stripped of comments
307my $files = ""; # copyright info from the input comments
308my $instructions = ""; # test instructions from the input comments
309
310do {
311 my $inputname = basename $ARGV[0] if @ARGV;
312
313 # slurp input
314 undef $/;
315 my $in = <>;
316
317 # remove escaped newlines
318 $in =~ s/(?<!\\)\\\n//g;
319
320 # record the copyrights of the input files
321 for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
322 s/[\s:]+/ /g;
323 $files .= " ";
324 $files .= $inputname . ": " if $inputname && $inputname ne '-';
325 $files .= "$_\n";
326 }
327
328 # Parse the comments for instructions on the tests, which look like this:
329 # i successful match with -indices (used in checking things like
330 # nonparticipating subexpressions)
331 if (!$instructions) {
332 my $sp = qr{\t| +}; # tab or three or more spaces
333 my @instructions = $in =~
334 /\n(
335 (?:
336 \#$sp\S?$sp\S[^\n]+\n # instruction line
337 (?:\#$sp$sp\S[^\n]+\n)* # continuation lines (if any)
338 )+
339 )/gx;
340
341 if (@instructions) {
342 $instructions[0] = "Test types:\n$instructions[0]";
343 if (@instructions > 1) {
344 $instructions[1] = "Flag characters:\n$instructions[1]";
345 }
346 $instructions = join "\n", @instructions;
347 $instructions =~ s/^#([^\t]?)/ $1/mg;
348 }
349 }
350
351 # @input is the input of all files (stipped of comments)
352 $in =~ s/^#.*$//mg;
353 push @input, $in;
354
355} while $ARGV[0];
356
357# Make a string naming the generator, the input files and copyright info
358#
359my $from = "Generated " . localtime() . " by " . basename $0;
360$from =~ s/[\s]+/ /g;
361if ($files) {
362 if ($files =~ /:/) {
363 $from .= " from the following files:";
364 } else {
365 $from .= " from work with the following copyright:";
366 }
367}
368$from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g); # word-wrap
369$from .= "\n$files" if $files;
370
371# Now start to print the code
372#
373begin_output $from, $instructions;
374
375# numbers for 'extra' sections
376my $extra = 1;
377
378for (@input)
379{
380 # Print the main tests
381 #
382 # Test lines look like this:
383 # m 3 b {\(a\)b} ab ab a
384 #
385 # Also looks for heading lines, e.g.:
386 # doing 4 "parentheses"
387 #
388 for (split "\n") {
389 if (/^doing\s+(\S+)\s+(\S.*)/) {
390 handle_doing parsetcl "$1 $2";
391 } elsif (/^[efimp]\s/) {
392 handle_test parsetcl $_;
393 }
394 }
395
396 # Extra tests
397 #
398 # The expression below matches something like this:
399 # test reg-33.8 {Bug 505048} {
400 # regexp -inline {\A\s*[^b]*b} ab
401 # } ab
402 #
403 # The three subexpressions then return these parts:
404 # $extras[$i] = '{Bug 505048}',
405 # $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
406 # $extras[$i + 2] = 'ab'
407 #
408 my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n # line 1
409 \s*regexp\s+([^\n]+)\n # line 2
410 \}\s*(\S[^\n]*)/gx; # line 3
411
412 handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
413
414 for (my $i = 0; $i < @extras; $i += 3) {
415 my $id = $extras[$i];
416
417 # further parse the middle line into options and the rest (i.e. $args)
418 my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
419
420 my @args = parsetcl $args;
421 $#args = 1; # only want the first two
422
423 # now handle the options
424 my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
425 my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
426
427 # get them all in the right order and print
bc10103e 428 unshift @args, $test, parsetcl($id), $results ? '-' : 'o';
e70833fb
VS
429 push @args, parsetcl(parsetcl($results)) if $results;
430 handle_test @args;
431 }
432}
433
434# finish
435#
436handle_end;