]>
Commit | Line | Data |
---|---|---|
b37bf2e1 A |
1 | #!/usr/bin/perl -w |
2 | # | |
3 | # This is JavaScriptCore's variant of the PCRE library. While this library | |
4 | # started out as a copy of PCRE, many of the features of PCRE have been | |
5 | # removed. This library now supports only the regular expression features | |
6 | # required by the JavaScript language specification, and has only the functions | |
7 | # needed by JavaScriptCore and the rest of WebKit. | |
8 | # | |
9 | # Originally written by Philip Hazel | |
10 | # Copyright (c) 1997-2006 University of Cambridge | |
11 | # Copyright (C) 2002, 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. | |
12 | # | |
13 | # ----------------------------------------------------------------------------- | |
14 | # Redistribution and use in source and binary forms, with or without | |
15 | # modification, are permitted provided that the following conditions are met: | |
16 | # | |
17 | # * Redistributions of source code must retain the above copyright notice, | |
18 | # this list of conditions and the following disclaimer. | |
19 | # | |
20 | # * Redistributions in binary form must reproduce the above copyright | |
21 | # notice, this list of conditions and the following disclaimer in the | |
22 | # documentation and/or other materials provided with the distribution. | |
23 | # | |
24 | # * Neither the name of the University of Cambridge nor the names of its | |
25 | # contributors may be used to endorse or promote products derived from | |
26 | # this software without specific prior written permission. | |
27 | # | |
28 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | # POSSIBILITY OF SUCH DAMAGE. | |
39 | # ----------------------------------------------------------------------------- | |
40 | ||
41 | # This is a freestanding support program to generate a file containing | |
42 | # character tables. The tables are built according to the default C | |
43 | # locale. | |
44 | ||
45 | use strict; | |
46 | ||
47 | use File::Basename; | |
48 | use File::Spec; | |
49 | use File::Temp; | |
50 | ||
# Forward declaration: readHeaderValues() is defined at the end of the file but
# is called below, so its (empty) prototype has to be known before that call.
sub readHeaderValues();

# Table layout constants (offsets, lengths and flag bits), keyed by name.
# readHeaderValues() fills this in.
my %pcre_internal;

# Exactly one argument is accepted: the path of the file to generate.
if (scalar(@ARGV) != 1) {
    print STDERR "Usage: ", basename($0), " output-file\n";
    exit 1;
}

my $outputFile = shift @ARGV;

readHeaderValues();

# Name the file in the failure message; a bare "$!" does not say what could
# not be opened.
open(OUT, ">", $outputFile) or die "Could not open '$outputFile' for writing: $!";
# Write the bytes exactly as printed, without any line-ending translation.
binmode(OUT);

# Banner and the first half of the explanatory comment. The text contains no
# format directives, so it is printed verbatim.
print OUT
    "/*************************************************\n" .
    "* Perl-Compatible Regular Expressions *\n" .
    "*************************************************/\n\n" .
    "/* This file is automatically written by the dftables auxiliary \n" .
    "program. If you edit it by hand, you might like to edit the Makefile to \n" .
    "prevent its ever being regenerated.\n\n";

# Rest of the comment, the array declaration (sized by tables_length) and the
# heading of the first table.
printf(OUT
    "This file contains the default tables for characters with codes less than\n" .
    "128 (ASCII characters). These tables are used when no external tables are\n" .
    "passed to PCRE. */\n\n" .
    "const unsigned char kjs_pcre_default_tables[%d] = {\n\n" .
    "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
80 | ||
# The lower-casing table is the first one written, so its offset has to be 0.
die "lcc_offset != 0" unless $pcre_internal{lcc_offset} == 0;

{
    # One entry per code 0..127 holding the code of the lower-cased character,
    # eight entries per output line, every entry followed by a comma.
    my $rows = " ";
    for my $code (0 .. 127) {
        $rows .= "\n " if $code != 0 && ($code & 7) == 0;
        $rows .= sprintf("0x%02X", ord(lc(chr($code))));
        $rows .= ", " unless $code == 127;
    }
    print OUT $rows . ",\n\n";
}
96 | ||
print OUT "/* This table is a case flipping table. */\n\n";

# The case-flipping table follows the 128-entry lower-casing table.
die "fcc_offset != 128" unless $pcre_internal{fcc_offset} == 128;

{
    # For each code 0..127: lower-case letters map to their upper-case form,
    # everything else maps to its lower-case form.
    my @cells = map {
        my $char = chr($_);
        my $flipped = ($char =~ /[[:lower:]]/) ? uc($char) : lc($char);
        # Every eighth entry (except the very first) starts a new output line.
        (($_ != 0 && ($_ & 7) == 0) ? "\n " : "") . sprintf("0x%02X", ord($flipped));
    } 0 .. 127;

    print OUT " " . join(", ", @cells) . ",\n\n";
}
115 | ||
print OUT
    "/* This table contains bit maps for various character classes.\n" .
    "Each map is 32 bytes long and the bits run from the least\n" .
    "significant end of each byte. The classes are: space, digit, word. */\n\n";

# The bit maps follow the 128-entry case-flipping table.
die "cbits_offset != fcc_offset + 128"
    unless $pcre_internal{cbits_offset} == $pcre_internal{fcc_offset} + 128;

{
    my $mapBytes = $pcre_internal{cbit_length};
    my @classBits = (0) x $mapBytes;

    # Sets the bit for character code $code in the map that starts at byte
    # $base: byte $code / 8 (rounded down), bit $code % 8.
    my $markBit = sub {
        my ($base, $code) = @_;
        $classBits[$base + ($code >> 3)] |= 1 << ($code & 7);
    };

    # Digit map: '0' through '9'.
    $markBit->($pcre_internal{cbit_digit}, $_) for ord('0') .. ord('9');

    # Word map: '_' plus every alphanumeric; space map: every white space character.
    $markBit->($pcre_internal{cbit_word}, ord('_'));
    for my $code (0 .. 127) {
        my $char = chr($code);
        $markBit->($pcre_internal{cbit_word}, $code) if $char =~ /[[:alnum:]]/;
        $markBit->($pcre_internal{cbit_space}, $code) if $char =~ /[[:space:]]/;
    }

    # Eight bytes per output line, with an extra blank line after every 32 bytes.
    my $rows = " ";
    for my $index (0 .. $mapBytes - 1) {
        if ($index != 0 && ($index & 7) == 0) {
            $rows .= "\n" if ($index & 31) == 0;
            $rows .= "\n ";
        }
        $rows .= sprintf("0x%02X", $classBits[$index]);
        $rows .= ", " unless $index == $mapBytes - 1;
    }
    print OUT $rows . ",\n\n";
}
154 | ||
printf(OUT
    "/* This table identifies various classes of character by individual bits:\n" .
    " 0x%02x white space character\n" .
    " 0x%02x hexadecimal digit\n" .
    " 0x%02x alphanumeric or '_'\n*/\n\n",
    @pcre_internal{qw(ctype_space ctype_xdigit ctype_word)});

# The ctypes table follows the class bit maps.
die "ctypes_offset != cbits_offset + cbit_length"
    unless $pcre_internal{ctypes_offset} == $pcre_internal{cbits_offset} + $pcre_internal{cbit_length};

{
    # Character class pattern paired with the flag value added when it matches.
    my @flagTests = (
        [ qr/[[:space:]]/,  $pcre_internal{ctype_space}  ],
        [ qr/[[:xdigit:]]/, $pcre_internal{ctype_xdigit} ],
        [ qr/[[:alnum:]_]/, $pcre_internal{ctype_word}   ],
    );

    print OUT " ";
    for my $code (0 .. 127) {
        my $char = chr($code);

        my $flags = 0;
        for my $flagTest (@flagTests) {
            my ($pattern, $value) = @$flagTest;
            $flags += $value if $char =~ $pattern;
        }

        # The last entry closes the array instead of being followed by a comma.
        my $piece = sprintf("0x%02X", $flags) . ($code == 127 ? "};" : ", ");

        # After every eighth entry, add a comment naming the range of the row:
        # each end is shown as the character when printable, else as its code.
        if (($code & 7) == 7) {
            my $rowStart = $code - 7;
            $piece .= " /* ";
            $piece .= (chr($rowStart) =~ /[[:print:]]/)
                ? sprintf(" %c -", $rowStart)
                : sprintf("%3d-", $rowStart);
            $piece .= ($char =~ m/[[:print:]]/)
                ? sprintf(" %c ", $code)
                : sprintf("%3d", $code);
            $piece .= " */\n";
            $piece .= " " unless $code == 127;
        }

        print OUT $piece;
    }
}
204 | ||
# The declared array length must equal the end of the 128-entry ctypes table.
if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
    die "tables_length != ctypes_offset + 128";
}

print OUT "\n\n/* End of chartables.c */\n";

# Output is buffered, so a write error (e.g. a full disk) may only be reported
# by close(). Check it so that a truncated file is never passed off as success.
close(OUT) or die "Could not finish writing '$outputFile': $!";

exit 0;
214 | ||
# Loads the table layout constants from pcre_internal.h into %pcre_internal.
#
# The header, looked up in the directory of this script, is copied into a
# temporary file, preceded by "#define DFTABLES" and followed by one Perl
# assignment per constant, e.g.
#     $pcre_internal{"cbit_digit"} = cbit_digit;
# The C preprocessor is run over that file and its output is evaluated as Perl
# code, which stores the expanded values in %pcre_internal.
#
# Takes no arguments and returns nothing. Dies if the header cannot be opened,
# the temporary file cannot be written, the preprocessor cannot be run or
# fails, or the preprocessed text does not evaluate cleanly.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    # Slurp mode: each read below returns the whole remaining input at once.
    local $/ = undef;

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    # Read the header first, so that a missing header fails before any
    # temporary file has been created.
    open(my $header, "<", $headerPath) or die "Could not open '$headerPath': $!";
    my $headerText = <$header>;
    $headerText = "" unless defined $headerText;
    close($header);

    # UNLINK => 1 removes the temporary file when $fh goes out of scope, on
    # both the normal and the error paths; previously the file was left behind.
    my $fh = File::Temp->new(
        DIR => ($ENV{'TMPDIR'} || "/tmp"),
        SUFFIX => ".in",
        TEMPLATE => basename($0) . "-XXXXXXXX",
        UNLINK => 1,
    );
    my $tempFile = $fh->filename();

    print $fh "#define DFTABLES\n\n";
    print $fh $headerText;
    print $fh "\n\n";

    # One assignment per constant; the preprocessor replaces the bare macro
    # name on the right-hand side with its definition.
    for my $v (@variables) {
        print $fh "\$pcre_internal{\"$v\"} = $v;\n";
    }

    # The data must be completely written before the preprocessor reads it.
    close($fh) or die "Could not write '$tempFile': $!";

    # List-form pipe open: the path is passed to cpp as a single argument and
    # never goes through a shell, so quotes or spaces in TMPDIR cannot break
    # (or alter) the command.
    open(my $cpp, "-|", "cpp", $tempFile) or die "Could not run cpp: $!";
    my $content = <$cpp>;
    # For a pipe, close() also waits for the command and fails if it exited
    # with a non-zero status, in which case the output must not be trusted.
    close($cpp)
        or die($! ? "Error reading from cpp: $!" : "cpp failed (wait status $?)");

    # NOTE(review): this is a string eval of preprocessor output. The input is
    # the header found beside this script plus the assignments generated above;
    # do not feed it data from anywhere else. A constant the header does not
    # define stays a bareword and is rejected here when "use strict" is in
    # effect.
    eval $content;
    die "Could not evaluate preprocessed '$headerPath': $@" if $@;

    return;
}