Commit | Line | Data |
---|---|---|
b37bf2e1 A |
1 | #! /usr/bin/perl -w |
2 | # | |
3 | # Static Hashtable Generator | |
4 | # | |
5 | # (c) 2000-2002 by Harri Porten <porten@kde.org> and | |
6 | # David Faure <faure@kde.org> | |
7 | # Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org> | |
8 | # Copyright (C) 2007 Apple Inc. All rights reserved. | |
9 | # | |
10 | # Part of the KJS library. | |
11 | # | |
12 | # This library is free software; you can redistribute it and/or | |
13 | # modify it under the terms of the GNU Lesser General Public | |
14 | # License as published by the Free Software Foundation; either | |
15 | # version 2 of the License, or (at your option) any later version. | |
16 | # | |
17 | # This library is distributed in the hope that it will be useful, | |
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 | # Lesser General Public License for more details. | |
21 | # | |
22 | # You should have received a copy of the GNU Lesser General Public | |
23 | # License along with this library; if not, write to the Free Software | |
24 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
25 | # | |
26 | ||
27 | use strict; | |
28 | ||
29 | my $file = $ARGV[0]; | |
30 | shift; | |
31 | my $includelookup = 0; | |
32 | ||
33 | # Use -i as second argument to make it include "lookup.h" | |
34 | $includelookup = 1 if (defined($ARGV[0]) && $ARGV[0] eq "-i"); | |
35 | ||
36 | # Use -n as second argument to make it use the third argument as namespace parameter ie. -n KDOM | |
37 | my $useNameSpace = $ARGV[1] if (defined($ARGV[0]) && $ARGV[0] eq "-n"); | |
38 | ||
39 | print STDERR "Creating hashtable for $file\n"; | |
40 | open(IN, $file) or die "No such file $file"; | |
41 | ||
42 | my @keys = (); | |
43 | my @values = (); | |
44 | my @attrs = (); | |
45 | my @params = (); | |
46 | my @hashes = (); | |
47 | my @table = (); | |
48 | my @links = (); | |
49 | ||
50 | my $inside = 0; | |
51 | my $name; | |
52 | my $size; | |
53 | my $hashSizeMask; | |
54 | my $banner = 0; | |
55 | sub calcTable(); | |
56 | sub output(); | |
57 | sub hashValue($); | |
58 | ||
59 | while (<IN>) { | |
60 | chop; | |
61 | s/^\s*//g; | |
62 | if (/^\#|^$/) { | |
63 | # comment. do nothing | |
64 | } elsif (/^\@begin/ && !$inside) { | |
65 | if (/^\@begin\s*([:_\w]+)\s*\d*\s*$/) { | |
66 | $inside = 1; | |
67 | $name = $1; | |
68 | } else { | |
69 | printf STDERR "WARNING: \@begin without table name and hashsize, skipping $_\n"; | |
70 | } | |
71 | } elsif (/^\@end\s*$/ && $inside) { | |
72 | ||
73 | calcTable(); | |
74 | ||
75 | output(); | |
76 | @keys = (); | |
77 | @values = (); | |
78 | @attrs = (); | |
79 | @params = (); | |
80 | @table = (); | |
81 | @links = (); | |
82 | @hashes = (); | |
83 | $inside = 0; | |
84 | } elsif (/^(\S+)\s*(\S+)\s*([\w\|]*)\s*(\w*)\s*$/ && $inside) { | |
85 | my $key = $1; | |
86 | my $val = $2; | |
87 | my $att = $3; | |
88 | my $param = $4; | |
89 | push(@keys, $key); | |
90 | push(@values, $val); | |
91 | push(@hashes, hashValue($key)); | |
92 | printf STDERR "WARNING: Number of arguments missing for $key/$val\n" | |
93 | if ( $att =~ m/Function/ && length($param) == 0); | |
94 | push(@attrs, length($att) > 0 ? $att : "0"); | |
95 | push(@params, length($param) > 0 ? $param : "0"); | |
96 | } elsif ($inside) { | |
97 | die "invalid data {" . $_ . "}"; | |
98 | } | |
99 | } | |
100 | ||
101 | die "missing closing \@end" if ($inside); | |
102 | ||
103 | sub ceilingToPowerOf2 | |
104 | { | |
105 | my ($size) = @_; | |
106 | ||
107 | my $powerOf2 = 1; | |
108 | while ($size > $powerOf2) { | |
109 | $powerOf2 <<= 1; | |
110 | } | |
111 | ||
112 | return $powerOf2; | |
113 | } | |
114 | ||
115 | sub calcTable() { | |
116 | my $hashsize = ceilingToPowerOf2(2 * @keys); | |
117 | $hashSizeMask = $hashsize - 1; | |
118 | $size = $hashsize; | |
119 | my $collisions = 0; | |
120 | my $maxdepth = 0; | |
121 | my $i = 0; | |
122 | foreach my $key (@keys) { | |
123 | my $depth = 0; | |
124 | my $h = hashValue($key) % $hashsize; | |
125 | while (defined($table[$h])) { | |
126 | if (defined($links[$h])) { | |
127 | $h = $links[$h]; | |
128 | $depth++; | |
129 | } else { | |
130 | $collisions++; | |
131 | $links[$h] = $size; | |
132 | $h = $size; | |
133 | $size++; | |
134 | } | |
135 | } | |
136 | $table[$h] = $i; | |
137 | $i++; | |
138 | $maxdepth = $depth if ( $depth > $maxdepth); | |
139 | } | |
140 | ||
141 | # Ensure table is big enough (in case of undef entries at the end) | |
142 | if ( $#table+1 < $size ) { | |
143 | $#table = $size-1; | |
144 | } | |
145 | } | |
146 | ||
147 | sub leftShift($$) { | |
148 | my ($value, $distance) = @_; | |
149 | return (($value << $distance) & 0xFFFFFFFF); | |
150 | } | |
151 | ||
152 | # Paul Hsieh's SuperFastHash | |
153 | # http://www.azillionmonkeys.com/qed/hash.html | |
154 | # Ported from UString.. | |
155 | sub hashValue($) { | |
156 | my @chars = split(/ */, $_[0]); | |
157 | ||
158 | # This hash is designed to work on 16-bit chunks at a time. But since the normal case | |
159 | # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they | |
160 | # were 16-bit chunks, which should give matching results | |
161 | ||
162 | my $EXP2_32 = 4294967296; | |
163 | ||
164 | my $hash = 0x9e3779b9; | |
165 | my $l = scalar @chars; #I wish this was in Ruby --- Maks | |
166 | my $rem = $l & 1; | |
167 | $l = $l >> 1; | |
168 | ||
169 | my $s = 0; | |
170 | ||
171 | # Main loop | |
172 | for (; $l > 0; $l--) { | |
173 | $hash += ord($chars[$s]); | |
174 | my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash; | |
175 | $hash = (leftShift($hash, 16)% $EXP2_32) ^ $tmp; | |
176 | $s += 2; | |
177 | $hash += $hash >> 11; | |
178 | $hash %= $EXP2_32; | |
179 | } | |
180 | ||
181 | # Handle end case | |
182 | if ($rem !=0) { | |
183 | $hash += ord($chars[$s]); | |
184 | $hash ^= (leftShift($hash, 11)% $EXP2_32); | |
185 | $hash += $hash >> 17; | |
186 | } | |
187 | ||
188 | # Force "avalanching" of final 127 bits | |
189 | $hash ^= leftShift($hash, 3); | |
190 | $hash += ($hash >> 5); | |
191 | $hash = ($hash% $EXP2_32); | |
192 | $hash ^= (leftShift($hash, 2)% $EXP2_32); | |
193 | $hash += ($hash >> 15); | |
194 | $hash = $hash% $EXP2_32; | |
195 | $hash ^= (leftShift($hash, 10)% $EXP2_32); | |
196 | ||
197 | # this avoids ever returning a hash code of 0, since that is used to | |
198 | # signal "hash not computed yet", using a value that is likely to be | |
199 | # effectively the same as 0 when the low bits are masked | |
200 | $hash = 0x80000000 if ($hash == 0); | |
201 | ||
202 | return $hash; | |
203 | } | |
204 | ||
205 | sub output() { | |
206 | if (!$banner) { | |
207 | $banner = 1; | |
208 | print "/* Automatically generated from $file using $0. DO NOT EDIT ! */\n"; | |
209 | } | |
210 | ||
211 | my $nameEntries = "${name}Entries"; | |
212 | $nameEntries =~ s/:/_/g; | |
213 | ||
214 | print "\n#include \"lookup.h\"\n" if ($includelookup); | |
215 | if ($useNameSpace) { | |
216 | print "\nnamespace ${useNameSpace}\n{\n"; | |
217 | print "\nusing namespace KJS;"; | |
218 | } else { | |
219 | print "\nnamespace KJS {\n"; | |
220 | } | |
221 | print "\nstatic const struct HashEntry ".$nameEntries."[] = {\n"; | |
222 | my $i = 0; | |
223 | ||
224 | foreach my $entry (@table) { | |
225 | if (defined($entry)) { | |
226 | my $key = $keys[$entry]; | |
227 | print " \{ \"" . $key . "\""; | |
228 | print ", { (intptr_t)" . $values[$entry] . " }"; | |
229 | print ", " . $attrs[$entry]; | |
230 | print ", " . $params[$entry]; | |
231 | print ", "; | |
232 | if (defined($links[$i])) { | |
233 | print "&" . $nameEntries . "[" . $links[$i] . "]" . " \}"; | |
234 | } else { | |
235 | print "0 \}" | |
236 | } | |
237 | print "/* " . $hashes[$entry] . " */ "; | |
238 | } else { | |
239 | print " { 0, { 0 }, 0, 0, 0 }"; | |
240 | } | |
241 | print "," unless ($i == $size - 1); | |
242 | print "\n"; | |
243 | $i++; | |
244 | } | |
245 | ||
246 | print "};\n\n"; | |
247 | print "const struct HashTable $name = "; | |
248 | print "\{ 3, $size, $nameEntries, $hashSizeMask \};\n\n"; | |
249 | print "} // namespace\n"; | |
250 | } |