]>
Commit | Line | Data |
---|---|---|
1 | # Copyright (C) 2011 Apple Inc. All rights reserved. | |
2 | # Copyright (C) 2012 Sony Network Entertainment. All rights reserved. | |
3 | # | |
4 | # Redistribution and use in source and binary forms, with or without | |
5 | # modification, are permitted provided that the following conditions | |
6 | # are met: | |
7 | # 1. Redistributions of source code must retain the above copyright | |
8 | # notice, this list of conditions and the following disclaimer. | |
9 | # 2. Redistributions in binary form must reproduce the above copyright | |
10 | # notice, this list of conditions and the following disclaimer in the | |
11 | # documentation and/or other materials provided with the distribution. | |
12 | # | |
13 | # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
14 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
17 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | ||
25 | import sys | |
26 | import string | |
27 | import operator | |
28 | ||
29 | keywordsText = open(sys.argv[1]).read() | |
30 | ||
31 | # A second argument signifies that the output | |
32 | # should be redirected to a file | |
33 | redirect_to_file = len(sys.argv) > 2 | |
34 | ||
35 | # Change stdout to point to the file if requested | |
36 | if redirect_to_file: | |
37 | file_output = open(sys.argv[-1], "w") | |
38 | sys.stdout = file_output | |
39 | ||
40 | # Observed weights of the most common keywords, rounded to 2.s.d | |
41 | keyWordWeights = { | |
42 | "catch": 0.01, | |
43 | "try": 0.01, | |
44 | "while": 0.01, | |
45 | "case": 0.01, | |
46 | "break": 0.01, | |
47 | "new": 0.01, | |
48 | "in": 0.01, | |
49 | "typeof": 0.02, | |
50 | "true": 0.02, | |
51 | "false": 0.02, | |
52 | "for": 0.03, | |
53 | "null": 0.03, | |
54 | "else": 0.03, | |
55 | "return": 0.13, | |
56 | "var": 0.13, | |
57 | "if": 0.16, | |
58 | "function": 0.18, | |
59 | "this": 0.18, | |
60 | } | |
61 | ||
62 | ||
63 | def allWhitespace(str): | |
64 | for c in str: | |
65 | if not(c in string.whitespace): | |
66 | return False | |
67 | return True | |
68 | ||
69 | ||
70 | def parseKeywords(keywordsText): | |
71 | ||
72 | if sys.platform == "cygwin": | |
73 | keywordsText = keywordsText.replace("\r\n", "\n") | |
74 | ||
75 | lines = keywordsText.split("\n") | |
76 | lines = [line.split("#")[0] for line in lines] | |
77 | lines = [line for line in lines if (not allWhitespace(line))] | |
78 | name = lines[0].split() | |
79 | terminator = lines[-1] | |
80 | ||
81 | if not name[0] == "@begin": | |
82 | raise Exception("expected description beginning with @begin") | |
83 | if not terminator == "@end": | |
84 | raise Exception("expected description ending with @end") | |
85 | ||
86 | lines = lines[1:-1] # trim off the old heading | |
87 | return [line.split() for line in lines] | |
88 | ||
89 | ||
90 | def makePadding(size): | |
91 | str = "" | |
92 | for i in range(size): | |
93 | str = str + " " | |
94 | return str | |
95 | ||
96 | ||
97 | class Trie: | |
98 | def __init__(self, prefix): | |
99 | self.prefix = prefix | |
100 | self.keys = {} | |
101 | self.value = None | |
102 | ||
103 | def insert(self, key, value): | |
104 | if len(key) == 0: | |
105 | self.value = value | |
106 | return | |
107 | if not (key[0] in self.keys): | |
108 | self.keys[key[0]] = Trie(key[0]) | |
109 | self.keys[key[0]].insert(key[1:], value) | |
110 | ||
111 | def coalesce(self): | |
112 | keys = {} | |
113 | for k, v in self.keys.items(): | |
114 | t = v.coalesce() | |
115 | keys[t.prefix] = t | |
116 | self.keys = keys | |
117 | if self.value != None: | |
118 | return self | |
119 | if len(self.keys) != 1: | |
120 | return self | |
121 | # Python 3: for() loop for compatibility. Use next() when Python 2.6 is the baseline. | |
122 | for (prefix, suffix) in self.keys.items(): | |
123 | res = Trie(self.prefix + prefix) | |
124 | res.value = suffix.value | |
125 | res.keys = suffix.keys | |
126 | return res | |
127 | ||
128 | def fillOut(self, prefix=""): | |
129 | self.fullPrefix = prefix + self.prefix | |
130 | weight = 0 | |
131 | if self.fullPrefix in keyWordWeights: | |
132 | weight = weight + keyWordWeights[self.fullPrefix] | |
133 | self.selfWeight = weight | |
134 | for trie in self.keys.values(): | |
135 | trie.fillOut(self.fullPrefix) | |
136 | weight = weight + trie.weight | |
137 | self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)] | |
138 | self.weight = weight | |
139 | ||
140 | def printSubTreeAsC(self, typeName, indent): | |
141 | str = makePadding(indent) | |
142 | ||
143 | if self.value != None: | |
144 | print(str + "if (!isIdentPartIncludingEscape(code+%d, m_codeEnd)) {" % (len(self.fullPrefix))) | |
145 | print(str + " internalShift<%d>();" % len(self.fullPrefix)) | |
146 | print(str + " if (shouldCreateIdentifier)") | |
147 | print(str + (" data->ident = &m_vm->propertyNames->%sKeyword;" % self.fullPrefix)) | |
148 | print(str + " return " + self.value + ";") | |
149 | print(str + "}") | |
150 | rootIndex = len(self.fullPrefix) | |
151 | itemCount = 0 | |
152 | for k, trie in self.keys: | |
153 | baseIndex = rootIndex | |
154 | if (baseIndex > 0) and (len(k) == 3): | |
155 | baseIndex = baseIndex - 1 | |
156 | k = trie.fullPrefix[baseIndex] + k | |
157 | test = [("'%s'" % c) for c in k] | |
158 | if len(test) == 1: | |
159 | comparison = "code[%d] == %s" % (baseIndex, test[0]) | |
160 | else: | |
161 | base = "code" | |
162 | if baseIndex > 0: | |
163 | base = "code + %d" % baseIndex | |
164 | comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")" | |
165 | if itemCount == 0: | |
166 | print(str + "if (" + comparison + ") {") | |
167 | else: | |
168 | print(str + "} else if (" + comparison + ") {") | |
169 | ||
170 | trie.printSubTreeAsC(typeName, indent + 4) | |
171 | itemCount = itemCount + 1 | |
172 | ||
173 | if itemCount == len(self.keys): | |
174 | print(str + "}") | |
175 | ||
176 | def maxLength(self): | |
177 | max = len(self.fullPrefix) | |
178 | for (_, trie) in self.keys: | |
179 | l = trie.maxLength() | |
180 | if l > max: | |
181 | max = l | |
182 | return max | |
183 | ||
184 | def printAsC(self): | |
185 | print("namespace JSC {") | |
186 | print("") | |
187 | print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd);") | |
188 | print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd);") | |
189 | # max length + 1 so we don't need to do any bounds checking at all | |
190 | print("static const int maxTokenLength = %d;" % (self.maxLength() + 1)) | |
191 | print("") | |
192 | print("template <>") | |
193 | print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)") | |
194 | print("{") | |
195 | print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);") | |
196 | print("") | |
197 | print(" const UChar* code = m_code;") | |
198 | self.printSubTreeAsC("UCHAR", 4) | |
199 | print(" return IDENT;") | |
200 | print("}") | |
201 | print("") | |
202 | print("template <>") | |
203 | print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)") | |
204 | print("{") | |
205 | print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);") | |
206 | print("") | |
207 | print(" const LChar* code = m_code;") | |
208 | self.printSubTreeAsC("CHAR", 4) | |
209 | print(" return IDENT;") | |
210 | print("}") | |
211 | print("") | |
212 | print("} // namespace JSC") | |
213 | ||
214 | keywords = parseKeywords(keywordsText) | |
215 | trie = Trie("") | |
216 | for k, v in keywords: | |
217 | trie.insert(k, v) | |
218 | trie.coalesce() | |
219 | trie.fillOut() | |
220 | print("// This file was generated by KeywordLookupGenerator.py. Do not edit.") | |
221 | print(""" | |
222 | #if CPU(NEEDS_ALIGNED_ACCESS) | |
223 | ||
224 | #define COMPARE_2CHARS(address, char1, char2) \\ | |
225 | (((address)[0] == char1) && ((address)[1] == char2)) | |
226 | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | |
227 | (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) | |
228 | #define COMPARE_2UCHARS(address, char1, char2) \\ | |
229 | (((address)[0] == char1) && ((address)[1] == char2)) | |
230 | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | |
231 | (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) | |
232 | ||
233 | #else // CPU(NEEDS_ALIGNED_ACCESS) | |
234 | ||
235 | #if CPU(BIG_ENDIAN) | |
236 | ||
237 | #define CHARPAIR_TOUINT16(a, b) \\ | |
238 | ((((uint16_t)(a)) << 8) + (uint16_t)(b)) | |
239 | #define CHARQUAD_TOUINT32(a, b, c, d) \\ | |
240 | ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d)) | |
241 | #define UCHARPAIR_TOUINT32(a, b) \\ | |
242 | ((((uint32_t)(a)) << 16) + (uint32_t)(b)) | |
243 | #define UCHARQUAD_TOUINT64(a, b, c, d) \\ | |
244 | ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d)) | |
245 | ||
246 | #else // CPU(BIG_ENDIAN) | |
247 | ||
248 | #define CHARPAIR_TOUINT16(a, b) \\ | |
249 | ((((uint16_t)(b)) << 8) + (uint16_t)(a)) | |
250 | #define CHARQUAD_TOUINT32(a, b, c, d) \\ | |
251 | ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b)) | |
252 | #define UCHARPAIR_TOUINT32(a, b) \\ | |
253 | ((((uint32_t)(b)) << 16) + (uint32_t)(a)) | |
254 | #define UCHARQUAD_TOUINT64(a, b, c, d) \\ | |
255 | ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b)) | |
256 | ||
257 | #endif // CPU(BIG_ENDIAN) | |
258 | ||
259 | ||
260 | #define COMPARE_2CHARS(address, char1, char2) \\ | |
261 | ((reinterpret_cast<const uint16_t*>(address))[0] == CHARPAIR_TOUINT16(char1, char2)) | |
262 | #define COMPARE_2UCHARS(address, char1, char2) \\ | |
263 | ((reinterpret_cast<const uint32_t*>(address))[0] == UCHARPAIR_TOUINT32(char1, char2)) | |
264 | ||
265 | #if CPU(X86_64) | |
266 | ||
267 | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | |
268 | ((reinterpret_cast<const uint32_t*>(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4)) | |
269 | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | |
270 | ((reinterpret_cast<const uint64_t*>(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4)) | |
271 | ||
272 | #else // CPU(X86_64) | |
273 | ||
274 | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | |
275 | (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) | |
276 | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | |
277 | (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) | |
278 | ||
279 | #endif // CPU(X86_64) | |
280 | ||
281 | #endif // CPU(NEEDS_ALIGNED_ACCESS) | |
282 | ||
283 | #define COMPARE_3CHARS(address, char1, char2, char3) \\ | |
284 | (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3))) | |
285 | #define COMPARE_3UCHARS(address, char1, char2, char3) \\ | |
286 | (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3))) | |
287 | #define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\ | |
288 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) | |
289 | #define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\ | |
290 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) | |
291 | #define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\ | |
292 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6)) | |
293 | #define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\ | |
294 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6)) | |
295 | #define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ | |
296 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7)) | |
297 | #define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ | |
298 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7)) | |
299 | #define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ | |
300 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8)) | |
301 | #define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ | |
302 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8)) | |
303 | """) | |
304 | ||
305 | trie.printAsC() | |
306 | ||
307 | # Close the redirected file if requested | |
308 | if (redirect_to_file): | |
309 | file_output.close() | |
310 | sys.stdout = sys.__stdout__ |