]>
Commit | Line | Data |
---|---|---|
6fe7ccc8 A |
1 | # Copyright (C) 2011 Apple Inc. All rights reserved. |
2 | # Copyright (C) 2012 Sony Network Entertainment. All rights reserved. | |
14957cd0 A |
3 | # |
4 | # Redistribution and use in source and binary forms, with or without | |
5 | # modification, are permitted provided that the following conditions | |
6 | # are met: | |
7 | # 1. Redistributions of source code must retain the above copyright | |
8 | # notice, this list of conditions and the following disclaimer. | |
9 | # 2. Redistributions in binary form must reproduce the above copyright | |
10 | # notice, this list of conditions and the following disclaimer in the | |
11 | # documentation and/or other materials provided with the distribution. | |
12 | # | |
13 | # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
14 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
17 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | ||
25 | import sys | |
26 | import string | |
27 | import operator | |
28 | ||
29 | keywordsText = open(sys.argv[1]).read() | |
30 | ||
6fe7ccc8 A |
31 | # A second argument signifies that the output |
32 | # should be redirected to a file | |
33 | redirect_to_file = len(sys.argv) > 2 | |
34 | ||
35 | # Change stdout to point to the file if requested | |
36 | if redirect_to_file: | |
37 | file_output = open(sys.argv[-1], "w") | |
38 | sys.stdout = file_output | |
39 | ||
14957cd0 A |
40 | # Observed weights of the most common keywords, rounded to 2.s.d |
41 | keyWordWeights = { | |
42 | "catch": 0.01, | |
43 | "try": 0.01, | |
44 | "while": 0.01, | |
45 | "case": 0.01, | |
46 | "break": 0.01, | |
47 | "new": 0.01, | |
48 | "in": 0.01, | |
49 | "typeof": 0.02, | |
50 | "true": 0.02, | |
51 | "false": 0.02, | |
52 | "for": 0.03, | |
53 | "null": 0.03, | |
54 | "else": 0.03, | |
55 | "return": 0.13, | |
56 | "var": 0.13, | |
57 | "if": 0.16, | |
58 | "function": 0.18, | |
59 | "this": 0.18, | |
60 | } | |
61 | ||
62 | ||
63 | def allWhitespace(str): | |
64 | for c in str: | |
65 | if not(c in string.whitespace): | |
66 | return False | |
67 | return True | |
68 | ||
69 | ||
70 | def parseKeywords(keywordsText): | |
71 | lines = keywordsText.split("\n") | |
72 | lines = [line.split("#")[0] for line in lines] | |
73 | lines = [line for line in lines if (not allWhitespace(line))] | |
74 | name = lines[0].split() | |
75 | terminator = lines[-1] | |
76 | if not name[0] == "@begin": | |
77 | raise Exception("expected description beginning with @begin") | |
78 | if not terminator == "@end": | |
79 | raise Exception("expected description ending with @end") | |
80 | ||
81 | lines = lines[1:-1] # trim off the old heading | |
82 | return [line.split() for line in lines] | |
83 | ||
84 | ||
85 | def makePadding(size): | |
86 | str = "" | |
87 | for i in range(size): | |
88 | str = str + " " | |
89 | return str | |
90 | ||
91 | ||
92 | class Trie: | |
93 | def __init__(self, prefix): | |
94 | self.prefix = prefix | |
95 | self.keys = {} | |
96 | self.value = None | |
97 | ||
98 | def insert(self, key, value): | |
99 | if len(key) == 0: | |
100 | self.value = value | |
101 | return | |
102 | if not (key[0] in self.keys): | |
103 | self.keys[key[0]] = Trie(key[0]) | |
104 | self.keys[key[0]].insert(key[1:], value) | |
105 | ||
106 | def coalesce(self): | |
107 | keys = {} | |
108 | for k, v in self.keys.items(): | |
109 | t = v.coalesce() | |
110 | keys[t.prefix] = t | |
111 | self.keys = keys | |
112 | if self.value != None: | |
113 | return self | |
114 | if len(self.keys) != 1: | |
115 | return self | |
6fe7ccc8 A |
116 | # Python 3: for() loop for compatibility. Use next() when Python 2.6 is the baseline. |
117 | for (prefix, suffix) in self.keys.items(): | |
118 | res = Trie(self.prefix + prefix) | |
119 | res.value = suffix.value | |
120 | res.keys = suffix.keys | |
121 | return res | |
14957cd0 A |
122 | |
123 | def fillOut(self, prefix=""): | |
124 | self.fullPrefix = prefix + self.prefix | |
125 | weight = 0 | |
126 | if self.fullPrefix in keyWordWeights: | |
127 | weight = weight + keyWordWeights[self.fullPrefix] | |
128 | self.selfWeight = weight | |
129 | for trie in self.keys.values(): | |
130 | trie.fillOut(self.fullPrefix) | |
131 | weight = weight + trie.weight | |
132 | self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)] | |
133 | self.weight = weight | |
134 | ||
6fe7ccc8 | 135 | def printSubTreeAsC(self, typeName, indent): |
14957cd0 A |
136 | str = makePadding(indent) |
137 | ||
138 | if self.value != None: | |
139 | print(str + "if (!isIdentPart(code[%d])) {" % (len(self.fullPrefix))) | |
6fe7ccc8 | 140 | print(str + " internalShift<%d>();" % len(self.fullPrefix)) |
14957cd0 A |
141 | print(str + " if (shouldCreateIdentifier)") |
142 | print(str + (" data->ident = &m_globalData->propertyNames->%sKeyword;" % self.fullPrefix)) | |
143 | print(str + " return " + self.value + ";") | |
144 | print(str + "}") | |
145 | rootIndex = len(self.fullPrefix) | |
146 | itemCount = 0 | |
147 | for k, trie in self.keys: | |
148 | baseIndex = rootIndex | |
149 | if (baseIndex > 0) and (len(k) == 3): | |
150 | baseIndex = baseIndex - 1 | |
151 | k = trie.fullPrefix[baseIndex] + k | |
152 | test = [("'%s'" % c) for c in k] | |
153 | if len(test) == 1: | |
154 | comparison = "code[%d] == %s" % (baseIndex, test[0]) | |
155 | else: | |
156 | base = "code" | |
157 | if baseIndex > 0: | |
158 | base = "code + %d" % baseIndex | |
6fe7ccc8 | 159 | comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")" |
14957cd0 A |
160 | if itemCount == 0: |
161 | print(str + "if (" + comparison + ") {") | |
162 | else: | |
6fe7ccc8 | 163 | print(str + "} else if (" + comparison + ") {") |
14957cd0 | 164 | |
6fe7ccc8 | 165 | trie.printSubTreeAsC(typeName, indent + 4) |
14957cd0 | 166 | itemCount = itemCount + 1 |
6fe7ccc8 A |
167 | |
168 | if itemCount == len(self.keys): | |
169 | print(str + "}") | |
14957cd0 A |
170 | |
171 | def maxLength(self): | |
172 | max = len(self.fullPrefix) | |
173 | for (_, trie) in self.keys: | |
174 | l = trie.maxLength() | |
175 | if l > max: | |
176 | max = l | |
177 | return max | |
178 | ||
179 | def printAsC(self): | |
180 | print("namespace JSC {") | |
6fe7ccc8 A |
181 | print("") |
182 | print("static ALWAYS_INLINE bool isIdentPart(LChar c);") | |
183 | print("static ALWAYS_INLINE bool isIdentPart(UChar c);") | |
14957cd0 A |
184 | # max length + 1 so we don't need to do any bounds checking at all |
185 | print("static const int maxTokenLength = %d;" % (self.maxLength() + 1)) | |
6fe7ccc8 A |
186 | print("") |
187 | print("template <>") | |
188 | print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)") | |
189 | print("{") | |
14957cd0 | 190 | print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);") |
6fe7ccc8 | 191 | print("") |
14957cd0 | 192 | print(" const UChar* code = m_code;") |
6fe7ccc8 | 193 | self.printSubTreeAsC("UCHAR", 4) |
14957cd0 A |
194 | print(" return IDENT;") |
195 | print("}") | |
6fe7ccc8 A |
196 | print("") |
197 | print("template <>") | |
198 | print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)") | |
199 | print("{") | |
200 | print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);") | |
201 | print("") | |
202 | print(" const LChar* code = m_code;") | |
203 | self.printSubTreeAsC("CHAR", 4) | |
204 | print(" return IDENT;") | |
14957cd0 | 205 | print("}") |
6fe7ccc8 A |
206 | print("") |
207 | print("} // namespace JSC") | |
14957cd0 A |
208 | |
209 | keywords = parseKeywords(keywordsText) | |
210 | trie = Trie("") | |
211 | for k, v in keywords: | |
212 | trie.insert(k, v) | |
213 | trie.coalesce() | |
214 | trie.fillOut() | |
215 | print("// This file was generated by KeywordLookupGenerator.py. Do not edit.") | |
216 | print(""" | |
14957cd0 A |
217 | #if CPU(NEEDS_ALIGNED_ACCESS) |
218 | ||
6fe7ccc8 A |
219 | #define COMPARE_2CHARS(address, char1, char2) \\ |
220 | (((address)[0] == char1) && ((address)[1] == char2)) | |
221 | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | |
222 | (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) | |
223 | #define COMPARE_2UCHARS(address, char1, char2) \\ | |
14957cd0 | 224 | (((address)[0] == char1) && ((address)[1] == char2)) |
6fe7ccc8 A |
225 | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ |
226 | (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) | |
14957cd0 | 227 | |
6fe7ccc8 | 228 | #else // CPU(NEEDS_ALIGNED_ACCESS) |
14957cd0 A |
229 | |
230 | #if CPU(BIG_ENDIAN) | |
6fe7ccc8 A |
231 | |
232 | #define CHARPAIR_TOUINT16(a, b) \\ | |
233 | ((((uint16_t)(a)) << 8) + (uint16_t)(b)) | |
234 | #define CHARQUAD_TOUINT32(a, b, c, d) \\ | |
235 | ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d)) | |
236 | #define UCHARPAIR_TOUINT32(a, b) \\ | |
237 | ((((uint32_t)(a)) << 16) + (uint32_t)(b)) | |
238 | #define UCHARQUAD_TOUINT64(a, b, c, d) \\ | |
239 | ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d)) | |
240 | ||
241 | #else // CPU(BIG_ENDIAN) | |
242 | ||
243 | #define CHARPAIR_TOUINT16(a, b) \\ | |
244 | ((((uint16_t)(b)) << 8) + (uint16_t)(a)) | |
245 | #define CHARQUAD_TOUINT32(a, b, c, d) \\ | |
246 | ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b)) | |
247 | #define UCHARPAIR_TOUINT32(a, b) \\ | |
248 | ((((uint32_t)(b)) << 16) + (uint32_t)(a)) | |
249 | #define UCHARQUAD_TOUINT64(a, b, c, d) \\ | |
250 | ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b)) | |
251 | ||
252 | #endif // CPU(BIG_ENDIAN) | |
253 | ||
254 | ||
255 | #define COMPARE_2CHARS(address, char1, char2) \\ | |
256 | (((uint16_t*)(address))[0] == CHARPAIR_TOUINT16(char1, char2)) | |
257 | #define COMPARE_2UCHARS(address, char1, char2) \\ | |
258 | (((uint32_t*)(address))[0] == UCHARPAIR_TOUINT32(char1, char2)) | |
259 | ||
14957cd0 A |
260 | #if CPU(X86_64) |
261 | ||
6fe7ccc8 A |
262 | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ |
263 | (((uint32_t*)(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4)) | |
264 | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | |
265 | (((uint64_t*)(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4)) | |
266 | ||
267 | #else // CPU(X86_64) | |
268 | ||
269 | #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ | |
270 | (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) | |
271 | #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ | |
272 | (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) | |
273 | ||
274 | #endif // CPU(X86_64) | |
275 | ||
276 | #endif // CPU(NEEDS_ALIGNED_ACCESS) | |
277 | ||
278 | #define COMPARE_3CHARS(address, char1, char2, char3) \\ | |
279 | (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3))) | |
280 | #define COMPARE_3UCHARS(address, char1, char2, char3) \\ | |
281 | (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3))) | |
282 | #define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\ | |
283 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) | |
284 | #define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\ | |
285 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) | |
286 | #define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\ | |
287 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6)) | |
288 | #define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\ | |
289 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6)) | |
290 | #define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ | |
291 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7)) | |
292 | #define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ | |
293 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7)) | |
294 | #define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ | |
295 | (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8)) | |
296 | #define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ | |
297 | (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8)) | |
14957cd0 A |
298 | """) |
299 | ||
300 | trie.printAsC() | |
6fe7ccc8 A |
301 | |
302 | # Close the redirected file if requested | |
303 | if (redirect_to_file): | |
304 | file_output.close() | |
305 | sys.stdout = sys.__stdout__ |