]>
Commit | Line | Data |
---|---|---|
14957cd0 A |
1 | # Copyright (C) 2010 Apple Inc. All rights reserved. |
2 | # | |
3 | # Redistribution and use in source and binary forms, with or without | |
4 | # modification, are permitted provided that the following conditions | |
5 | # are met: | |
6 | # 1. Redistributions of source code must retain the above copyright | |
7 | # notice, this list of conditions and the following disclaimer. | |
8 | # 2. Redistributions in binary form must reproduce the above copyright | |
9 | # notice, this list of conditions and the following disclaimer in the | |
10 | # documentation and/or other materials provided with the distribution. | |
11 | # | |
12 | # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
13 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
15 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
16 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
17 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
18 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
19 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
20 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
21 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
22 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
23 | ||
24 | import sys | |
25 | import string | |
26 | import operator | |
27 | ||
28 | keywordsText = open(sys.argv[1]).read() | |
29 | ||
30 | # Observed weights of the most common keywords, rounded to 2.s.d | |
31 | keyWordWeights = { | |
32 | "catch": 0.01, | |
33 | "try": 0.01, | |
34 | "while": 0.01, | |
35 | "case": 0.01, | |
36 | "break": 0.01, | |
37 | "new": 0.01, | |
38 | "in": 0.01, | |
39 | "typeof": 0.02, | |
40 | "true": 0.02, | |
41 | "false": 0.02, | |
42 | "for": 0.03, | |
43 | "null": 0.03, | |
44 | "else": 0.03, | |
45 | "return": 0.13, | |
46 | "var": 0.13, | |
47 | "if": 0.16, | |
48 | "function": 0.18, | |
49 | "this": 0.18, | |
50 | } | |
51 | ||
52 | ||
53 | def allWhitespace(str): | |
54 | for c in str: | |
55 | if not(c in string.whitespace): | |
56 | return False | |
57 | return True | |
58 | ||
59 | ||
60 | def parseKeywords(keywordsText): | |
61 | lines = keywordsText.split("\n") | |
62 | lines = [line.split("#")[0] for line in lines] | |
63 | lines = [line for line in lines if (not allWhitespace(line))] | |
64 | name = lines[0].split() | |
65 | terminator = lines[-1] | |
66 | if not name[0] == "@begin": | |
67 | raise Exception("expected description beginning with @begin") | |
68 | if not terminator == "@end": | |
69 | raise Exception("expected description ending with @end") | |
70 | ||
71 | lines = lines[1:-1] # trim off the old heading | |
72 | return [line.split() for line in lines] | |
73 | ||
74 | ||
75 | def makePadding(size): | |
76 | str = "" | |
77 | for i in range(size): | |
78 | str = str + " " | |
79 | return str | |
80 | ||
81 | ||
82 | class Trie: | |
83 | def __init__(self, prefix): | |
84 | self.prefix = prefix | |
85 | self.keys = {} | |
86 | self.value = None | |
87 | ||
88 | def insert(self, key, value): | |
89 | if len(key) == 0: | |
90 | self.value = value | |
91 | return | |
92 | if not (key[0] in self.keys): | |
93 | self.keys[key[0]] = Trie(key[0]) | |
94 | self.keys[key[0]].insert(key[1:], value) | |
95 | ||
96 | def coalesce(self): | |
97 | keys = {} | |
98 | for k, v in self.keys.items(): | |
99 | t = v.coalesce() | |
100 | keys[t.prefix] = t | |
101 | self.keys = keys | |
102 | if self.value != None: | |
103 | return self | |
104 | if len(self.keys) != 1: | |
105 | return self | |
106 | (prefix, suffix) = self.keys.items()[0] | |
107 | res = Trie(self.prefix + prefix) | |
108 | res.value = suffix.value | |
109 | res.keys = suffix.keys | |
110 | return res | |
111 | ||
112 | def fillOut(self, prefix=""): | |
113 | self.fullPrefix = prefix + self.prefix | |
114 | weight = 0 | |
115 | if self.fullPrefix in keyWordWeights: | |
116 | weight = weight + keyWordWeights[self.fullPrefix] | |
117 | self.selfWeight = weight | |
118 | for trie in self.keys.values(): | |
119 | trie.fillOut(self.fullPrefix) | |
120 | weight = weight + trie.weight | |
121 | self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)] | |
122 | self.weight = weight | |
123 | ||
124 | def printSubTreeAsC(self, indent): | |
125 | str = makePadding(indent) | |
126 | ||
127 | if self.value != None: | |
128 | print(str + "if (!isIdentPart(code[%d])) {" % (len(self.fullPrefix))) | |
129 | print(str + " internalShift<%d, DoNotBoundsCheck>();" % len(self.fullPrefix)) | |
130 | print(str + " if (shouldCreateIdentifier)") | |
131 | print(str + (" data->ident = &m_globalData->propertyNames->%sKeyword;" % self.fullPrefix)) | |
132 | print(str + " return " + self.value + ";") | |
133 | print(str + "}") | |
134 | rootIndex = len(self.fullPrefix) | |
135 | itemCount = 0 | |
136 | for k, trie in self.keys: | |
137 | baseIndex = rootIndex | |
138 | if (baseIndex > 0) and (len(k) == 3): | |
139 | baseIndex = baseIndex - 1 | |
140 | k = trie.fullPrefix[baseIndex] + k | |
141 | test = [("'%s'" % c) for c in k] | |
142 | if len(test) == 1: | |
143 | comparison = "code[%d] == %s" % (baseIndex, test[0]) | |
144 | else: | |
145 | base = "code" | |
146 | if baseIndex > 0: | |
147 | base = "code + %d" % baseIndex | |
148 | comparison = ("COMPARE_CHARACTERS%d(%s, " % (len(test), base)) + ", ".join(test) + ")" | |
149 | if itemCount == 0: | |
150 | print(str + "if (" + comparison + ") {") | |
151 | else: | |
152 | print(str + "else if (" + comparison + ") {") | |
153 | ||
154 | trie.printSubTreeAsC(indent + 4) | |
155 | itemCount = itemCount + 1 | |
156 | print(str + "}") | |
157 | ||
158 | def maxLength(self): | |
159 | max = len(self.fullPrefix) | |
160 | for (_, trie) in self.keys: | |
161 | l = trie.maxLength() | |
162 | if l > max: | |
163 | max = l | |
164 | return max | |
165 | ||
166 | def printAsC(self): | |
167 | print("namespace JSC {") | |
168 | print("static ALWAYS_INLINE bool isIdentPart(int c);") | |
169 | # max length + 1 so we don't need to do any bounds checking at all | |
170 | print("static const int maxTokenLength = %d;" % (self.maxLength() + 1)) | |
171 | print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseKeyword(JSTokenData* data) {") | |
172 | print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);") | |
173 | print(" const UChar* code = m_code;") | |
174 | self.printSubTreeAsC(4) | |
175 | print(" return IDENT;") | |
176 | print("}") | |
177 | print("}") | |
178 | ||
179 | keywords = parseKeywords(keywordsText) | |
180 | trie = Trie("") | |
181 | for k, v in keywords: | |
182 | trie.insert(k, v) | |
183 | trie.coalesce() | |
184 | trie.fillOut() | |
185 | print("// This file was generated by KeywordLookupGenerator.py. Do not edit.") | |
186 | print(""" | |
187 | ||
188 | #if CPU(NEEDS_ALIGNED_ACCESS) | |
189 | ||
190 | #define COMPARE_CHARACTERS2(address, char1, char2) \ | |
191 | (((address)[0] == char1) && ((address)[1] == char2)) | |
192 | #define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \ | |
193 | (COMPARE_CHARACTERS2(address, char1, char2) && COMPARE_CHARACTERS2((address) + 2, char3, char4)) | |
194 | ||
195 | #else | |
196 | ||
197 | #if CPU(BIG_ENDIAN) | |
198 | #define CHARPAIR_TOUINT32(a, b) ((((uint32_t)(a)) << 16) + (uint32_t)(b)) | |
199 | #define CHARQUAD_TOUINT64(a, b, c, d) ((((uint64_t)(CHARPAIR_TOUINT32(a, b))) << 32) + CHARPAIR_TOUINT32(c, d)) | |
200 | #else | |
201 | #define CHARPAIR_TOUINT32(a, b) ((((uint32_t)(b)) << 16) + (uint32_t)(a)) | |
202 | #define CHARQUAD_TOUINT64(a, b, c, d) ((((uint64_t)(CHARPAIR_TOUINT32(c, d))) << 32) + CHARPAIR_TOUINT32(a, b)) | |
203 | #endif | |
204 | ||
205 | #define COMPARE_CHARACTERS2(address, char1, char2) \ | |
206 | (((uint32_t*)(address))[0] == CHARPAIR_TOUINT32(char1, char2)) | |
207 | #if CPU(X86_64) | |
208 | ||
209 | #define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \ | |
210 | (((uint64_t*)(address))[0] == CHARQUAD_TOUINT64(char1, char2, char3, char4)) | |
211 | #else | |
212 | #define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \ | |
213 | (COMPARE_CHARACTERS2(address, char1, char2) && COMPARE_CHARACTERS2((address) + 2, char3, char4)) | |
214 | #endif | |
215 | ||
216 | #endif | |
217 | ||
218 | #define COMPARE_CHARACTERS3(address, char1, char2, char3) \ | |
219 | (COMPARE_CHARACTERS2(address, char1, char2) && ((address)[2] == (char3))) | |
220 | #define COMPARE_CHARACTERS5(address, char1, char2, char3, char4, char5) \ | |
221 | (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && ((address)[4] == (char5))) | |
222 | #define COMPARE_CHARACTERS6(address, char1, char2, char3, char4, char5, char6) \ | |
223 | (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS2(address + 4, char5, char6)) | |
224 | #define COMPARE_CHARACTERS7(address, char1, char2, char3, char4, char5, char6, char7) \ | |
225 | (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS4(address + 3, char4, char5, char6, char7)) | |
226 | #define COMPARE_CHARACTERS8(address, char1, char2, char3, char4, char5, char6, char7, char8) \ | |
227 | (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS4(address + 4, char5, char6, char7, char8)) | |
228 | """) | |
229 | ||
230 | trie.printAsC() |