]> git.saurik.com Git - apple/javascriptcore.git/blame - KeywordLookupGenerator.py
JavaScriptCore-903.tar.gz
[apple/javascriptcore.git] / KeywordLookupGenerator.py
CommitLineData
14957cd0
A
1# Copyright (C) 2010 Apple Inc. All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions
5# are met:
6# 1. Redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer.
8# 2. Redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution.
11#
12# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
13# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
15# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
16# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
17# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
18# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
19# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
24import sys
25import string
26import operator
27
28keywordsText = open(sys.argv[1]).read()
29
30# Observed weights of the most common keywords, rounded to 2.s.d
31keyWordWeights = {
32 "catch": 0.01,
33 "try": 0.01,
34 "while": 0.01,
35 "case": 0.01,
36 "break": 0.01,
37 "new": 0.01,
38 "in": 0.01,
39 "typeof": 0.02,
40 "true": 0.02,
41 "false": 0.02,
42 "for": 0.03,
43 "null": 0.03,
44 "else": 0.03,
45 "return": 0.13,
46 "var": 0.13,
47 "if": 0.16,
48 "function": 0.18,
49 "this": 0.18,
50}
51
52
53def allWhitespace(str):
54 for c in str:
55 if not(c in string.whitespace):
56 return False
57 return True
58
59
60def parseKeywords(keywordsText):
61 lines = keywordsText.split("\n")
62 lines = [line.split("#")[0] for line in lines]
63 lines = [line for line in lines if (not allWhitespace(line))]
64 name = lines[0].split()
65 terminator = lines[-1]
66 if not name[0] == "@begin":
67 raise Exception("expected description beginning with @begin")
68 if not terminator == "@end":
69 raise Exception("expected description ending with @end")
70
71 lines = lines[1:-1] # trim off the old heading
72 return [line.split() for line in lines]
73
74
75def makePadding(size):
76 str = ""
77 for i in range(size):
78 str = str + " "
79 return str
80
81
82class Trie:
83 def __init__(self, prefix):
84 self.prefix = prefix
85 self.keys = {}
86 self.value = None
87
88 def insert(self, key, value):
89 if len(key) == 0:
90 self.value = value
91 return
92 if not (key[0] in self.keys):
93 self.keys[key[0]] = Trie(key[0])
94 self.keys[key[0]].insert(key[1:], value)
95
96 def coalesce(self):
97 keys = {}
98 for k, v in self.keys.items():
99 t = v.coalesce()
100 keys[t.prefix] = t
101 self.keys = keys
102 if self.value != None:
103 return self
104 if len(self.keys) != 1:
105 return self
106 (prefix, suffix) = self.keys.items()[0]
107 res = Trie(self.prefix + prefix)
108 res.value = suffix.value
109 res.keys = suffix.keys
110 return res
111
112 def fillOut(self, prefix=""):
113 self.fullPrefix = prefix + self.prefix
114 weight = 0
115 if self.fullPrefix in keyWordWeights:
116 weight = weight + keyWordWeights[self.fullPrefix]
117 self.selfWeight = weight
118 for trie in self.keys.values():
119 trie.fillOut(self.fullPrefix)
120 weight = weight + trie.weight
121 self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)]
122 self.weight = weight
123
124 def printSubTreeAsC(self, indent):
125 str = makePadding(indent)
126
127 if self.value != None:
128 print(str + "if (!isIdentPart(code[%d])) {" % (len(self.fullPrefix)))
129 print(str + " internalShift<%d, DoNotBoundsCheck>();" % len(self.fullPrefix))
130 print(str + " if (shouldCreateIdentifier)")
131 print(str + (" data->ident = &m_globalData->propertyNames->%sKeyword;" % self.fullPrefix))
132 print(str + " return " + self.value + ";")
133 print(str + "}")
134 rootIndex = len(self.fullPrefix)
135 itemCount = 0
136 for k, trie in self.keys:
137 baseIndex = rootIndex
138 if (baseIndex > 0) and (len(k) == 3):
139 baseIndex = baseIndex - 1
140 k = trie.fullPrefix[baseIndex] + k
141 test = [("'%s'" % c) for c in k]
142 if len(test) == 1:
143 comparison = "code[%d] == %s" % (baseIndex, test[0])
144 else:
145 base = "code"
146 if baseIndex > 0:
147 base = "code + %d" % baseIndex
148 comparison = ("COMPARE_CHARACTERS%d(%s, " % (len(test), base)) + ", ".join(test) + ")"
149 if itemCount == 0:
150 print(str + "if (" + comparison + ") {")
151 else:
152 print(str + "else if (" + comparison + ") {")
153
154 trie.printSubTreeAsC(indent + 4)
155 itemCount = itemCount + 1
156 print(str + "}")
157
158 def maxLength(self):
159 max = len(self.fullPrefix)
160 for (_, trie) in self.keys:
161 l = trie.maxLength()
162 if l > max:
163 max = l
164 return max
165
166 def printAsC(self):
167 print("namespace JSC {")
168 print("static ALWAYS_INLINE bool isIdentPart(int c);")
169 # max length + 1 so we don't need to do any bounds checking at all
170 print("static const int maxTokenLength = %d;" % (self.maxLength() + 1))
171 print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseKeyword(JSTokenData* data) {")
172 print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);")
173 print(" const UChar* code = m_code;")
174 self.printSubTreeAsC(4)
175 print(" return IDENT;")
176 print("}")
177 print("}")
178
179keywords = parseKeywords(keywordsText)
180trie = Trie("")
181for k, v in keywords:
182 trie.insert(k, v)
183trie.coalesce()
184trie.fillOut()
185print("// This file was generated by KeywordLookupGenerator.py. Do not edit.")
186print("""
187
188#if CPU(NEEDS_ALIGNED_ACCESS)
189
190#define COMPARE_CHARACTERS2(address, char1, char2) \
191 (((address)[0] == char1) && ((address)[1] == char2))
192#define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \
193 (COMPARE_CHARACTERS2(address, char1, char2) && COMPARE_CHARACTERS2((address) + 2, char3, char4))
194
195#else
196
197#if CPU(BIG_ENDIAN)
198#define CHARPAIR_TOUINT32(a, b) ((((uint32_t)(a)) << 16) + (uint32_t)(b))
199#define CHARQUAD_TOUINT64(a, b, c, d) ((((uint64_t)(CHARPAIR_TOUINT32(a, b))) << 32) + CHARPAIR_TOUINT32(c, d))
200#else
201#define CHARPAIR_TOUINT32(a, b) ((((uint32_t)(b)) << 16) + (uint32_t)(a))
202#define CHARQUAD_TOUINT64(a, b, c, d) ((((uint64_t)(CHARPAIR_TOUINT32(c, d))) << 32) + CHARPAIR_TOUINT32(a, b))
203#endif
204
205#define COMPARE_CHARACTERS2(address, char1, char2) \
206 (((uint32_t*)(address))[0] == CHARPAIR_TOUINT32(char1, char2))
207#if CPU(X86_64)
208
209#define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \
210 (((uint64_t*)(address))[0] == CHARQUAD_TOUINT64(char1, char2, char3, char4))
211#else
212#define COMPARE_CHARACTERS4(address, char1, char2, char3, char4) \
213 (COMPARE_CHARACTERS2(address, char1, char2) && COMPARE_CHARACTERS2((address) + 2, char3, char4))
214#endif
215
216#endif
217
218#define COMPARE_CHARACTERS3(address, char1, char2, char3) \
219 (COMPARE_CHARACTERS2(address, char1, char2) && ((address)[2] == (char3)))
220#define COMPARE_CHARACTERS5(address, char1, char2, char3, char4, char5) \
221 (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
222#define COMPARE_CHARACTERS6(address, char1, char2, char3, char4, char5, char6) \
223 (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS2(address + 4, char5, char6))
224#define COMPARE_CHARACTERS7(address, char1, char2, char3, char4, char5, char6, char7) \
225 (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS4(address + 3, char4, char5, char6, char7))
226#define COMPARE_CHARACTERS8(address, char1, char2, char3, char4, char5, char6, char7, char8) \
227 (COMPARE_CHARACTERS4(address, char1, char2, char3, char4) && COMPARE_CHARACTERS4(address + 4, char5, char6, char7, char8))
228""")
229
230trie.printAsC()