]> git.saurik.com Git - apple/javascriptcore.git/blob - offlineasm/parser.rb
JavaScriptCore-1097.3.3.tar.gz
[apple/javascriptcore.git] / offlineasm / parser.rb
1 # Copyright (C) 2011 Apple Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions
5 # are met:
6 # 1. Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # 2. Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 #
12 # THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
13 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
14 # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
15 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
16 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
22 # THE POSSIBILITY OF SUCH DAMAGE.
23
24 require "ast"
25 require "instructions"
26 require "pathname"
27 require "registers"
28 require "self_hash"
29
# A source location: the file something came from and the line it was on.
class CodeOrigin
  attr_reader :fileName, :lineNumber

  # fileName and lineNumber are stored as given and exposed through readers.
  def initialize(fileName, lineNumber)
    @fileName, @lineNumber = fileName, lineNumber
  end

  # Renders the location as "<fileName>:<lineNumber>".
  def to_s
    location = "#{fileName}"
    location + ":#{lineNumber}"
  end
end
42
# A lexed token: its text plus the code origin it was read from.
class Token
  attr_reader :codeOrigin, :string

  def initialize(codeOrigin, string)
    @codeOrigin = codeOrigin
    @string = string
  end

  # Compares by text only. The right-hand side may be another Token or any
  # object comparable with the token's string (typically a String).
  def ==(other)
    other = other.string if other.is_a? Token
    @string == other
  end

  # Matches the token's text against the given pattern.
  def =~(other)
    @string =~ other
  end

  # Renders as '"text" at <codeOrigin>'.
  def to_s
    "#{@string.inspect} at #{codeOrigin}"
  end

  # Raises a RuntimeError describing a parse error at this token. An optional
  # first argument is appended to the message as extra context.
  def parseError(*comment)
    message = "Parse error: #{to_s}"
    message = "#{message}: #{comment[0]}" unless comment.empty?
    raise message
  end
end
75
76 #
77 # The lexer. Takes a string and returns an array of tokens.
78 #
79
# Splits str into an array of Tokens, tagging each with a CodeOrigin built
# from fileName (wrapped in a Pathname) and the current line number, which
# starts at 1. Comments and runs of spaces/tabs produce no token; newlines are
# emitted as "\n" tokens; hexadecimal (0x...) and octal (0...) literals are
# emitted as their decimal string. Raises a RuntimeError on input that matches
# none of the patterns.
def lex(str, fileName)
  fileName = Pathname.new(fileName)
  result = []
  lineNumber = 1
  until str.empty?
    # text is the string of the token to emit, or nil when the matched input
    # is skipped.
    text =
      case str
      when /\A\#([^\n]*)/, /\A([ \t]+)/
        # comment or whitespace, ignore
        nil
      when /\A\n/
        $&
      when /\A0x([0-9a-fA-F]+)/
        $&.hex.to_s
      when /\A0([0-7]+)/
        $&.oct.to_s
      when /\A[a-zA-Z]([a-zA-Z0-9_]*)/, /\A\.([a-zA-Z0-9_]*)/, /\A_([a-zA-Z0-9_]*)/, /\A([0-9]+)/, /\A::/, /\A[:,\(\)\[\]=\+\-~\|&^*]/
        # The numeric patterns above must be tried before plain decimal, and
        # "::" before the single-character ":".
        $&
      else
        raise "Lexer error at #{CodeOrigin.new(fileName, lineNumber).to_s}, unexpected sequence #{str[0..20].inspect}"
      end
    remainder = $~.post_match
    if text
      result << Token.new(CodeOrigin.new(fileName, lineNumber), text)
      # The newline token itself belongs to the line it terminates.
      lineNumber += 1 if text == "\n"
    end
    str = remainder
  end
  result
end
116
117 #
118 # Token identification.
119 #
120
# Returns the match position if token matches REGISTER_PATTERN, else nil.
def isRegister(token)
  matchPosition = (token =~ REGISTER_PATTERN)
  matchPosition
end
124
# Returns the match position if token matches INSTRUCTION_PATTERN, else nil.
def isInstruction(token)
  matchPosition = (token =~ INSTRUCTION_PATTERN)
  matchPosition
end
128
# Truthy when token is reserved: one of the language's fixed words, a register
# name (REGISTER_PATTERN), or an instruction name (INSTRUCTION_PATTERN).
# Returns the match position of the first pattern that matches, else nil.
def isKeyword(token)
  reservedWords = /\A((true)|(false)|(if)|(then)|(else)|(elsif)|(end)|(and)|(or)|(not)|(macro)|(const)|(sizeof)|(error)|(include))\Z/
  token =~ reservedWords ||
    token =~ REGISTER_PATTERN ||
    token =~ INSTRUCTION_PATTERN
end
134
# An identifier is a letter followed by letters, digits or underscores that is
# not a keyword. Returns nil when the shape does not match, otherwise true or
# false depending on isKeyword.
def isIdentifier(token)
  shapeMatch = token =~ /\A[a-zA-Z]([a-zA-Z0-9_]*)\Z/
  return shapeMatch unless shapeMatch
  !isKeyword(token)
end
138
# A global label is an underscore followed by letters, digits or underscores.
# Returns the match position, or nil if token does not have that shape.
def isLabel(token)
  labelShape = /\A_([a-zA-Z0-9_]*)\Z/
  token =~ labelShape
end
142
# A local label is a dot followed by letters, digits or underscores.
# Returns the match position, or nil if token does not have that shape.
def isLocalLabel(token)
  localLabelShape = /\A\.([a-zA-Z0-9_]*)\Z/
  token =~ localLabelShape
end
146
# Truthy when token is either an identifier or a register name.
def isVariable(token)
  isIdentifier(token) || isRegister(token)
end
150
# Truthy when token starts with a decimal digit. Returns the match position
# (0) or nil.
def isInteger(token)
  leadingDigit = /\A[0-9]/
  token =~ leadingDigit
end
154
155 #
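# The parser. Takes source text and a file name, lexes the text, and builds
# an AST from the resulting tokens. Use parseData or parse (defined below) as
# entry points; the Parser's individual methods are not for public consumption.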
158 #
159
# Recursive-descent parser over the token array produced by lex. @idx is the
# index of the next unconsumed token; @tokens[@idx] is nil once the input is
# exhausted. parseSequence(nil, "") parses the whole input.
class Parser
  # Lexes data; fileName is used for the code origins attached to tokens.
  def initialize(data, fileName)
    @fileName = fileName
    @tokens = lex(data, fileName)
    @idx = 0
  end

  # Raises a parse error at the current token, or an end-of-file parse error
  # when there is no current token. An optional first argument is appended to
  # the message.
  def parseError(*comment)
    if @tokens[@idx]
      @tokens[@idx].parseError(*comment)
    elsif comment.empty?
      raise "Parse error at end of file"
    else
      raise "Parse error at end of file: #{comment[0]}"
    end
  end

  # Requires the current token to match regexp (or, when regexp is nil, the
  # input to be exhausted), then advances past it.
  def consume(regexp)
    if regexp
      parseError unless @tokens[@idx] =~ regexp
    else
      parseError unless @idx == @tokens.length
    end
    @idx += 1
  end

  # Advances past any run of newline tokens.
  def skipNewLine
    while @tokens[@idx] == "\n"
      @idx += 1
    end
  end

  # Code origin of the current token. At end of input falls back to the last
  # token's origin, or to line 1 of the file when there are no tokens at all,
  # so callers never have to dereference a nil token.
  def currentCodeOrigin
    if @tokens[@idx]
      @tokens[@idx].codeOrigin
    elsif not @tokens.empty?
      @tokens.last.codeOrigin
    else
      CodeOrigin.new(Pathname.new(@fileName), 1)
    end
  end

  # Parses: "not" atom | "(" predicate ")" | "true" | "false" | setting name.
  def parsePredicateAtom
    if @tokens[@idx] == "not"
      codeOrigin = @tokens[@idx].codeOrigin
      @idx += 1
      # Wrap the operand so the negation is kept in the AST; returning the
      # bare operand would make "not X" mean "X".
      # NOTE(review): assumes ast.rb defines Not with a (codeOrigin, child)
      # constructor, parallel to And/Or -- confirm.
      Not.new(codeOrigin, parsePredicateAtom)
    elsif @tokens[@idx] == "("
      @idx += 1
      skipNewLine
      result = parsePredicate
      parseError unless @tokens[@idx] == ")"
      @idx += 1
      result
    elsif @tokens[@idx] == "true"
      result = True.instance
      @idx += 1
      result
    elsif @tokens[@idx] == "false"
      result = False.instance
      @idx += 1
      result
    elsif isIdentifier @tokens[@idx]
      result = Setting.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
      @idx += 1
      result
    else
      parseError
    end
  end

  # Parses a left-associative chain of atoms joined by "and".
  def parsePredicateAnd
    result = parsePredicateAtom
    while @tokens[@idx] == "and"
      codeOrigin = @tokens[@idx].codeOrigin
      @idx += 1
      skipNewLine
      right = parsePredicateAtom
      result = And.new(codeOrigin, result, right)
    end
    result
  end

  # Parses a left-associative chain of "and"-expressions joined by "or".
  def parsePredicate
    # some examples of precedence:
    # not a and b -> (not a) and b
    # a and b or c -> (a and b) or c
    # a or b and c -> a or (b and c)

    result = parsePredicateAnd
    while @tokens[@idx] == "or"
      codeOrigin = @tokens[@idx].codeOrigin
      @idx += 1
      skipNewLine
      right = parsePredicateAnd
      result = Or.new(codeOrigin, result, right)
    end
    result
  end

  # Parses a register (floating-point or general) or an identifier and
  # consumes it.
  def parseVariable
    if isRegister(@tokens[@idx])
      if @tokens[@idx] =~ FPR_PATTERN
        result = FPRegisterID.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
      else
        result = RegisterID.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
      end
    elsif isIdentifier(@tokens[@idx])
      result = Variable.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
    else
      parseError
    end
    @idx += 1
    result
  end

  # Parses a bracketed address whose offset expression has already been
  # parsed by the caller. The current token must be "[".
  def parseAddress(offset)
    parseError unless @tokens[@idx] == "["
    codeOrigin = @tokens[@idx].codeOrigin

    # Four possibilities:
    # []      -> AbsoluteAddress
    # [a]     -> Address
    # [a,b]   -> BaseIndex with scale = 1
    # [a,b,c] -> BaseIndex

    @idx += 1
    if @tokens[@idx] == "]"
      @idx += 1
      return AbsoluteAddress.new(codeOrigin, offset)
    end
    a = parseVariable
    if @tokens[@idx] == "]"
      result = Address.new(codeOrigin, a, offset)
    else
      parseError unless @tokens[@idx] == ","
      @idx += 1
      b = parseVariable
      if @tokens[@idx] == "]"
        result = BaseIndex.new(codeOrigin, a, b, 1, offset)
      else
        parseError unless @tokens[@idx] == ","
        @idx += 1
        # Check for a token first so end of input is a parse error rather
        # than a NoMethodError on nil.
        parseError unless @tokens[@idx] && ["1", "2", "4", "8"].member?(@tokens[@idx].string)
        c = @tokens[@idx].string.to_i
        @idx += 1
        parseError unless @tokens[@idx] == "]"
        result = BaseIndex.new(codeOrigin, a, b, c, offset)
      end
    end
    @idx += 1
    result
  end

  # Parses one or more identifiers separated by "::". Returns
  # [codeOrigin of the first identifier, array of the identifier strings].
  def parseColonColon
    skipNewLine
    # Validate before touching the token so end of input is a parse error.
    parseError unless isIdentifier @tokens[@idx]
    codeOrigin = @tokens[@idx].codeOrigin
    names = [@tokens[@idx].string]
    @idx += 1
    while @tokens[@idx] == "::"
      @idx += 1
      parseError unless isIdentifier @tokens[@idx]
      names << @tokens[@idx].string
      @idx += 1
    end
    [codeOrigin, names]
  end

  # Parses a unary minus/bitwise-not, a parenthesized expression, an integer,
  # an identifier (a variable, or a struct field offset when "::"-qualified),
  # a register, or a sizeof.
  def parseExpressionAtom
    skipNewLine
    if @tokens[@idx] == "-"
      @idx += 1
      NegImmediate.new(@tokens[@idx - 1].codeOrigin, parseExpressionAtom)
    elsif @tokens[@idx] == "~"
      @idx += 1
      BitnotImmediate.new(@tokens[@idx - 1].codeOrigin, parseExpressionAtom)
    elsif @tokens[@idx] == "("
      @idx += 1
      result = parseExpression
      parseError unless @tokens[@idx] == ")"
      @idx += 1
      result
    elsif isInteger @tokens[@idx]
      result = Immediate.new(@tokens[@idx].codeOrigin, @tokens[@idx].string.to_i)
      @idx += 1
      result
    elsif isIdentifier @tokens[@idx]
      codeOrigin, names = parseColonColon
      if names.size > 1
        # The last name is the field; everything before it names the struct.
        StructOffset.forField(codeOrigin, names[0..-2].join('::'), names[-1])
      else
        Variable.forName(codeOrigin, names[0])
      end
    elsif isRegister @tokens[@idx]
      parseVariable
    elsif @tokens[@idx] == "sizeof"
      @idx += 1
      codeOrigin, names = parseColonColon
      Sizeof.forName(codeOrigin, names.join('::'))
    else
      parseError
    end
  end

  # Parses a left-associative chain of atoms joined by "*".
  def parseExpressionMul
    skipNewLine
    result = parseExpressionAtom
    while @tokens[@idx] == "*"
      @idx += 1
      result = MulImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAtom)
    end
    result
  end

  # Truthy when the current token can start an expression.
  def couldBeExpression
    @tokens[@idx] == "-" or @tokens[@idx] == "~" or @tokens[@idx] == "sizeof" or isInteger(@tokens[@idx]) or isVariable(@tokens[@idx]) or @tokens[@idx] == "("
  end

  # Parses a left-associative chain of products joined by "+" or "-".
  def parseExpressionAdd
    skipNewLine
    result = parseExpressionMul
    while @tokens[@idx] == "+" or @tokens[@idx] == "-"
      isAddition = (@tokens[@idx] == "+")
      @idx += 1
      if isAddition
        result = AddImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionMul)
      else
        result = SubImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionMul)
      end
    end
    result
  end

  # Parses a left-associative chain of sums joined by "&".
  def parseExpressionAnd
    skipNewLine
    result = parseExpressionAdd
    while @tokens[@idx] == "&"
      @idx += 1
      result = AndImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAdd)
    end
    result
  end

  # Parses a full expression: "&"-expressions joined by "|" or "^"
  # (left-associative, same precedence level).
  def parseExpression
    skipNewLine
    result = parseExpressionAnd
    while @tokens[@idx] == "|" or @tokens[@idx] == "^"
      isOr = (@tokens[@idx] == "|")
      @idx += 1
      if isOr
        result = OrImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAnd)
      else
        result = XorImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAnd)
      end
    end
    result
  end

  # Parses one operand: an expression (optionally followed by a bracketed
  # address that uses it as the offset), a bare address (offset 0), a label
  # reference, or a local label reference. comment is passed to parseError
  # when none of these match.
  def parseOperand(comment)
    skipNewLine
    if couldBeExpression
      expr = parseExpression
      if @tokens[@idx] == "["
        parseAddress(expr)
      else
        expr
      end
    elsif @tokens[@idx] == "["
      parseAddress(Immediate.new(@tokens[@idx].codeOrigin, 0))
    elsif isLabel @tokens[@idx]
      result = LabelReference.new(@tokens[@idx].codeOrigin, Label.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string))
      @idx += 1
      result
    elsif isLocalLabel @tokens[@idx]
      result = LocalLabelReference.new(@tokens[@idx].codeOrigin, LocalLabel.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string))
      @idx += 1
      result
    else
      parseError(comment)
    end
  end

  # Parses a parenthesized, comma-separated list of identifiers (possibly
  # empty, possibly spread over several lines) and returns them as Variables.
  def parseMacroVariables
    skipNewLine
    consume(/\A\(\Z/)
    variables = []
    loop {
      skipNewLine
      if @tokens[@idx] == ")"
        @idx += 1
        break
      elsif isIdentifier(@tokens[@idx])
        variables << Variable.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
        @idx += 1
        skipNewLine
        if @tokens[@idx] == ")"
          @idx += 1
          break
        elsif @tokens[@idx] == ","
          @idx += 1
        else
          parseError
        end
      else
        parseError
      end
    }
    variables
  end

  # Parses statements until a token matching final is reached (the terminator
  # is left unconsumed for the caller), or until end of input when final is
  # nil. comment is included in the error raised when no statement form
  # matches. Returns a Sequence of the parsed nodes.
  def parseSequence(final, comment)
    # Tolerates being called at end of input (empty file, or an unterminated
    # macro/if): the loop below then reports a proper parse error or returns
    # an empty sequence.
    firstCodeOrigin = currentCodeOrigin
    list = []
    loop {
      if (@idx == @tokens.length and not final) or (final and @tokens[@idx] =~ final)
        break
      elsif @tokens[@idx] == "\n"
        # ignore
        @idx += 1
      elsif @tokens[@idx] == "const"
        # Remember where the declaration starts; after the value is parsed
        # the current token is whatever follows it (possibly nothing).
        codeOrigin = @tokens[@idx].codeOrigin
        @idx += 1
        parseError unless isVariable @tokens[@idx]
        variable = Variable.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
        @idx += 1
        parseError unless @tokens[@idx] == "="
        @idx += 1
        value = parseOperand("while inside of const #{variable.name}")
        list << ConstDecl.new(codeOrigin, variable, value)
      elsif @tokens[@idx] == "error"
        list << Error.new(@tokens[@idx].codeOrigin)
        @idx += 1
      elsif @tokens[@idx] == "if"
        codeOrigin = @tokens[@idx].codeOrigin
        @idx += 1
        skipNewLine
        predicate = parsePredicate
        consume(/\A((then)|(\n))\Z/)
        skipNewLine
        ifThenElse = IfThenElse.new(codeOrigin, predicate, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
        list << ifThenElse
        # Each elsif becomes a nested IfThenElse hung off the previous one's
        # else case.
        while @tokens[@idx] == "elsif"
          codeOrigin = @tokens[@idx].codeOrigin
          @idx += 1
          skipNewLine
          predicate = parsePredicate
          consume(/\A((then)|(\n))\Z/)
          skipNewLine
          elseCase = IfThenElse.new(codeOrigin, predicate, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
          ifThenElse.elseCase = elseCase
          ifThenElse = elseCase
        end
        if @tokens[@idx] == "else"
          @idx += 1
          ifThenElse.elseCase = parseSequence(/\Aend\Z/, "while inside of else case for \"if #{predicate.dump}\"")
          @idx += 1
        else
          parseError unless @tokens[@idx] == "end"
          @idx += 1
        end
      elsif @tokens[@idx] == "macro"
        codeOrigin = @tokens[@idx].codeOrigin
        @idx += 1
        skipNewLine
        parseError unless isIdentifier(@tokens[@idx])
        name = @tokens[@idx].string
        @idx += 1
        variables = parseMacroVariables
        body = parseSequence(/\Aend\Z/, "while inside of macro #{name}")
        @idx += 1
        list << Macro.new(codeOrigin, name, variables, body)
      elsif isInstruction @tokens[@idx]
        codeOrigin = @tokens[@idx].codeOrigin
        name = @tokens[@idx].string
        @idx += 1
        if (not final and @idx == @tokens.size) or (final and @tokens[@idx] =~ final)
          # Zero operand instruction, and it's the last one.
          list << Instruction.new(codeOrigin, name, [])
          break
        elsif @tokens[@idx] == "\n"
          # Zero operand instruction.
          list << Instruction.new(codeOrigin, name, [])
          @idx += 1
        else
          # It's definitely an instruction, and it has at least one operand.
          operands = []
          endOfSequence = false
          loop {
            operands << parseOperand("while inside of instruction #{name}")
            if (not final and @idx == @tokens.size) or (final and @tokens[@idx] =~ final)
              # The end of the instruction and of the sequence.
              endOfSequence = true
              break
            elsif @tokens[@idx] == ","
              # Has another operand.
              @idx += 1
            elsif @tokens[@idx] == "\n"
              # The end of the instruction.
              @idx += 1
              break
            else
              parseError("Expected a comma, newline, or #{final} after #{operands.last.dump}")
            end
          }
          list << Instruction.new(codeOrigin, name, operands)
          if endOfSequence
            break
          end
        end
      elsif isIdentifier @tokens[@idx]
        codeOrigin = @tokens[@idx].codeOrigin
        name = @tokens[@idx].string
        @idx += 1
        if @tokens[@idx] == "("
          # Macro invocation.
          @idx += 1
          operands = []
          skipNewLine
          if @tokens[@idx] == ")"
            @idx += 1
          else
            loop {
              skipNewLine
              if @tokens[@idx] == "macro"
                # It's a macro lambda!
                codeOriginInner = @tokens[@idx].codeOrigin
                @idx += 1
                variables = parseMacroVariables
                body = parseSequence(/\Aend\Z/, "while inside of anonymous macro passed as argument to #{name}")
                @idx += 1
                operands << Macro.new(codeOriginInner, nil, variables, body)
              else
                operands << parseOperand("while inside of macro call to #{name}")
              end
              skipNewLine
              if @tokens[@idx] == ")"
                @idx += 1
                break
              elsif @tokens[@idx] == ","
                @idx += 1
              else
                # There may be no token left to describe.
                unexpected = @tokens[@idx] ? @tokens[@idx].string.inspect : "end of file"
                parseError "Unexpected #{unexpected} while parsing invocation of macro #{name}"
              end
            }
          end
          list << MacroCall.new(codeOrigin, name, operands)
        else
          parseError "Expected \"(\" after #{name}"
        end
      elsif isLabel @tokens[@idx] or isLocalLabel @tokens[@idx]
        codeOrigin = @tokens[@idx].codeOrigin
        name = @tokens[@idx].string
        @idx += 1
        parseError unless @tokens[@idx] == ":"
        # It's a label.
        if isLabel name
          list << Label.forName(codeOrigin, name)
        else
          list << LocalLabel.forName(codeOrigin, name)
        end
        @idx += 1
      elsif @tokens[@idx] == "include"
        @idx += 1
        parseError unless isIdentifier(@tokens[@idx])
        moduleName = @tokens[@idx].string
        # Included files are looked up next to the including file.
        fileName = @tokens[@idx].codeOrigin.fileName.dirname + (moduleName + ".asm")
        @idx += 1
        $stderr.puts "offlineasm: Including file #{fileName}"
        list << parse(fileName)
      else
        parseError "Expecting terminal #{final} #{comment}"
      end
    }
    Sequence.new(firstCodeOrigin, list)
  end
end
635
# Parses source text held in memory and returns the resulting top-level
# sequence. fileName is handed to the Parser along with the text.
def parseData(data, fileName)
  Parser.new(data, fileName).parseSequence(nil, "")
end
640
# Reads the file at fileName and parses its contents via parseData.
def parse(fileName)
  # File.read rather than IO::read: IO.read treats a path beginning with "|"
  # as a command to execute, whereas File.read only ever opens a file.
  parseData(File.read(fileName), fileName)
end
644
# Returns dirHash over the directory containing fileName, restricted to names
# matching /\.asm$/.
def parseHash(fileName)
  sourceDirectory = Pathname.new(fileName).dirname
  dirHash(sourceDirectory, /\.asm$/)
end
648