# Source: git.saurik.com Git - apple/javascriptcore.git/blob - offlineasm/parser.rb
1 # Copyright (C) 2011 Apple Inc. All rights reserved.
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions
5 # are met:
6 # 1. Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # 2. Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
12 # THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
13 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
14 # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
15 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
16 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
22 # THE POSSIBILITY OF SUCH DAMAGE.
26 require "instructions"
32 attr_reader
:fileName, :lineNumber
34 def initialize(fileName
, lineNumber
)
36 @lineNumber = lineNumber
40 "#{fileName}:#{lineNumber}"
45 attr_reader
:codeOrigin, :string
47 def initialize(codeOrigin
, string
)
48 @codeOrigin = codeOrigin
54 @string == other
.string
65 "#{@string.inspect} at #{codeOrigin}"
68 def parseError(*comment
)
70 raise "Parse error: #{to_s}"
72 raise "Parse error: #{to_s}: #{comment[0]}"
78 attr_reader
:codeOrigin, :type, :string
79 def initialize(codeOrigin
, type
, string
)
80 @codeOrigin = codeOrigin
87 # The lexer. Takes a string and returns an array of tokens.
90 def lex(str
, fileName
)
91 fileName
= Pathname
.new(fileName
)
95 whitespaceFound
= false
100 when /\A\/\
/\ ?([^\n]*)/
103 annotationType
= whitespaceFound
? :local : :global
105 # We've found a '\n'. Emit the last comment recorded if appropriate:
106 # We need to parse annotations regardless of whether the backend does
107 # anything with them or not. This is because the C++ backend may make
108 # use of this for its cloopDo debugging utility even if
109 # enableInstrAnnotations is not enabled.
111 result
<< Annotation
.new(CodeOrigin
.new(fileName
, lineNumber
),
112 annotationType
, annotation
)
115 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
117 when /\A[a-zA-Z]([a-zA-Z0-9_]*)/
118 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
119 when /\A\.([a-zA-Z0-9_]*)/
120 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
121 when /\A_([a-zA-Z0-9_]*)/
122 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
125 whitespaceFound
= true
128 when /\A0x([0-9a-fA-F]+)/
129 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&.hex
.to_s
)
131 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&.oct
.to_s
)
133 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
135 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
136 when /\A[:,\(\)\[\]=\+\-~\|&^*]/
137 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
139 raise "Lexer error at #{CodeOrigin.new(fileName, lineNumber).to_s}, unexpected sequence #{str[0..20].inspect}"
141 whitespaceFound
= false
148 # Token identification.
151 def isRegister(token
)
152 token
=~ REGISTER_PATTERN
155 def isInstruction(token
)
156 token
=~ INSTRUCTION_PATTERN
160 token
=~
/\A((true)|(false)|(if)|(then)|(else)|(elsif)|(end)|(and)|(or)|(not)|(macro)|(const)|(sizeof)|(error)|(include))\Z/ or
161 token
=~ REGISTER_PATTERN
or
162 token
=~ INSTRUCTION_PATTERN
165 def isIdentifier(token
)
166 token
=~
/\A[a-zA-Z]([a-zA-Z0-9_]*)\Z/ and not isKeyword(token
)
170 token
=~
/\A_([a-zA-Z0-9_]*)\Z/
173 def isLocalLabel(token
)
174 token
=~
/\A\.([a-zA-Z0-9_]*)\Z/
177 def isVariable(token
)
178 isIdentifier(token
) or isRegister(token
)
186 # The parser. Takes source text plus its file name, lexes it into tokens,
187 # and builds an AST. Only parseSequence is meant to be called from outside.
191 def initialize(data, fileName
)
192 @tokens = lex(data, fileName
)
197 def parseError(*comment
)
199 @tokens[@idx].parseError(*comment
)
202 raise "Parse error at end of file"
204 raise "Parse error at end of file: #{comment[0]}"
211 parseError
unless @tokens[@idx] =~ regexp
213 parseError
unless @idx == @tokens.length
219 while @tokens[@idx] == "\n"
224 def parsePredicateAtom
225 if @tokens[@idx] == "not"
226 codeOrigin
= @tokens[@idx].codeOrigin
228 Not
.new(codeOrigin
, parsePredicateAtom
)
229 elsif @tokens[@idx] == "("
232 result
= parsePredicate
233 parseError
unless @tokens[@idx] == ")"
236 elsif @tokens[@idx] == "true"
237 result
= True
.instance
240 elsif @tokens[@idx] == "false"
241 result
= False
.instance
244 elsif isIdentifier
@tokens[@idx]
245 result
= Setting
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
253 def parsePredicateAnd
254 result
= parsePredicateAtom
255 while @tokens[@idx] == "and"
256 codeOrigin
= @tokens[@idx].codeOrigin
259 right
= parsePredicateAtom
260 result
= And
.new(codeOrigin
, result
, right
)
266 # some examples of precedence:
267 # not a and b -> (not a) and b
268 # a and b or c -> (a and b) or c
269 # a or b and c -> a or (b and c)
271 result
= parsePredicateAnd
272 while @tokens[@idx] == "or"
273 codeOrigin
= @tokens[@idx].codeOrigin
276 right
= parsePredicateAnd
277 result
= Or
.new(codeOrigin
, result
, right
)
283 if isRegister(@tokens[@idx])
284 if @tokens[@idx] =~ FPR_PATTERN
285 result
= FPRegisterID
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
287 result
= RegisterID
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
289 elsif isIdentifier(@tokens[@idx])
290 result
= Variable
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
298 def parseAddress(offset
)
299 parseError
unless @tokens[@idx] == "["
300 codeOrigin
= @tokens[@idx].codeOrigin
302 # Four possibilities:
303 # [] -> AbsoluteAddress
304 # [a] -> Address
305 # [a,b] -> BaseIndex with scale = 1
306 # [a,b,c] -> BaseIndex
309 if @tokens[@idx] == "]"
311 return AbsoluteAddress
.new(codeOrigin
, offset
)
314 if @tokens[@idx] == "]"
315 result
= Address
.new(codeOrigin
, a
, offset
)
317 parseError
unless @tokens[@idx] == ","
320 if @tokens[@idx] == "]"
321 result
= BaseIndex
.new(codeOrigin
, a
, b
, 1, offset
)
323 parseError
unless @tokens[@idx] == ","
325 parseError
unless ["1", "2", "4", "8"].member
? @tokens[@idx].string
326 c
= @tokens[@idx].string
.to_i
328 parseError
unless @tokens[@idx] == "]"
329 result
= BaseIndex
.new(codeOrigin
, a
, b
, c
, offset
)
338 codeOrigin
= @tokens[@idx].codeOrigin
339 parseError
unless isIdentifier
@tokens[@idx]
340 names
= [@tokens[@idx].string
]
342 while @tokens[@idx] == "::"
344 parseError
unless isIdentifier
@tokens[@idx]
345 names
<< @tokens[@idx].string
348 raise if names
.empty
?
352 def parseExpressionAtom
354 if @tokens[@idx] == "-"
356 NegImmediate
.new(@tokens[@idx - 1].codeOrigin
, parseExpressionAtom
)
357 elsif @tokens[@idx] == "~"
359 BitnotImmediate
.new(@tokens[@idx - 1].codeOrigin
, parseExpressionAtom
)
360 elsif @tokens[@idx] == "("
362 result
= parseExpression
363 parseError
unless @tokens[@idx] == ")"
366 elsif isInteger
@tokens[@idx]
367 result
= Immediate
.new(@tokens[@idx].codeOrigin
, @tokens[@idx].string
.to_i
)
370 elsif isIdentifier
@tokens[@idx]
371 codeOrigin
, names
= parseColonColon
373 StructOffset
.forField(codeOrigin
, names
[0..-2].join('::'), names
[-1])
375 Variable
.forName(codeOrigin
, names
[0])
377 elsif isRegister
@tokens[@idx]
379 elsif @tokens[@idx] == "sizeof"
381 codeOrigin
, names
= parseColonColon
382 Sizeof
.forName(codeOrigin
, names
.join('::'))
388 def parseExpressionMul
390 result
= parseExpressionAtom
391 while @tokens[@idx] == "*"
392 if @tokens[@idx] == "*"
394 result
= MulImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAtom
)
402 def couldBeExpression
403 @tokens[@idx] == "-" or @tokens[@idx] == "~" or @tokens[@idx] == "sizeof" or isInteger(@tokens[@idx]) or isVariable(@tokens[@idx]) or @tokens[@idx] == "("
406 def parseExpressionAdd
408 result
= parseExpressionMul
409 while @tokens[@idx] == "+" or @tokens[@idx] == "-"
410 if @tokens[@idx] == "+
"
412 result
= AddImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionMul
)
413 elsif @tokens[@idx] == "-"
415 result
= SubImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionMul
)
423 def parseExpressionAnd
425 result
= parseExpressionAdd
426 while @tokens[@idx] == "&"
428 result
= AndImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAdd
)
435 result
= parseExpressionAnd
436 while @tokens[@idx] == "|" or @tokens[@idx] == "^"
437 if @tokens[@idx] == "|"
439 result
= OrImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAnd
)
440 elsif @tokens[@idx] == "^"
442 result
= XorImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAnd
)
450 def parseOperand(comment
)
453 expr
= parseExpression
454 if @tokens[@idx] == "["
459 elsif @tokens[@idx] == "["
460 parseAddress(Immediate
.new(@tokens[@idx].codeOrigin
, 0))
461 elsif isLabel
@tokens[@idx]
462 result
= LabelReference
.new(@tokens[@idx].codeOrigin
, Label
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
))
465 elsif isLocalLabel
@tokens[@idx]
466 result
= LocalLabelReference
.new(@tokens[@idx].codeOrigin
, LocalLabel
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
))
474 def parseMacroVariables
480 if @tokens[@idx] == ")"
483 elsif isIdentifier(@tokens[@idx])
484 variables
<< Variable
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
487 if @tokens[@idx] == ")"
490 elsif @tokens[@idx] == ","
502 def parseSequence(final
, comment
)
503 firstCodeOrigin
= @tokens[@idx].codeOrigin
506 if (@idx == @tokens.length
and not final
) or (final
and @tokens[@idx] =~ final
)
508 elsif @tokens[@idx].is_a
? Annotation
509 # This is the only place where we can encounter a global
510 # annotation, and hence need to be able to distinguish between
511 # global and local annotations.
512 # globalAnnotations are the ones that start from column 0. All
513 # others are considered localAnnotations. The only reason to
514 # distinguish between them is so that we can format the output
515 # nicely as one would expect.
517 codeOrigin
= @tokens[@idx].codeOrigin
518 annotationOpcode
= (@tokens[@idx].type
== :global) ? "globalAnnotation" : "localAnnotation"
519 list
<< Instruction
.new(codeOrigin
, annotationOpcode
, [], @tokens[@idx].string
)
521 @idx +
= 2 # Consume the newline as well.
522 elsif @tokens[@idx] == "\n"
525 elsif @tokens[@idx] == "const"
527 parseError
unless isVariable
@tokens[@idx]
528 variable
= Variable
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
530 parseError
unless @tokens[@idx] == "="
532 value
= parseOperand("while inside of const #{variable.name}")
533 list
<< ConstDecl
.new(@tokens[@idx].codeOrigin
, variable
, value
)
534 elsif @tokens[@idx] == "error"
535 list
<< Error
.new(@tokens[@idx].codeOrigin
)
537 elsif @tokens[@idx] == "if"
538 codeOrigin
= @tokens[@idx].codeOrigin
541 predicate
= parsePredicate
542 consume(/\A((then)|(\n))\Z/)
544 ifThenElse
= IfThenElse
.new(codeOrigin
, predicate
, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
546 while @tokens[@idx] == "elsif"
547 codeOrigin
= @tokens[@idx].codeOrigin
550 predicate
= parsePredicate
551 consume(/\A((then)|(\n))\Z/)
553 elseCase
= IfThenElse
.new(codeOrigin
, predicate
, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
554 ifThenElse
.elseCase
= elseCase
555 ifThenElse
= elseCase
557 if @tokens[@idx] == "else"
559 ifThenElse
.elseCase
= parseSequence(/\Aend\Z/, "while inside of else case for \"if #{predicate.dump}\"")
562 parseError
unless @tokens[@idx] == "end"
565 elsif @tokens[@idx] == "macro"
566 codeOrigin
= @tokens[@idx].codeOrigin
569 parseError
unless isIdentifier(@tokens[@idx])
570 name
= @tokens[@idx].string
572 variables
= parseMacroVariables
573 body
= parseSequence(/\Aend\Z/, "while inside of macro #{name}")
575 list
<< Macro
.new(codeOrigin
, name
, variables
, body
)
576 elsif isInstruction
@tokens[@idx]
577 codeOrigin
= @tokens[@idx].codeOrigin
578 name
= @tokens[@idx].string
580 if (not final
and @idx == @tokens.size
) or (final
and @tokens[@idx] =~ final
)
581 # Zero operand instruction, and it's the last one.
582 list
<< Instruction
.new(codeOrigin
, name
, [], @annotation)
585 elsif @tokens[@idx].is_a
? Annotation
586 list
<< Instruction
.new(codeOrigin
, name
, [], @tokens[@idx].string
)
588 @idx +
= 2 # Consume the newline as well.
589 elsif @tokens[@idx] == "\n"
590 # Zero operand instruction.
591 list
<< Instruction
.new(codeOrigin
, name
, [], @annotation)
595 # It's definitely an instruction, and it has at least one operand.
597 endOfSequence
= false
599 operands
<< parseOperand("while inside of instruction #{name}")
600 if (not final
and @idx == @tokens.size
) or (final
and @tokens[@idx] =~ final
)
601 # The end of the instruction and of the sequence.
604 elsif @tokens[@idx] == ","
605 # Has another operand.
607 elsif @tokens[@idx].is_a
? Annotation
608 @annotation = @tokens[@idx].string
609 @idx +
= 2 # Consume the newline as well.
611 elsif @tokens[@idx] == "\n"
612 # The end of the instruction.
616 parseError("Expected a comma, newline, or #{final} after #{operands.last.dump}")
619 list
<< Instruction
.new(codeOrigin
, name
, operands
, @annotation)
626 # Check for potential macro invocation:
627 elsif isIdentifier
@tokens[@idx]
628 codeOrigin
= @tokens[@idx].codeOrigin
629 name
= @tokens[@idx].string
631 if @tokens[@idx] == "("
636 if @tokens[@idx] == ")"
641 if @tokens[@idx] == "macro"
642 # It's a macro lambda!
643 codeOriginInner
= @tokens[@idx].codeOrigin
645 variables
= parseMacroVariables
646 body
= parseSequence(/\Aend\Z/, "while inside of anonymous macro passed as argument to #{name}")
648 operands
<< Macro
.new(codeOriginInner
, nil, variables
, body
)
650 operands
<< parseOperand("while inside of macro call to #{name}")
653 if @tokens[@idx] == ")"
656 elsif @tokens[@idx] == ","
659 parseError
"Unexpected #{@tokens[@idx].string.inspect} while parsing invocation of macro #{name}"
663 # Check if there's a trailing annotation after the macro invoke:
664 if @tokens[@idx].is_a
? Annotation
665 @annotation = @tokens[@idx].string
666 @idx +
= 2 # Consume the newline as well.
668 list
<< MacroCall
.new(codeOrigin
, name
, operands
, @annotation)
671 parseError
"Expected \"(\" after #{name}"
673 elsif isLabel
@tokens[@idx] or isLocalLabel
@tokens[@idx]
674 codeOrigin
= @tokens[@idx].codeOrigin
675 name
= @tokens[@idx].string
677 parseError
unless @tokens[@idx] == ":"
680 list
<< Label
.forName(codeOrigin
, name
)
682 list
<< LocalLabel
.forName(codeOrigin
, name
)
685 elsif @tokens[@idx] == "include"
687 parseError
unless isIdentifier(@tokens[@idx])
688 moduleName
= @tokens[@idx].string
689 fileName
= @tokens[@idx].codeOrigin
.fileName
.dirname +
(moduleName +
".asm")
691 $stderr.puts
"offlineasm: Including file #{fileName}"
692 list
<< parse(fileName
)
694 parseError
"Expecting terminal #{final} #{comment}"
697 Sequence
.new(firstCodeOrigin
, list
)
701 def parseData(data, fileName
)
702 parser
= Parser
.new(data, fileName
)
703 parser
.parseSequence(nil, "")
707 parseData(IO
::read(fileName
), fileName
)
710 def parseHash(fileName
)
711 dirHash(Pathname
.new(fileName
).dirname
, /\.asm$/)