]>
git.saurik.com Git - apple/javascriptcore.git/blob - offlineasm/parser.rb
1 # Copyright (C) 2011 Apple Inc. All rights reserved.
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions
6 # 1. Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # 2. Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
12 # THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
13 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
14 # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
15 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
16 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
22 # THE POSSIBILITY OF SUCH DAMAGE.
25 require "instructions"
31 attr_reader
:fileName, :lineNumber
33 def initialize(fileName
, lineNumber
)
35 @lineNumber = lineNumber
39 "#{fileName}:#{lineNumber}"
44 attr_reader
:codeOrigin, :string
46 def initialize(codeOrigin
, string
)
47 @codeOrigin = codeOrigin
53 @string == other
.string
64 "#{@string.inspect} at #{codeOrigin}"
67 def parseError(*comment
)
69 raise "Parse error: #{to_s}"
71 raise "Parse error: #{to_s}: #{comment[0]}"
# The lexer. Takes a string plus the name of the file it came from (recorded
# in each token's CodeOrigin) and returns an array of tokens.
80 def lex(str
, fileName
)
81 fileName
= Pathname
.new(fileName
)
89 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
91 when /\A[a-zA-Z]([a-zA-Z0-9_]*)/
92 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
93 when /\A\.([a-zA-Z0-9_]*)/
94 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
95 when /\A_([a-zA-Z0-9_]*)/
96 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
99 when /\A0x([0-9a-fA-F]+)/
100 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&.hex
.to_s
)
102 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&.oct
.to_s
)
104 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
106 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
107 when /\A[:,\(\)\[\]=\+\-~\|&^*]/
108 result
<< Token
.new(CodeOrigin
.new(fileName
, lineNumber
), $
&)
110 raise "Lexer error at #{CodeOrigin.new(fileName, lineNumber).to_s}, unexpected sequence #{str[0..20].inspect}"
118 # Token identification.
# Tests whether +token+ names a register.
#
# +token+ may be anything that responds to `=~` (the parser applies `=~` to
# its tokens directly). REGISTER_PATTERN is defined outside this block.
#
# Returns the result of the `=~` match: truthy on a match, nil otherwise.
def isRegister(token)
  token =~ REGISTER_PATTERN
end
# Tests whether +token+ names an instruction.
#
# +token+ may be anything that responds to `=~`. INSTRUCTION_PATTERN is
# defined outside this block.
#
# Returns the result of the `=~` match: truthy on a match, nil otherwise.
def isInstruction(token)
  token =~ INSTRUCTION_PATTERN
end
130 token
=~
/\A((true)|(false)|(if)|(then)|(else)|(elsif)|(end)|(and)|(or)|(not)|(macro)|(const)|(sizeof)|(error)|(include))\Z/ or
131 token
=~ REGISTER_PATTERN
or
132 token
=~ INSTRUCTION_PATTERN
# Tests whether +token+ is a plain identifier: a letter followed by any number
# of letters, digits or underscores, and not something isKeyword accepts.
#
# The pattern is anchored with \z rather than \Z so that a string with a
# trailing newline (e.g. "foo\n") is rejected instead of silently accepted.
#
# Returns nil when the shape does not match, otherwise true/false depending on
# whether the token is a keyword.
def isIdentifier(token)
  token =~ /\A[a-zA-Z]([a-zA-Z0-9_]*)\z/ && !isKeyword(token)
end
140 token
=~
/\A_([a-zA-Z0-9_]*)\Z/
# Tests whether +token+ is a local label: a "." followed by any number
# (possibly zero) of letters, digits or underscores.
#
# The pattern is anchored with \z rather than \Z so that a string with a
# trailing newline (e.g. ".loop\n") is rejected instead of silently accepted.
#
# Returns the result of the `=~` match: truthy on a match, nil otherwise.
def isLocalLabel(token)
  token =~ /\A\.([a-zA-Z0-9_]*)\z/
end
# Tests whether +token+ can be used as a variable, i.e. whether it is accepted
# by isIdentifier or, failing that, by isRegister.
#
# Returns the first truthy result of those two checks, or the (falsy) result of
# isRegister when neither accepts the token.
def isVariable(token)
  isIdentifier(token) || isRegister(token)
end
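# The parser. Built from source text and a file name, which it lexes into
# tokens; its parse methods then turn those tokens into an AST. Methods other
# than parseSequence (the entry point parseData uses) are not for public
# consumption.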
161 def initialize(data, fileName
)
162 @tokens = lex(data, fileName
)
166 def parseError(*comment
)
168 @tokens[@idx].parseError(*comment
)
171 raise "Parse error at end of file"
173 raise "Parse error at end of file: #{comment[0]}"
180 parseError
unless @tokens[@idx] =~ regexp
182 parseError
unless @idx == @tokens.length
188 while @tokens[@idx] == "\n"
193 def parsePredicateAtom
194 if @tokens[@idx] == "not"
197 elsif @tokens[@idx] == "("
200 result
= parsePredicate
201 parseError
unless @tokens[@idx] == ")"
204 elsif @tokens[@idx] == "true"
205 result
= True
.instance
208 elsif @tokens[@idx] == "false"
209 result
= False
.instance
212 elsif isIdentifier
@tokens[@idx]
213 result
= Setting
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
221 def parsePredicateAnd
222 result
= parsePredicateAtom
223 while @tokens[@idx] == "and"
224 codeOrigin
= @tokens[@idx].codeOrigin
227 right
= parsePredicateAtom
228 result
= And
.new(codeOrigin
, result
, right
)
234 # some examples of precedence:
235 # not a and b -> (not a) and b
236 # a and b or c -> (a and b) or c
237 # a or b and c -> a or (b and c)
239 result
= parsePredicateAnd
240 while @tokens[@idx] == "or"
241 codeOrigin
= @tokens[@idx].codeOrigin
244 right
= parsePredicateAnd
245 result
= Or
.new(codeOrigin
, result
, right
)
251 if isRegister(@tokens[@idx])
252 if @tokens[@idx] =~ FPR_PATTERN
253 result
= FPRegisterID
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
255 result
= RegisterID
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
257 elsif isIdentifier(@tokens[@idx])
258 result
= Variable
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
266 def parseAddress(offset
)
267 parseError
unless @tokens[@idx] == "["
268 codeOrigin
= @tokens[@idx].codeOrigin
# Four possibilities:
# []       -> AbsoluteAddress
# [a]      -> Address
# [a,b]    -> BaseIndex with scale = 1
# [a,b,c]  -> BaseIndex
277 if @tokens[@idx] == "]"
279 return AbsoluteAddress
.new(codeOrigin
, offset
)
282 if @tokens[@idx] == "]"
283 result
= Address
.new(codeOrigin
, a
, offset
)
285 parseError
unless @tokens[@idx] == ","
288 if @tokens[@idx] == "]"
289 result
= BaseIndex
.new(codeOrigin
, a
, b
, 1, offset
)
291 parseError
unless @tokens[@idx] == ","
293 parseError
unless ["1", "2", "4", "8"].member
? @tokens[@idx].string
294 c
= @tokens[@idx].string
.to_i
296 parseError
unless @tokens[@idx] == "]"
297 result
= BaseIndex
.new(codeOrigin
, a
, b
, c
, offset
)
306 codeOrigin
= @tokens[@idx].codeOrigin
307 parseError
unless isIdentifier
@tokens[@idx]
308 names
= [@tokens[@idx].string
]
310 while @tokens[@idx] == "::"
312 parseError
unless isIdentifier
@tokens[@idx]
313 names
<< @tokens[@idx].string
316 raise if names
.empty
?
320 def parseExpressionAtom
322 if @tokens[@idx] == "-"
324 NegImmediate
.new(@tokens[@idx - 1].codeOrigin
, parseExpressionAtom
)
325 elsif @tokens[@idx] == "~"
327 BitnotImmediate
.new(@tokens[@idx - 1].codeOrigin
, parseExpressionAtom
)
328 elsif @tokens[@idx] == "("
330 result
= parseExpression
331 parseError
unless @tokens[@idx] == ")"
334 elsif isInteger
@tokens[@idx]
335 result
= Immediate
.new(@tokens[@idx].codeOrigin
, @tokens[@idx].string
.to_i
)
338 elsif isIdentifier
@tokens[@idx]
339 codeOrigin
, names
= parseColonColon
341 StructOffset
.forField(codeOrigin
, names
[0..-2].join('::'), names
[-1])
343 Variable
.forName(codeOrigin
, names
[0])
345 elsif isRegister
@tokens[@idx]
347 elsif @tokens[@idx] == "sizeof"
349 codeOrigin
, names
= parseColonColon
350 Sizeof
.forName(codeOrigin
, names
.join('::'))
356 def parseExpressionMul
358 result
= parseExpressionAtom
359 while @tokens[@idx] == "*"
360 if @tokens[@idx] == "*"
362 result
= MulImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAtom
)
# Reports whether the token at the current position (@tokens[@idx]) could be
# the start of an expression: a unary "-" or "~", the "sizeof" keyword, an
# integer, a variable, or an opening parenthesis.
#
# The checks run in the same order as before, so the value returned is still
# that of the first check that succeeds (or of the final "(" comparison).
def couldBeExpression
  token = @tokens[@idx]
  token == "-" || token == "~" || token == "sizeof" ||
    isInteger(token) || isVariable(token) || token == "("
end
374 def parseExpressionAdd
376 result
= parseExpressionMul
377 while @tokens[@idx] == "+" or @tokens[@idx] == "-"
378 if @tokens[@idx] == "+
"
380 result
= AddImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionMul
)
381 elsif @tokens[@idx] == "-"
383 result
= SubImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionMul
)
391 def parseExpressionAnd
393 result
= parseExpressionAdd
394 while @tokens[@idx] == "&"
396 result
= AndImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAdd
)
403 result
= parseExpressionAnd
404 while @tokens[@idx] == "|" or @tokens[@idx] == "^"
405 if @tokens[@idx] == "|"
407 result
= OrImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAnd
)
408 elsif @tokens[@idx] == "^"
410 result
= XorImmediates
.new(@tokens[@idx - 1].codeOrigin
, result
, parseExpressionAnd
)
418 def parseOperand(comment
)
421 expr
= parseExpression
422 if @tokens[@idx] == "["
427 elsif @tokens[@idx] == "["
428 parseAddress(Immediate
.new(@tokens[@idx].codeOrigin
, 0))
429 elsif isLabel
@tokens[@idx]
430 result
= LabelReference
.new(@tokens[@idx].codeOrigin
, Label
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
))
433 elsif isLocalLabel
@tokens[@idx]
434 result
= LocalLabelReference
.new(@tokens[@idx].codeOrigin
, LocalLabel
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
))
442 def parseMacroVariables
448 if @tokens[@idx] == ")"
451 elsif isIdentifier(@tokens[@idx])
452 variables
<< Variable
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
455 if @tokens[@idx] == ")"
458 elsif @tokens[@idx] == ","
470 def parseSequence(final
, comment
)
471 firstCodeOrigin
= @tokens[@idx].codeOrigin
474 if (@idx == @tokens.length
and not final
) or (final
and @tokens[@idx] =~ final
)
476 elsif @tokens[@idx] == "\n"
479 elsif @tokens[@idx] == "const"
481 parseError
unless isVariable
@tokens[@idx]
482 variable
= Variable
.forName(@tokens[@idx].codeOrigin
, @tokens[@idx].string
)
484 parseError
unless @tokens[@idx] == "="
486 value
= parseOperand("while inside of const #{variable.name}")
487 list
<< ConstDecl
.new(@tokens[@idx].codeOrigin
, variable
, value
)
488 elsif @tokens[@idx] == "error"
489 list
<< Error
.new(@tokens[@idx].codeOrigin
)
491 elsif @tokens[@idx] == "if"
492 codeOrigin
= @tokens[@idx].codeOrigin
495 predicate
= parsePredicate
496 consume(/\A((then)|(\n))\Z/)
498 ifThenElse
= IfThenElse
.new(codeOrigin
, predicate
, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
500 while @tokens[@idx] == "elsif"
501 codeOrigin
= @tokens[@idx].codeOrigin
504 predicate
= parsePredicate
505 consume(/\A((then)|(\n))\Z/)
507 elseCase
= IfThenElse
.new(codeOrigin
, predicate
, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
508 ifThenElse
.elseCase
= elseCase
509 ifThenElse
= elseCase
511 if @tokens[@idx] == "else"
513 ifThenElse
.elseCase
= parseSequence(/\Aend\Z/, "while inside of else case for \"if #{predicate.dump}\"")
516 parseError
unless @tokens[@idx] == "end"
519 elsif @tokens[@idx] == "macro"
520 codeOrigin
= @tokens[@idx].codeOrigin
523 parseError
unless isIdentifier(@tokens[@idx])
524 name
= @tokens[@idx].string
526 variables
= parseMacroVariables
527 body
= parseSequence(/\Aend\Z/, "while inside of macro #{name}")
529 list
<< Macro
.new(codeOrigin
, name
, variables
, body
)
530 elsif isInstruction
@tokens[@idx]
531 codeOrigin
= @tokens[@idx].codeOrigin
532 name
= @tokens[@idx].string
534 if (not final
and @idx == @tokens.size
) or (final
and @tokens[@idx] =~ final
)
535 # Zero operand instruction, and it's the last one.
536 list
<< Instruction
.new(codeOrigin
, name
, [])
538 elsif @tokens[@idx] == "\n"
539 # Zero operand instruction.
540 list
<< Instruction
.new(codeOrigin
, name
, [])
543 # It's definitely an instruction, and it has at least one operand.
545 endOfSequence
= false
547 operands
<< parseOperand("while inside of instruction #{name}")
548 if (not final
and @idx == @tokens.size
) or (final
and @tokens[@idx] =~ final
)
549 # The end of the instruction and of the sequence.
552 elsif @tokens[@idx] == ","
553 # Has another operand.
555 elsif @tokens[@idx] == "\n"
556 # The end of the instruction.
560 parseError("Expected a comma, newline, or #{final} after #{operands.last.dump}")
563 list
<< Instruction
.new(codeOrigin
, name
, operands
)
568 elsif isIdentifier
@tokens[@idx]
569 codeOrigin
= @tokens[@idx].codeOrigin
570 name
= @tokens[@idx].string
572 if @tokens[@idx] == "("
577 if @tokens[@idx] == ")"
582 if @tokens[@idx] == "macro"
583 # It's a macro lambda!
584 codeOriginInner
= @tokens[@idx].codeOrigin
586 variables
= parseMacroVariables
587 body
= parseSequence(/\Aend\Z/, "while inside of anonymous macro passed as argument to #{name}")
589 operands
<< Macro
.new(codeOriginInner
, nil, variables
, body
)
591 operands
<< parseOperand("while inside of macro call to #{name}")
594 if @tokens[@idx] == ")"
597 elsif @tokens[@idx] == ","
600 parseError
"Unexpected #{@tokens[@idx].string.inspect} while parsing invocation of macro #{name}"
604 list
<< MacroCall
.new(codeOrigin
, name
, operands
)
606 parseError
"Expected \"(\" after #{name}"
608 elsif isLabel
@tokens[@idx] or isLocalLabel
@tokens[@idx]
609 codeOrigin
= @tokens[@idx].codeOrigin
610 name
= @tokens[@idx].string
612 parseError
unless @tokens[@idx] == ":"
615 list
<< Label
.forName(codeOrigin
, name
)
617 list
<< LocalLabel
.forName(codeOrigin
, name
)
620 elsif @tokens[@idx] == "include"
622 parseError
unless isIdentifier(@tokens[@idx])
623 moduleName
= @tokens[@idx].string
624 fileName
= @tokens[@idx].codeOrigin
.fileName
.dirname +
(moduleName +
".asm")
626 $stderr.puts
"offlineasm: Including file #{fileName}"
627 list
<< parse(fileName
)
629 parseError
"Expecting terminal #{final} #{comment}"
632 Sequence
.new(firstCodeOrigin
, list
)
# Parses offlineasm source text that is already in memory.
#
# data     - the source text handed to Parser.new.
# fileName - the file name handed to Parser.new alongside the text.
#
# Returns whatever Parser#parseSequence(nil, "") returns for that input, i.e.
# the sequence parsed with no terminating token and an empty context comment.
def parseData(data, fileName)
  Parser.new(data, fileName).parseSequence(nil, "")
end
642 parseData(IO
::read(fileName
), fileName
)
645 def parseHash(fileName
)
646 dirHash(Pathname
.new(fileName
).dirname
, /\.asm$/)