2017-05-17 21:05:50 +01:00
|
|
|
# Copyright (c) 2017, Google Inc.
|
|
|
|
#
|
|
|
|
# Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
# purpose with or without fee is hereby granted, provided that the above
|
|
|
|
# copyright notice and this permission notice appear in all copies.
|
|
|
|
#
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
|
|
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
|
|
|
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
|
|
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
|
|
|
|
# This is a rough parser for x86-64 and ppc64le assembly designed to work with
|
|
|
|
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
|
|
|
|
# rebuilding delocate.peg.go from this file.
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
# Parser type declaration consumed by the peg generator; the braces hold no
# extra fields. NOTE(review): the generated Go type is expected to be named
# Asm -- confirm against delocate.peg.go.
type Asm Peg {}
|
|
|
|
|
|
|
|
# AsmFile is the first rule of the grammar: any number of Statements followed
# by end of input (`!.` succeeds only when no character remains).
AsmFile <- Statement* !.
|
|
|
|
# Statement is optional leading whitespace followed by either a Label (which
# takes no terminator) or one of the directive/instruction/comment forms,
# optional whitespace, and a terminator: a newline (optionally preceded by a
# Comment) or ';'.
# Alternatives are tried in order, so the specific directive rules are
# attempted before the generic Directive rule. The trailing '/' before ')'
# leaves an empty final alternative, so an otherwise empty line still matches.
Statement <- WS? (Label / ((GlobalDirective /
|
|
|
|
LocationDirective /
|
|
|
|
LabelContainingDirective /
|
|
|
|
Instruction /
|
|
|
|
Directive /
|
|
|
|
Comment / ) WS? ((Comment? '\n') / ';')))
|
|
|
|
# GlobalDirective matches ".global" or ".globl" followed by whitespace and a
# single SymbolName. Double-quoted literals are matched case-insensitively by
# peg.
GlobalDirective <- (".global" / ".globl") WS SymbolName
|
|
|
|
# Directive is the generic fallback form: a '.', a DirectiveName, and an
# optional whitespace-separated argument list.
Directive <- '.' DirectiveName (WS Args)?
|
|
|
|
# DirectiveName is one or more letters, digits or underscores. The double
# brackets make the character class case-insensitive in peg.
DirectiveName <- [[A-Z0-9_]]+
|
|
|
|
# LocationDirective matches ".file" or ".loc", whitespace, and then at least
# one character up to (but not including) the next '#' or newline. The
# arguments are not parsed further.
LocationDirective <- (".file" / ".loc") WS [^#\n]+
|
|
|
|
# Args is a comma-separated list of one or more Arg, with optional whitespace
# on either side of each comma.
Args <- Arg ((WS? ',' WS?) Arg)*
|
|
|
|
# Arg is either a QuotedArg or a run of digits, letters and the characters
# % + - _ @ . (case-insensitive class). The '*' means the unquoted form may
# match the empty string.
Arg <- QuotedArg / [[0-9a-z%+\-_@.]]*
|
|
|
|
# QuotedArg is QuotedText enclosed in double quotes.
QuotedArg <- '"' QuotedText '"'
|
|
|
|
# QuotedText is any sequence of escaped characters and non-quote characters.
# EscapedChar is tried first so that a backslash-escaped quote does not end
# the text.
QuotedText <- (EscapedChar / [^"])*
|
|
|
|
# LabelContainingDirective is one of the directives named in
# LabelContainingDirectiveName followed by whitespace and SymbolArgs, i.e. a
# directive whose arguments are parsed as symbol expressions rather than
# opaque Args.
LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
|
|
|
|
# The directive names whose arguments are parsed with SymbolArgs. The
# double-quoted literals are case-insensitive in peg.
LabelContainingDirectiveName <- ".long" / ".set" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type"
|
|
|
|
# SymbolArgs is a comma-separated list of one or more SymbolArg, with optional
# whitespace on either side of each comma.
SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*
|
|
|
|
# SymbolArg is a single argument of a LabelContainingDirective. The
# alternatives, tried in this order, are:
#   - a bare Offset (numeric constant);
#   - a SymbolType ('@function' / '@object');
#   - a binary expression: (Offset, LocalSymbol, SymbolName or Dot), an
#     Operator, then (Offset, LocalSymbol or SymbolName), with optional
#     whitespace around the operator;
#   - a LocalSymbol with an optional TCMarker;
#   - a SymbolName immediately followed by an Offset;
#   - a SymbolName with an optional TCMarker.
# Because PEG choice is ordered and committed, the order of these
# alternatives is significant.
SymbolArg <- Offset /
|
|
|
|
SymbolType /
|
|
|
|
(Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) /
|
|
|
|
LocalSymbol TCMarker? /
|
|
|
|
SymbolName Offset /
|
|
|
|
SymbolName TCMarker?
|
|
|
|
# SymbolType is the literal '@function' or '@object' (single-quoted literals
# are case-sensitive in peg).
SymbolType <- '@function' / '@object'
|
|
|
|
# Dot is a single '.'; it is accepted only as the left operand of the binary
# expression form in SymbolArg.
Dot <- '.'
|
|
|
|
# TCMarker is the literal suffix '[TC]' that may follow a symbol in SymbolArg.
TCMarker <- '[TC]'
|
|
|
|
# EscapedChar is a backslash followed by any single character.
EscapedChar <- '\\' .
|
|
|
|
# WS is one or more spaces or tabs. Newlines are not whitespace here; they
# terminate statements.
WS <- [ \t]+
|
|
|
|
# Comment is '#' followed by everything up to, but not including, the newline.
Comment <- '#' [^\n]*
|
|
|
|
# Label is a LocalSymbol, a numeric LocalLabel or a SymbolName followed by
# ':'. LocalSymbol is tried before SymbolName because SymbolName also accepts
# names beginning with '.'.
Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
|
|
|
|
# SymbolName starts with a letter, '.' or '_' and continues with letters,
# digits, '.', '$' or '_' (case-insensitive classes).
SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
|
|
|
|
# LocalSymbol is the case-sensitive prefix '.L' followed by one or more
# letters, digits, '.', '$' or '_'.
LocalSymbol <- '.L' [[A-Z.0-9$_]]+
|
|
|
|
# LocalLabel is a numeric label: a digit followed by any number of digits or
# '$' characters.
LocalLabel <- [0-9][0-9$]*
|
|
|
|
# LocalLabelRef is a reference to a numeric local label: the same shape as
# LocalLabel with a trailing 'b' or 'f' (e.g. "1b", "2f").
LocalLabelRef <- [0-9][0-9$]*[bf]
|
|
|
|
# Instruction is an InstructionName optionally followed by whitespace and a
# comma-separated list of InstructionArg (optional whitespace around commas).
Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?
|
|
|
|
# InstructionName is a letter followed by letters or digits, with at most one
# trailing '.', '+' or '-'. It cannot start with '.', so directives never
# match as instructions.
InstructionName <- [[A-Z]][[A-Z0-9]]* [.+\-]?
|
|
|
|
# InstructionArg is an optional IndirectionIndicator ('*') followed by the
# first of these that matches: RegisterOrConstant, LocalLabelRef, TOCRefHigh,
# TOCRefLow, MemoryRef.
InstructionArg <- IndirectionIndicator? (RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / MemoryRef)
|
2017-06-08 01:38:32 +01:00
|
|
|
# TOCRefHigh matches '.TOC.-' followed by either '0b' or a '.L'-prefixed local
# name, and then the "@ha" suffix (case-insensitive).
TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha"
|
|
|
|
# TOCRefLow has the same shape as TOCRefHigh but ends in the "@l" suffix
# (case-insensitive).
TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l"
|
2017-05-17 21:05:50 +01:00
|
|
|
# IndirectionIndicator is the '*' that may prefix an InstructionArg.
IndirectionIndicator <- '*'
|
|
|
|
# RegisterOrConstant is either a '%'-prefixed register name (a letter followed
# by letters or digits), or an optional '$' followed by one or two Offsets.
# The trailing negative lookahead rejects the match when the next character is
# 'f', 'b', ':', '(', '+' or '-'. Presumably this leaves local label
# references, segment prefixes and memory operands for the later InstructionArg
# alternatives -- confirm against the generated parser's users.
RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / ('$'? ((Offset Offset) / Offset))) ![fb:(+\-]
|
|
|
|
# Compilers only output a very limited number of expression forms. Rather than
|
|
|
|
# implement a full expression parser, this enumerates those forms plus a few
|
|
|
|
# that appear in our hand-written assembly.
|
|
|
|
# MemoryRef enumerates the accepted memory operand shapes, tried in order:
#   - SymbolRef followed by BaseIndexScale;
#   - SymbolRef alone;
#   - zero or more Offsets followed by BaseIndexScale;
#   - SegmentRegister, Offset, BaseIndexScale;
#   - SegmentRegister, BaseIndexScale;
#   - SegmentRegister, Offset;
#   - BaseIndexScale alone.
# Longer forms precede their prefixes (e.g. "SymbolRef BaseIndexScale" before
# "SymbolRef") because PEG choice commits to the first alternative that
# matches.
MemoryRef <- (SymbolRef BaseIndexScale /
|
|
|
|
SymbolRef /
|
2017-06-06 21:03:53 +01:00
|
|
|
Offset* BaseIndexScale /
|
2017-05-17 21:05:50 +01:00
|
|
|
SegmentRegister Offset BaseIndexScale /
|
|
|
|
SegmentRegister BaseIndexScale /
|
|
|
|
SegmentRegister Offset /
|
|
|
|
BaseIndexScale)
|
2017-06-06 21:03:53 +01:00
|
|
|
# SymbolRef is a symbol reference with optional arithmetic around it: an
# optional "Offset* '+'" prefix, a LocalSymbol or SymbolName, any number of
# trailing Offsets, and an optional '@' Section suffix that may itself be
# followed by Offsets.
SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?
|
2017-05-17 21:05:50 +01:00
|
|
|
# BaseIndexScale is the parenthesised "(base, index, scale)" operand part. The
# base is optional; the ", index" part is optional; the ", scale" part (decimal
# digits) is optional and only allowed after an index. Whitespace is accepted
# after the base, after the first comma and after the index, but not between
# the second comma and the scale digits.
BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
|
|
|
|
# Operator is a single '+' or '-', used by the binary expression form in
# SymbolArg.
Operator <- [+\-]
|
|
|
|
# Offset is a numeric literal with an optional '+' and then an optional '-'
# sign: binary ("0b" + binary digits), hexadecimal ("0x" + hex digits) or
# decimal. The prefixed forms are tried first so that the leading "0" is not
# consumed as a decimal number. Prefixes and hex digits are case-insensitive.
Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
|
|
|
|
# Section is one or more letters or '@' characters (case-insensitive); it
# follows the '@' in SymbolRef.
Section <- [[A-Z@]]+
|
|
|
|
# SegmentRegister is a segment-override prefix: '%', one character from
# c, d, e, f, g or s, then the literal 's:' (e.g. "%fs:").
SegmentRegister <- '%' [c-gs] 's:'
|