f64a6eeaf0
Most importantly, this version of delocate works for ppc64le. It should also work for x86-64, but will need significant testing to make sure that it covers all the cases that the previous delocate.go covered. It's less stringtastic than the old code, however the parser isn't as nice as I would have liked. I thought that the reason we put up with AT&T syntax with Intel is so that assembly syntax could be somewhat consistent across platforms. At least for ppc64le, that does not appear to be the case. Change-Id: Ic7e3c6acc3803d19f2c3ff5620c5e39703d74212 Reviewed-on: https://boringssl-review.googlesource.com/16464 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
86 lines
3.9 KiB
Plaintext
86 lines
3.9 KiB
Plaintext
# Copyright (c) 2017, Google Inc.
|
|
#
|
|
# Permission to use, copy, modify, and/or distribute this software for any
|
|
# purpose with or without fee is hereby granted, provided that the above
|
|
# copyright notice and this permission notice appear in all copies.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
|
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
# This is a rough parser for x86-64 and ppc64le assembly designed to work with
|
|
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
|
|
# rebuilding delocate.peg.go from this file.
|
|
|
|
package main
|
|
|
|
# Asm is the parser type declared for this grammar. The braces are empty, so
# no extra fields are added to it here.
type Asm Peg {}
|
|
|
|
# AsmFile is the start rule: zero or more Statements, then end of input
# ("!." succeeds only when no character remains).
AsmFile <- Statement* !.
|
|
# Statement is one line-level unit: optional leading whitespace, then either a
# Label (which needs no terminator), or one of the directive / instruction /
# comment alternatives followed by optional whitespace and a terminator (a
# newline, optionally preceded by a Comment, or ';'). Alternatives are tried in
# the order written, so the specific directive rules take priority over the
# generic Directive rule.
# NOTE(review): the trailing '/' before the closing parenthesis appears to
# leave an empty alternative so that a bare terminator (e.g. a blank line)
# matches -- confirm against the peg tool before changing it.
Statement <- WS? (Label / ((GlobalDirective /
|
|
LocationDirective /
|
|
LabelContainingDirective /
|
|
Instruction /
|
|
Directive /
|
|
Comment / ) WS? ((Comment? '\n') / ';')))
|
|
# GlobalDirective matches ".global" or ".globl", whitespace, and a single
# SymbolName.
GlobalDirective <- (".global" / ".globl") WS SymbolName
|
|
# Directive is the generic form: a '.', a DirectiveName and, optionally,
# whitespace followed by an argument list.
Directive <- '.' DirectiveName (WS Args)?
|
|
# DirectiveName is one or more letters, digits or underscores. The doubled
# brackets are peg's case-insensitive character class.
DirectiveName <- [[A-Z0-9_]]+
|
|
# LocationDirective matches ".file" or ".loc", whitespace, and then one or
# more characters up to (but not including) a '#' or a newline.
LocationDirective <- (".file" / ".loc") WS [^#\n]+
|
|
# Args is a comma-separated list of one or more Arg, with optional whitespace
# on either side of each comma.
Args <- Arg ((WS? ',' WS?) Arg)*
|
|
# Arg is either a quoted string or a run of characters drawn from digits,
# letters and the punctuation % + - _ @ . -- note the '*' means the unquoted
# form may match the empty string.
Arg <- QuotedArg / [[0-9a-z%+\-_@.]]*
|
|
# QuotedArg is QuotedText enclosed in double quotes.
QuotedArg <- '"' QuotedText '"'
|
|
# QuotedText is any (possibly empty) sequence of escaped characters or
# characters other than a double quote. EscapedChar is tried first so that an
# escaped quote does not end the string.
QuotedText <- (EscapedChar / [^"])*
|
|
# LabelContainingDirective matches one of the directive names listed in
# LabelContainingDirectiveName, whitespace, and a list of SymbolArgs.
LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
|
|
# LabelContainingDirectiveName enumerates the directives whose arguments are
# parsed as SymbolArgs rather than as generic Args.
LabelContainingDirectiveName <- ".long" / ".set" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type"
|
|
# SymbolArgs is a comma-separated list of one or more SymbolArg, with optional
# whitespace on either side of each comma.
SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*
|
|
# SymbolArg is a single argument of a LabelContainingDirective. The
# alternatives are tried in the order written:
#   1. a bare Offset (numeric literal);
#   2. a SymbolType ('@function' / '@object');
#   3. a two-operand expression: (Offset, LocalSymbol, SymbolName or Dot),
#      an Operator, then (Offset, LocalSymbol or SymbolName), with optional
#      whitespace around the operator;
#   4. a LocalSymbol with an optional TCMarker;
#   5. a SymbolName immediately followed by an Offset;
#   6. a SymbolName with an optional TCMarker.
SymbolArg <- Offset /
|
|
SymbolType /
|
|
(Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) /
|
|
LocalSymbol TCMarker? /
|
|
SymbolName Offset /
|
|
SymbolName TCMarker?
|
|
# SymbolType matches the literal '@function' or '@object'.
SymbolType <- '@function' / '@object'
|
|
# Dot matches a single '.'; it is only accepted as the left operand of the
# two-operand SymbolArg form.
Dot <- '.'
|
|
# TCMarker matches the literal suffix '[TC]'.
TCMarker <- '[TC]'
|
|
# EscapedChar is a backslash followed by any single character.
EscapedChar <- '\\' .
|
|
# WS is one or more spaces or tabs. Newlines are not whitespace here; they
# terminate a Statement.
WS <- [ \t]+
|
|
# Comment is a '#' and everything up to, but not including, the next newline.
Comment <- '#' [^\n]*
|
|
# Label is a LocalSymbol, a numeric LocalLabel or a SymbolName followed by ':'.
# LocalSymbol is tried before SymbolName because SymbolName would also accept
# the '.L' prefix.
Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
|
|
# SymbolName starts with a letter, '.' or '_' and continues with any number of
# letters, digits, '.', '$' or '_' (case-insensitive classes).
SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
|
|
# LocalSymbol is the case-sensitive prefix '.L' followed by one or more
# letters, digits, '.', '$' or '_'.
LocalSymbol <- '.L' [[A-Z.0-9$_]]+
|
|
# LocalLabel is a numeric label: a digit followed by any number of digits or
# '$' characters.
LocalLabel <- [0-9][0-9$]*
|
|
# LocalLabelRef is a reference to a numeric label: the same shape as
# LocalLabel followed by 'b' or 'f'.
LocalLabelRef <- [0-9][0-9$]*[bf]
|
|
# Instruction is an InstructionName optionally followed by whitespace and a
# comma-separated list of one or more InstructionArg (optional whitespace
# around each comma).
Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?
|
|
# InstructionName is a letter followed by letters or digits, with an optional
# single trailing '.', '+' or '-'.
InstructionName <- [[A-Z]][[A-Z0-9]]* [.+\-]?
|
|
# InstructionArg is an optional '*' indirection marker followed by one operand
# form. The forms are tried in order: RegisterOrConstant, LocalLabelRef,
# TOCRefHigh, TOCRefLow, then MemoryRef.
InstructionArg <- IndirectionIndicator? (RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / MemoryRef)
|
|
# TOCRefHigh matches '.TOC.-' followed by either '0b' or '.Lfunc_gep<digits>',
# and then the "@ha" suffix, e.g. ".TOC.-0b@ha".
TOCRefHigh <- '.TOC.-' ('0b' / ('.Lfunc_gep' [0-9]+)) "@ha"
|
|
# TOCRefLow matches '.TOC.-' followed by either '0b' or '.Lfunc_gep<digits>',
# and then the "@l" suffix, e.g. ".TOC.-0b@l".
TOCRefLow <- '.TOC.-' ('0b' / ('.Lfunc_gep' [0-9]+)) "@l"
|
|
# IndirectionIndicator is the '*' that may prefix an InstructionArg.
IndirectionIndicator <- '*'
|
|
# RegisterOrConstant matches either a '%'-prefixed register name (a letter
# followed by letters or digits) or an optionally '$'-prefixed constant made of
# one or two Offsets. The negative lookahead rejects the match when the next
# character is 'f', 'b', ':', '(', '+' or '-'.
# NOTE(review): the lookahead presumably leaves local label references,
# segment prefixes and memory operands for the later InstructionArg
# alternatives -- confirm.
RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / ('$'? ((Offset Offset) / Offset))) ![fb:(+\-]
|
|
# Compilers only output a very limited number of expression forms. Rather than
|
|
# implement a full expression parser, this enumerates those forms plus a few
|
|
# that appear in our hand-written assembly.
|
|
# MemoryRef lists the accepted memory-operand shapes. Alternatives are tried
# in the order written and the first one that matches wins, so the longer
# SymbolRef forms come before the bare SymbolRef. The shapes are combinations
# of a SymbolRef, up to three Offsets, a SegmentRegister prefix, and a
# parenthesised BaseIndexScale.
MemoryRef <- (SymbolRef BaseIndexScale /
|
|
# Some hand-written assembly has two offsets together, e.g.
|
|
# “foo+16+32”. These are never GOT references.
|
|
SymbolRef Offset BaseIndexScale /
|
|
SymbolRef /
|
|
Offset BaseIndexScale /
|
|
Offset Offset BaseIndexScale /
|
|
Offset Offset Offset BaseIndexScale /
|
|
SegmentRegister Offset BaseIndexScale /
|
|
SegmentRegister BaseIndexScale /
|
|
SegmentRegister Offset /
|
|
BaseIndexScale)
|
|
# SymbolRef is an optional "Offset+" prefix, a LocalSymbol or SymbolName, an
# optional trailing Offset, and an optional '@' Section suffix.
SymbolRef <- (Offset '+')? (LocalSymbol / SymbolName) Offset? ('@' Section)?
|
|
# BaseIndexScale is the parenthesised part of a memory operand: an optional
# first RegisterOrConstant, then optionally a comma and a second
# RegisterOrConstant, which may itself be followed by a comma and a run of
# digits. Whitespace is permitted at the points marked WS?.
BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
|
|
# Operator is a single '+' or '-'.
Operator <- [+\-]
|
|
# Offset is a numeric literal with an optional '+' and an optional '-' in
# front (in that order): binary ("0b" then 0/1 digits), hexadecimal ("0x" then
# hex digits) or decimal. The prefixed forms are tried before plain decimal.
Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
|
|
# Section is one or more letters or '@' characters; it is used as the suffix
# after '@' in a SymbolRef.
Section <- [[A-Z@]]+
|
|
# SegmentRegister matches '%', one character from c-g or s, and then "s:",
# i.e. %cs:, %ds:, %es:, %fs:, %gs: or %ss:.
SegmentRegister <- '%' [c-gs] 's:'
|