boringssl/util/fipstools/delocate.peg
Adam Langley f64a6eeaf0 Switch to new delocate tool.
Most importantly, this version of delocate works for ppc64le. It should
also work for x86-64, but will need significant testing to make sure
that it covers all the cases that the previous delocate.go covered.

It's less stringtastic than the old code, however the parser isn't as
nice as I would have liked. I thought that the reason we put up with
AT&T syntax with Intel is so that assembly syntax could be somewhat
consistent across platforms. At least for ppc64le, that does not appear
to be the case.

Change-Id: Ic7e3c6acc3803d19f2c3ff5620c5e39703d74212
Reviewed-on: https://boringssl-review.googlesource.com/16464
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-05-30 18:00:16 +00:00

86 lines
3.9 KiB
Plaintext

# Copyright (c) 2017, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
# This is a rough parser for x86-64 and ppc64le assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.
package main
type Asm Peg {}
AsmFile <- Statement* !.
Statement <- WS? (Label / ((GlobalDirective /
LocationDirective /
LabelContainingDirective /
Instruction /
Directive /
Comment / ) WS? ((Comment? '\n') / ';')))
GlobalDirective <- (".global" / ".globl") WS SymbolName
Directive <- '.' DirectiveName (WS Args)?
DirectiveName <- [[A-Z0-9_]]+
LocationDirective <- (".file" / ".loc") WS [^#\n]+
Args <- Arg ((WS? ',' WS?) Arg)*
Arg <- QuotedArg / [[0-9a-z%+\-_@.]]*
QuotedArg <- '"' QuotedText '"'
QuotedText <- (EscapedChar / [^"])*
LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
LabelContainingDirectiveName <- ".long" / ".set" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type"
SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*
SymbolArg <- Offset /
SymbolType /
(Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) /
LocalSymbol TCMarker? /
SymbolName Offset /
SymbolName TCMarker?
SymbolType <- '@function' / '@object'
Dot <- '.'
TCMarker <- '[TC]'
EscapedChar <- '\\' .
WS <- [ \t]+
Comment <- '#' [^\n]*
Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
LocalSymbol <- '.L' [[A-Z.0-9$_]]+
LocalLabel <- [0-9][0-9$]*
LocalLabelRef <- [0-9][0-9$]*[bf]
Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?
InstructionName <- [[A-Z]][[A-Z0-9]]* [.+\-]?
InstructionArg <- IndirectionIndicator? (RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / MemoryRef)
TOCRefHigh <- '.TOC.-' ('0b' / ('.Lfunc_gep' [0-9]+)) "@ha"
TOCRefLow <- '.TOC.-' ('0b' / ('.Lfunc_gep' [0-9]+)) "@l"
IndirectionIndicator <- '*'
RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / ('$'? ((Offset Offset) / Offset))) ![fb:(+\-]
# Compilers only output a very limited number of expression forms. Rather than
# implement a full expression parser, this enumerate those forms plus a few
# that appear in our hand-written assembly.
MemoryRef <- (SymbolRef BaseIndexScale /
# Some hand-written assembly has two offsets together, e.g.
# “foo+16+32”. These are never GOT references.
SymbolRef Offset BaseIndexScale /
SymbolRef /
Offset BaseIndexScale /
Offset Offset BaseIndexScale /
Offset Offset Offset BaseIndexScale /
SegmentRegister Offset BaseIndexScale /
SegmentRegister BaseIndexScale /
SegmentRegister Offset /
BaseIndexScale)
SymbolRef <- (Offset '+')? (LocalSymbol / SymbolName) Offset? ('@' Section)?
BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
Operator <- [+\-]
Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
Section <- [[A-Z@]]+
SegmentRegister <- '%' [c-gs] 's:'