# Source: boringssl/util/fipstools/delocate.peg (81 lines, 3.7 KiB, plaintext)

# Copyright (c) 2017, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# This is a rough parser for x86-64 and ppc64le assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.
# Header read by the peg tool: the Go package that the generated parser
# (delocate.peg.go) belongs to, and the name of the generated parser type.
# The braces can hold extra Go fields for the parser struct; none are declared
# here.
package main
type Asm Peg {}
# AsmFile is the first rule in the grammar (presumably the start rule): zero
# or more statements followed by end of input. `!.` succeeds only when no
# character remains.
AsmFile <- Statement* !.
# Statement matches one assembly statement after optional leading whitespace.
# It is either a Label (which needs no terminator) or one of the directive /
# instruction / comment forms followed by optional whitespace and a
# terminator: an optional trailing comment plus newline, or ';'.
#
# The alternatives are tried in the order written, so the specific directive
# rules are attempted before the generic Directive rule.
#
# NOTE(review): the `/` directly before `)` leaves an empty final alternative,
# which appears to be what lets a blank (or whitespace-only) line match as a
# statement -- confirm before "tidying" it away.
Statement <- WS? (Label / ((GlobalDirective /
LocationDirective /
LabelContainingDirective /
Instruction /
Directive /
Comment / ) WS? ((Comment? '\n') / ';')))
# GlobalDirective matches `.global` or `.globl`, whitespace, and the symbol
# name being exported.
GlobalDirective <- (".global" / ".globl") WS SymbolName
# Directive is the generic form: a '.', a directive name and, optionally,
# whitespace followed by an argument list. Statement lists it after the more
# specific directive rules, so it is only reached when those do not match.
Directive <- '.' DirectiveName (WS Args)?
# DirectiveName is one or more letters, digits or underscores. The
# double-bracket class is peg's case-insensitive character class, so lowercase
# names match too.
DirectiveName <- [[A-Z0-9_]]+
# LocationDirective matches `.file` or `.loc`, whitespace, and then the rest
# of the line up to (but not including) a '#' or newline. The operands are
# consumed as opaque text rather than parsed.
LocationDirective <- (".file" / ".loc") WS [^#\n]+
# Args is a comma-separated list of Arg, with optional whitespace on either
# side of each comma.
Args <- Arg ((WS? ',' WS?) Arg)*
# Arg is either a double-quoted string or a run of characters drawn from
# digits, letters (case-insensitive class) and `% + - _ @ .`. The run uses
# `*`, so an Arg may be empty.
Arg <- QuotedArg / [[0-9a-z%+\-_@.]]*
# QuotedArg is a double-quoted string argument.
QuotedArg <- '"' QuotedText '"'
# QuotedText is the (possibly empty) body of a quoted string: any sequence of
# backslash escapes or characters other than '"'. EscapedChar is tried first
# so that an escaped quote (\") does not end the string.
QuotedText <- (EscapedChar / [^"])*
# LabelContainingDirective matches one of the directives named below followed
# by whitespace and a list of symbol arguments. Unlike the generic Directive
# rule, its operands are parsed with SymbolArgs.
LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
# The directives whose operands are parsed as symbol expressions. Alternatives
# are tried left to right.
LabelContainingDirectiveName <- ".long" / ".set" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type"
# SymbolArgs is a comma-separated list of SymbolArg, with optional whitespace
# on either side of each comma.
SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*
# SymbolArg is a single operand of a label-containing directive. The
# alternatives, tried in this order, are:
#   1. a bare numeric Offset;
#   2. a SymbolType (`@function` / `@object`);
#   3. a binary expression `lhs op rhs`, where lhs is an Offset, LocalSymbol,
#      SymbolName or '.', op is '+' or '-', and rhs is an Offset, LocalSymbol
#      or SymbolName;
#   4. a LocalSymbol, optionally followed by `[TC]`;
#   5. a SymbolName immediately followed by an Offset;
#   6. a SymbolName, optionally followed by `[TC]`.
# PEG choice is ordered, so do not reorder these without checking which
# inputs each alternative would then capture.
SymbolArg <- Offset /
SymbolType /
(Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) /
LocalSymbol TCMarker? /
SymbolName Offset /
SymbolName TCMarker?
# SymbolType is the literal `@function` or `@object` (single-quoted, so
# matched case-sensitively).
SymbolType <- '@function' / '@object'
# Dot is a lone '.', usable as the left-hand side of a SymbolArg expression.
Dot <- '.'
# TCMarker is the literal suffix `[TC]` that may follow a symbol in SymbolArg.
TCMarker <- '[TC]'
# EscapedChar is a backslash followed by any single character.
EscapedChar <- '\\' .
# WS is one or more spaces or tabs. Newlines are not included; they act as
# statement terminators in Statement.
WS <- [ \t]+
# Comment runs from '#' to the end of the line; the newline is not consumed.
Comment <- '#' [^\n]*
# Label is a LocalSymbol (`.L...`), a numeric LocalLabel or an ordinary
# SymbolName, followed by ':'. LocalSymbol is tried before SymbolName because
# SymbolName would also accept a name starting with `.L`.
Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
# SymbolName starts with a letter, '.' or '_' and continues with letters,
# digits, '.', '$' or '_'. The double-bracket classes are peg's
# case-insensitive form, so lowercase letters match as well.
SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
# LocalSymbol is the literal prefix `.L` (case-sensitive) followed by one or
# more symbol characters.
LocalSymbol <- '.L' [[A-Z.0-9$_]]+
# LocalLabel is a numeric label definition: a digit followed by any number of
# digits or '$'.
LocalLabel <- [0-9][0-9$]*
# LocalLabelRef is a reference to a numeric label: the same shape as
# LocalLabel with a trailing 'b' or 'f' (presumably backward / forward, as in
# GNU as `1b` / `1f`).
LocalLabelRef <- [0-9][0-9$]*[bf]
# Instruction is a mnemonic, optionally followed by whitespace and a
# comma-separated operand list, then zero or more brace-wrapped operands
# (`{arg}`), each optionally preceded by whitespace.
# NOTE(review): the brace groups presumably cover decorations such as AVX-512
# mask registers -- confirm against the assembly this is run on.
Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)? (WS? '{' InstructionArg '}')*
# InstructionName is a letter followed by letters or digits (case-insensitive
# classes), with at most one trailing '.', '+' or '-'.
InstructionName <- [[A-Z]][[A-Z0-9]]* [.+\-]?
# InstructionArg is a single operand: an optional '*' indirection marker
# followed by the first of these that matches -- a register or constant, a
# numeric local-label reference, a TOC-relative reference (high, then low), or
# a memory reference. The order matters because PEG choice commits to the
# first successful alternative.
InstructionArg <- IndirectionIndicator? (RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / MemoryRef)
# TOCRefHigh matches `.TOC.-` followed by either `0b` or a `.L` local label,
# and then the `@ha` suffix.
TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha"
# TOCRefLow is the same shape as TOCRefHigh with the `@l` suffix instead.
TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l"
# IndirectionIndicator is the '*' that may prefix an instruction operand
# (see InstructionArg).
IndirectionIndicator <- '*'
# RegisterOrConstant matches either a register (`%` followed by a letter and
# then letters/digits) or a constant: an optional '$' followed by one or two
# consecutive Offsets.
#
# The trailing negative lookahead rejects the match when the next character is
# one of `f b : ( + -`. That presumably keeps this rule from swallowing the
# front of a longer operand -- e.g. a local-label reference such as `1f`, a
# segment prefix such as `%fs:`, or a memory reference such as `8(%rax)` -- so
# that the later alternatives of InstructionArg get a chance to match.
RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / ('$'? ((Offset Offset) / Offset))) ![fb:(+\-]
# Compilers only output a very limited number of expression forms. Rather than
# implement a full expression parser, this enumerates those forms plus a few
# that appear in our hand-written assembly.
# MemoryRef matches a memory operand. Alternatives are tried in the order
# written, so each longer form is listed before the shorter form that is its
# prefix (e.g. `SymbolRef BaseIndexScale` before a bare `SymbolRef`). Offsets
# attached to a symbol are consumed by SymbolRef itself.
#
# NOTE(review): `Offset*` may match nothing, so the final bare BaseIndexScale
# alternative looks already covered by `Offset* BaseIndexScale`; it is kept
# here to leave the rule exactly as written.
MemoryRef <- (SymbolRef BaseIndexScale /
              SymbolRef /
              Offset* BaseIndexScale /
              SegmentRegister Offset BaseIndexScale /
              SegmentRegister BaseIndexScale /
              SegmentRegister Offset /
              BaseIndexScale)
# History (2017-06-06, https://boringssl-review.googlesource.com/16944,
# Change-Id I65970791fc10fb2638fd7be8cc841900eb997c9c): support more complex
# offset / symbol section interleavings.
#
# LLVM likes to emit offsets of the form foo@toc@ha+16, which was previously
# unsupported. parseMemRef was generalized to handle this case and offsets are
# now always folded into the SymbolRef.
#
# This is still not a fully general GAS-compatible parser. On x86_64, GAS
# accepts forms such as `1@GOTPCREL+foo` and
# `blah1@GOTPCREL-blah2+blah3-blah4+blah5` (GOTPCREL modifies blah5 and the
# rest of the expression is an offset): it textually pulls @GOTPCREL out of
# the input partway through parsing and then parses the modified input, so
# adding and subtracting symbols is not commutative and the last symbol wins.
# Its PPC64 parser instead terminates each expression after @toc@ha and
# friends, except that one further *constant* expression (as in
# foo@toc@ha+16) is added into the running offset; otherwise that data is left
# unconsumed. This grammar only covers the foo@toc@ha+16 form.
# SymbolRef matches a symbol together with any offsets attached to it:
#   - an optional prefix of zero or more Offsets followed by '+';
#   - the symbol itself (LocalSymbol is tried before SymbolName);
#   - zero or more Offsets after the symbol;
#   - optionally '@', a Section, and zero or more further Offsets.
SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?
# BaseIndexScale matches the parenthesised part of a memory operand:
# `(` optional base `,` index `,` scale `)`. The base is optional, the
# `, index` part is optional, and the `, scale` (decimal digits) can only
# appear after an index. Whitespace is allowed at the points marked WS?.
BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
# Operator is a single '+' or '-'.
Operator <- [+\-]
# Offset is a numeric literal with an optional '+' and then an optional '-'
# in front. The literal is binary (`0b...`), hexadecimal (`0x...`) or decimal.
# The prefixed forms are tried first because the decimal alternative would
# otherwise match just the leading `0`.
Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
# Section is the text after '@' in a SymbolRef: one or more letters
# (case-insensitive class) or '@' characters. Because '@' is in the class, a
# chained suffix such as `toc@ha` is consumed as a single Section.
Section <- [[A-Z@]]+
# SegmentRegister matches a segment-override prefix: '%', one character from
# `c`-`g` or `s`, then the literal `s:` (i.e. %cs: %ds: %es: %fs: %gs: %ss:).
SegmentRegister <- '%' [c-gs] 's:'