diff --git a/util/fipstools/delocate.go b/util/fipstools/delocate.go index 827e4461..d58e5bec 100644 --- a/util/fipstools/delocate.go +++ b/util/fipstools/delocate.go @@ -795,6 +795,9 @@ const ( instrTransformingMove instrJump instrConditionalMove + // instrCombine merges the source and destination in some fashion, for example + // a 2-operand bitwise operation. + instrCombine instrOther ) @@ -820,6 +823,11 @@ func classifyInstruction(instr string, args []*node32) instructionType { return instrJump } + case "orq", "andq", "xorq": + if len(args) == 2 { + return instrCombine + } + case "vpbroadcastq": if len(args) == 2 { return instrTransformingMove @@ -855,24 +863,41 @@ func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section st } } -func saveRegister(w stringWriter) wrapperFunc { +func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc { return func(k func()) { - w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. - w.WriteString("\tpushq %rax\n") + if !redzoneCleared { + w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. + defer w.WriteString("\tleaq 128(%rsp), %rsp\n") + } + w.WriteString("\tpushfq\n") k() - w.WriteString("\tpopq %rax\n") - w.WriteString("\tleaq 128(%rsp), %rsp\n") + w.WriteString("\tpopfq\n") } } -func moveTo(w stringWriter, target string, isAVX bool) wrapperFunc { +func saveRegister(w stringWriter, avoidReg string) (wrapperFunc, string) { + reg := "%rax" + if reg == avoidReg { + reg = "%rbx" + } + + return func(k func()) { + w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. + w.WriteString("\tpushq " + reg + "\n") + k() + w.WriteString("\tpopq " + reg + "\n") + w.WriteString("\tleaq 128(%rsp), %rsp\n") + }, reg +} + +func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc { return func(k func()) { k() prefix := "" if isAVX { prefix = "v" } - w.WriteString("\t" + prefix + "movq %rax, " + target + "\n") + w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n") } } @@ -883,6 +908,13 @@ func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFun } } +func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { + return func(k func()) { + k() + w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n") + } +} + func isValidLEATarget(reg string) bool { return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm") } @@ -938,16 +970,9 @@ Args: symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) changed = didChange - if symbol == "OPENSSL_ia32cap_P" { - var ok bool - if section == "GOTPCREL" { - ok = instructionName == "movq" - } else if section == "" { - ok = instructionName == "leaq" - } - - if !ok { - return nil, fmt.Errorf("instruction %q referenced OPENSSL_ia32cap_P in section %q, should be a movq from GOTPCREL or a direct leaq", instructionName, section) + if symbol == "OPENSSL_ia32cap_P" && section == "" { + if instructionName != "leaq" { + return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName) } if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 { @@ -963,13 +988,14 @@ Args: } changed = true + + // Flag-altering instructions (i.e. addq) are going to be used so the + // flags need to be preserved. + wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */)) + wrappers = append(wrappers, func(k func()) { - d.output.WriteString("\tleaq\t-128(%rsp), %rsp\n") // Clear the red zone. - d.output.WriteString("\tpushfq\n") d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n") d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n") - d.output.WriteString("\tpopfq\n") - d.output.WriteString("\tleaq\t128(%rsp), %rsp\n") }) break Args @@ -1021,6 +1047,7 @@ Args: // Reduce the instruction to movq symbol@GOTPCREL, targetReg. var targetReg string + var redzoneCleared bool switch classifyInstruction(instructionName, argNodes) { case instrPush: wrappers = append(wrappers, push(d.output)) @@ -1038,23 +1065,45 @@ Args: if isValidLEATarget(targetReg) { return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.") } + case instrCombine: + targetReg = d.contents(argNodes[1]) + if !isValidLEATarget(targetReg) { + return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") + } + saveRegWrapper, tempReg := saveRegister(d.output, targetReg) + redzoneCleared = true + wrappers = append(wrappers, saveRegWrapper) + + wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) + targetReg = tempReg default: return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) } - var redzoneCleared bool if !isValidLEATarget(targetReg) { // Sometimes the compiler will load from the GOT to an // XMM register, which is not a valid target of an LEA // instruction. - wrappers = append(wrappers, saveRegister(d.output)) + saveRegWrapper, tempReg := saveRegister(d.output, "") + wrappers = append(wrappers, saveRegWrapper) isAVX := strings.HasPrefix(instructionName, "v") - wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX)) - targetReg = "%rax" + wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) + targetReg = tempReg + if redzoneCleared { + return nil, fmt.Errorf("internal error: Red Zone was already cleared") + } redzoneCleared = true } - if useGOT { + if symbol == "OPENSSL_ia32cap_P" { + // Flag-altering instructions (i.e. addq) are going to be used so the + // flags need to be preserved. + wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) + wrappers = append(wrappers, func(k func()) { + d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") + d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") + }) + } else if useGOT { wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) } else { wrappers = append(wrappers, func(k func()) { @@ -1239,8 +1288,8 @@ func transform(w stringWriter, inputs []inputFile) error { } w.WriteString(".text\n") - w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"\n", maxObservedFileNumber + 1)) - w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber + 1)) + w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"\n", maxObservedFileNumber+1)) + w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) w.WriteString("BORINGSSL_bcm_text_start:\n") for _, input := range inputs { @@ -1250,7 +1299,7 @@ func transform(w stringWriter, inputs []inputFile) error { } w.WriteString(".text\n") - w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber + 1)) + w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) w.WriteString("BORINGSSL_bcm_text_end:\n") // Emit redirector functions. Each is a single jump instruction. diff --git a/util/fipstools/testdata/x86_64-GOTRewrite/in.s b/util/fipstools/testdata/x86_64-GOTRewrite/in.s index 0f9c70ef..ccbc0bf3 100644 --- a/util/fipstools/testdata/x86_64-GOTRewrite/in.s +++ b/util/fipstools/testdata/x86_64-GOTRewrite/in.s @@ -6,6 +6,12 @@ foo: # As is the equivalent GOTPCREL movq. movq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12 + # And a non-movq instruction via the GOT. + orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12 + + # ... which targets the default temp register + orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %rax + # Test that GOTPCREL accesses get translated. They are handled # differently for local and external symbols. diff --git a/util/fipstools/testdata/x86_64-GOTRewrite/out.s b/util/fipstools/testdata/x86_64-GOTRewrite/out.s index 0485c871..3d421e56 100644 --- a/util/fipstools/testdata/x86_64-GOTRewrite/out.s +++ b/util/fipstools/testdata/x86_64-GOTRewrite/out.s @@ -7,21 +7,45 @@ BORINGSSL_bcm_text_start: foo: # leaq of OPENSSL_ia32cap_P is supported. # WAS leaq OPENSSL_ia32cap_P(%rip), %r11 - leaq -128(%rsp), %rsp + leaq -128(%rsp), %rsp pushfq leaq OPENSSL_ia32cap_addr_delta(%rip), %r11 addq (%r11), %r11 popfq - leaq 128(%rsp), %rsp + leaq 128(%rsp), %rsp # As is the equivalent GOTPCREL movq. # WAS movq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12 - leaq -128(%rsp), %rsp + leaq -128(%rsp), %rsp pushfq leaq OPENSSL_ia32cap_addr_delta(%rip), %r12 addq (%r12), %r12 popfq - leaq 128(%rsp), %rsp + leaq 128(%rsp), %rsp + + # And a non-movq instruction via the GOT. +# WAS orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12 + leaq -128(%rsp), %rsp + pushq %rax + pushfq + leaq OPENSSL_ia32cap_addr_delta(%rip), %rax + addq (%rax), %rax + popfq + orq %rax, %r12 + popq %rax + leaq 128(%rsp), %rsp + + # ... which targets the default temp register +# WAS orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %rax + leaq -128(%rsp), %rsp + pushq %rbx + pushfq + leaq OPENSSL_ia32cap_addr_delta(%rip), %rbx + addq (%rbx), %rbx + popfq + orq %rbx, %rax + popq %rbx + leaq 128(%rsp), %rsp # Test that GOTPCREL accesses get translated. They are handled # differently for local and external symbols.