crypto/tls: fix deadlock when racing to complete handshake.
After renegotiation support was added (af125a5193c) it's possible for a Write to block on a Read when racing to complete the handshake: 1. The Write determines that a handshake is needed and tries to take the neccesary locks in the correct order. 2. The Read also determines that a handshake is needed and wins the race to take the locks. 3. The Read goroutine completes the handshake and wins a race to unlock and relock c.in, which it'll hold when waiting for more network data. If the application-level protocol requires the Write to complete before data can be read then the system as a whole will deadlock. Unfortunately it doesn't appear possible to reverse the locking order of c.in and handshakeMutex because we might read a renegotiation request at any point and need to be able to do a handshake without unlocking. So this change adds a sync.Cond that indicates that a goroutine has committed to doing a handshake. Other interested goroutines can wait on that Cond when needed. The test for this isn't great. I was able to reproduce the deadlock with it only when building with -race. (Because -race happened to alter the timing just enough.) Fixes #17101. Change-Id: I4e8757f7b82a84e46c9963a977d089f0fb675495 Reviewed-on: https://go-review.googlesource.com/29164 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
37110e801a
commit
b7ba182f28
60
conn.go
60
conn.go
@ -29,6 +29,10 @@ type Conn struct {
|
|||||||
|
|
||||||
// constant after handshake; protected by handshakeMutex
|
// constant after handshake; protected by handshakeMutex
|
||||||
handshakeMutex sync.Mutex // handshakeMutex < in.Mutex, out.Mutex, errMutex
|
handshakeMutex sync.Mutex // handshakeMutex < in.Mutex, out.Mutex, errMutex
|
||||||
|
// handshakeCond, if not nil, indicates that a goroutine is committed
|
||||||
|
// to running the handshake for this Conn. Other goroutines that need
|
||||||
|
// to wait for the handshake can wait on this, under handshakeMutex.
|
||||||
|
handshakeCond *sync.Cond
|
||||||
handshakeErr error // error resulting from handshake
|
handshakeErr error // error resulting from handshake
|
||||||
vers uint16 // TLS version
|
vers uint16 // TLS version
|
||||||
haveVers bool // version has been negotiated
|
haveVers bool // version has been negotiated
|
||||||
@ -1206,26 +1210,50 @@ func (c *Conn) Handshake() error {
|
|||||||
// need to check whether a handshake is pending (such as Write) to
|
// need to check whether a handshake is pending (such as Write) to
|
||||||
// block.
|
// block.
|
||||||
//
|
//
|
||||||
// Thus we take c.handshakeMutex first and, if we find that a handshake
|
// Thus we first take c.handshakeMutex to check whether a handshake is
|
||||||
// is needed, then we unlock, acquire c.in and c.handshakeMutex in the
|
// needed.
|
||||||
// correct order, and check again.
|
//
|
||||||
|
// If so then, previously, this code would unlock handshakeMutex and
|
||||||
|
// then lock c.in and handshakeMutex in the correct order to run the
|
||||||
|
// handshake. The problem was that it was possible for a Read to
|
||||||
|
// complete the handshake once handshakeMutex was unlocked and then
|
||||||
|
// keep c.in while waiting for network data. Thus a concurrent
|
||||||
|
// operation could be blocked on c.in.
|
||||||
|
//
|
||||||
|
// Thus handshakeCond is used to signal that a goroutine is committed
|
||||||
|
// to running the handshake and other goroutines can wait on it if they
|
||||||
|
// need. handshakeCond is protected by handshakeMutex.
|
||||||
c.handshakeMutex.Lock()
|
c.handshakeMutex.Lock()
|
||||||
defer c.handshakeMutex.Unlock()
|
defer c.handshakeMutex.Unlock()
|
||||||
|
|
||||||
for i := 0; i < 2; i++ {
|
for {
|
||||||
if i == 1 {
|
|
||||||
c.handshakeMutex.Unlock()
|
|
||||||
c.in.Lock()
|
|
||||||
defer c.in.Unlock()
|
|
||||||
c.handshakeMutex.Lock()
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := c.handshakeErr; err != nil {
|
if err := c.handshakeErr; err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if c.handshakeComplete {
|
if c.handshakeComplete {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
if c.handshakeCond == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
c.handshakeCond.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set handshakeCond to indicate that this goroutine is committing to
|
||||||
|
// running the handshake.
|
||||||
|
c.handshakeCond = sync.NewCond(&c.handshakeMutex)
|
||||||
|
c.handshakeMutex.Unlock()
|
||||||
|
|
||||||
|
c.in.Lock()
|
||||||
|
defer c.in.Unlock()
|
||||||
|
|
||||||
|
c.handshakeMutex.Lock()
|
||||||
|
|
||||||
|
// The handshake cannot have completed when handshakeMutex was unlocked
|
||||||
|
// because this goroutine set handshakeCond.
|
||||||
|
if c.handshakeErr != nil || c.handshakeComplete {
|
||||||
|
panic("handshake should not have been able to complete after handshakeCond was set")
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.isClient {
|
if c.isClient {
|
||||||
@ -1240,6 +1268,16 @@ func (c *Conn) Handshake() error {
|
|||||||
// alert that might be left in the buffer.
|
// alert that might be left in the buffer.
|
||||||
c.flush()
|
c.flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.handshakeErr == nil && !c.handshakeComplete {
|
||||||
|
panic("handshake should have had a result.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wake any other goroutines that are waiting for this handshake to
|
||||||
|
// complete.
|
||||||
|
c.handshakeCond.Broadcast()
|
||||||
|
c.handshakeCond = nil
|
||||||
|
|
||||||
return c.handshakeErr
|
return c.handshakeErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1168,3 +1168,57 @@ func TestAlertFlushing(t *testing.T) {
|
|||||||
t.Errorf("expected server handshake to complete with one write, but saw %d", n)
|
t.Errorf("expected server handshake to complete with one write, but saw %d", n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHandshakeRace(t *testing.T) {
|
||||||
|
// This test races a Read and Write to try and complete a handshake in
|
||||||
|
// order to provide some evidence that there are no races or deadlocks
|
||||||
|
// in the handshake locking.
|
||||||
|
for i := 0; i < 32; i++ {
|
||||||
|
c, s := net.Pipe()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
server := Server(s, testConfig)
|
||||||
|
if err := server.Handshake(); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var request [1]byte
|
||||||
|
if n, err := server.Read(request[:]); err != nil || n != 1 {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
server.Write(request[:])
|
||||||
|
server.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
startWrite := make(chan struct{})
|
||||||
|
startRead := make(chan struct{})
|
||||||
|
readDone := make(chan struct{})
|
||||||
|
|
||||||
|
client := Client(c, testConfig)
|
||||||
|
go func() {
|
||||||
|
<-startWrite
|
||||||
|
var request [1]byte
|
||||||
|
client.Write(request[:])
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
<-startRead
|
||||||
|
var reply [1]byte
|
||||||
|
if n, err := client.Read(reply[:]); err != nil || n != 1 {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
c.Close()
|
||||||
|
readDone <- struct{}{}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if i&1 == 1 {
|
||||||
|
startWrite <- struct{}{}
|
||||||
|
startRead <- struct{}{}
|
||||||
|
} else {
|
||||||
|
startRead <- struct{}{}
|
||||||
|
startWrite <- struct{}{}
|
||||||
|
}
|
||||||
|
<-readDone
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user