Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0d5021f
fix: propagate context deadline to readPrelogin to prevent hangs
dlevy-msft-sql Apr 24, 2026
205da72
fix: align TestLoginTimeout error handling with TestQueryTimeout
dlevy-msft-sql Apr 25, 2026
d5a8863
fix: close connection on prelogin error, extract timeout helper, impr…
dlevy-msft-sql Apr 26, 2026
1e6a016
fix: check ctx.Err() before deadline in preloginTimeout
dlevy-msft-sql Apr 26, 2026
1f33bbf
fix: close connection on context cancel during prelogin read
dlevy-msft-sql Apr 26, 2026
02fc140
fix: use deferred close to prevent socket leaks on all connect error …
dlevy-msft-sql Apr 26, 2026
0caca81
fix: prevent data race on timeout restore and double close on reroute
dlevy-msft-sql Apr 26, 2026
997d219
test: assert error types in prelogin integration tests
dlevy-msft-sql Apr 26, 2026
ef5cff6
fix: wait for cancel-watcher goroutine before restoring timeout
dlevy-msft-sql Apr 26, 2026
c2f757b
fix: clarify cancel watcher comment to match unconditional behavior
dlevy-msft-sql Apr 26, 2026
e128bf0
fix: defer cancel and stop timer in context cancel test
dlevy-msft-sql Apr 26, 2026
0967bd8
fix: wrap db.Conn in goroutine with hard timeout in integration tests
dlevy-msft-sql Apr 26, 2026
f635122
fix: handle socket timeout racing with context deadline in prelogin
dlevy-msft-sql Apr 26, 2026
f82172f
fix: remove overly broad strings.Contains timeout fallback in test
dlevy-msft-sql Apr 26, 2026
dd27d3d
test: add coverage for socket timeout, expired context, and routing r…
dlevy-msft-sql Apr 27, 2026
7b213d2
style: align whitespace in routing test
dlevy-msft-sql Apr 27, 2026
9955d5a
fix: improve prelogin timeout error handling and test assertions
dlevy-msft-sql Apr 27, 2026
b8cf900
fix: use t.Errorf in goroutines and handle Accept errors in routing test
dlevy-msft-sql Apr 27, 2026
3b19ff3
fix: add tokenLoginAck to routing mock and use PingContext with timeout
dlevy-msft-sql Apr 27, 2026
5e5c647
test: improve patch coverage for prelogin context deadline
dlevy-msft-sql Apr 27, 2026
5358a71
fix: avoid t.Fatal from goroutine in TestConnectSuccessfulPreloginAnd…
dlevy-msft-sql Apr 27, 2026
4d3b4ac
fix: correct comment order in pastDeadlineContext documentation
dlevy-msft-sql Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 208 additions & 0 deletions prelogin_deadline_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
package mssql

import (
"context"
"database/sql"
"fmt"
"net"
"testing"
"time"
)

func TestPreloginTimeout(t *testing.T) {
t.Run("no deadline keeps connection timeout", func(t *testing.T) {
got, err := preloginTimeout(context.Background(), 30*time.Second)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != 30*time.Second {
t.Fatalf("timeout=%v, want %v", got, 30*time.Second)
}
})

t.Run("sooner deadline wins", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 250*time.Millisecond)
defer cancel()

got, err := preloginTimeout(ctx, 30*time.Second)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got <= 0 || got > 250*time.Millisecond {
t.Fatalf("timeout=%v, want a positive value no greater than %v", got, 250*time.Millisecond)
}
})

t.Run("shorter connection timeout stays in effect", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

got, err := preloginTimeout(ctx, 250*time.Millisecond)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != 250*time.Millisecond {
t.Fatalf("timeout=%v, want %v", got, 250*time.Millisecond)
}
})

t.Run("zero connection timeout uses context deadline", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 250*time.Millisecond)
defer cancel()

got, err := preloginTimeout(ctx, 0)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got <= 0 || got > 250*time.Millisecond {
t.Fatalf("timeout=%v, want a positive value no greater than %v", got, 250*time.Millisecond)
}
})

t.Run("expired deadline returns context error", func(t *testing.T) {
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-time.Second))
defer cancel()

_, err := preloginTimeout(ctx, 30*time.Second)
if err == nil {
t.Fatal("expected error, got nil")
}
if err != context.DeadlineExceeded {
t.Fatalf("error=%v, want %v", err, context.DeadlineExceeded)
}
})

t.Run("canceled context without deadline returns context error", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()

_, err := preloginTimeout(ctx, 30*time.Second)
if err == nil {
t.Fatal("expected error, got nil")
}
if err != context.Canceled {
t.Fatalf("error=%v, want %v", err, context.Canceled)
}
})
}

// TestPreloginRespectsContextDeadline verifies that readPrelogin honors the
// context deadline rather than hanging for the full ConnTimeout when the
// server never responds.
func TestPreloginRespectsContextDeadline(t *testing.T) {
// Start a TCP listener that accepts connections but never sends data,
// simulating a server that hangs during prelogin.
addr := &net.TCPAddr{IP: net.IP{127, 0, 0, 1}}
listener, err := net.ListenTCP("tcp", addr)
if err != nil {
t.Fatal("Cannot start listener:", err)
}
defer listener.Close()
resolved := listener.Addr().(*net.TCPAddr)

done := make(chan struct{})
defer close(done)

go func() {
for {
conn, err := listener.Accept()
if err != nil {
return
}
// Read the prelogin request but never respond.
buf := make([]byte, 4096)
_, _ = conn.Read(buf)
// Hold connection open until the test finishes.
<-done
conn.Close()
}
}()

// Use a long ConnTimeout (30s) so if the context deadline is NOT
// respected, the test will hang noticeably.
dsn := fmt.Sprintf("sqlserver://sa:unused@%s:%d?connection+timeout=30&dial+timeout=2&protocol=tcp&encrypt=disable",
resolved.IP.String(), resolved.Port)

db, err := sql.Open("sqlserver", dsn)
if err != nil {
t.Fatal("sql.Open failed:", err)
}
defer db.Close()

// Context with a short deadline — this is the one that should win.
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
defer cancel()

start := time.Now()
conn, err := db.Conn(ctx)
elapsed := time.Since(start)

if err == nil {
conn.Close()
t.Fatal("Expected connection to fail, but it succeeded")
}

// The connection should fail well before the full ConnTimeout (30s).
// We use a generous 5s bound to avoid flakes on slow CI; the real
// expectation is ~500ms from the context deadline.
if elapsed > 5*time.Second {
t.Errorf("Connection took %v, expected it to respect the 500ms context deadline", elapsed)
Comment thread
dlevy-msft-sql marked this conversation as resolved.
Outdated
}
Comment thread
dlevy-msft-sql marked this conversation as resolved.
}

// TestPreloginRespectsContextCancel verifies that readPrelogin unblocks
// when the context is canceled even without a deadline set.
func TestPreloginRespectsContextCancel(t *testing.T) {
addr := &net.TCPAddr{IP: net.IP{127, 0, 0, 1}}
listener, err := net.ListenTCP("tcp", addr)
if err != nil {
t.Fatal("Cannot start listener:", err)
}
defer listener.Close()
resolved := listener.Addr().(*net.TCPAddr)

done := make(chan struct{})
defer close(done)

go func() {
for {
conn, err := listener.Accept()
if err != nil {
return
}
buf := make([]byte, 4096)
_, _ = conn.Read(buf)
<-done
conn.Close()
}
}()

// connTimeout=30 and no context deadline: without the cancel watcher,
// this would block for the full 30s.
dsn := fmt.Sprintf("sqlserver://sa:unused@%s:%d?connection+timeout=30&dial+timeout=2&protocol=tcp&encrypt=disable",
resolved.IP.String(), resolved.Port)

db, err := sql.Open("sqlserver", dsn)
if err != nil {
t.Fatal("sql.Open failed:", err)
}
defer db.Close()

ctx, cancel := context.WithCancel(context.Background())

// Cancel after 500ms to simulate a caller-driven cancellation.
time.AfterFunc(500*time.Millisecond, cancel)
Comment thread
dlevy-msft-sql marked this conversation as resolved.
Outdated

start := time.Now()
conn, err := db.Conn(ctx)
elapsed := time.Since(start)

if err == nil {
conn.Close()
t.Fatal("Expected connection to fail, but it succeeded")
}

if elapsed > 5*time.Second {
t.Errorf("Connection took %v, expected it to respect context cancellation within ~500ms", elapsed)
}
Comment thread
dlevy-msft-sql marked this conversation as resolved.
}
48 changes: 38 additions & 10 deletions queries_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2267,6 +2267,32 @@ func getLatency(t *testing.T) time.Duration {
return time.Since(now)
}

// isAcceptableTimeoutErr reports whether err is one of the expected outcomes
// after a context deadline fires during query execution.
//
// After the context deadline, the driver sends ATTENTION to cancel.
// Depending on timing, OS, and SQL Server version the surfaced error
// can be any of:
// - context.DeadlineExceeded
// - a net.Error with Timeout() (i/o timeout on read/write)
// - SQL Server error 3980 ("batch aborted") when the server
// acknowledges ATTENTION before the client sees the timeout
// - the driver's explicit cancel-confirmation failure when ATTENTION
// was sent but the server never completed the cancellation handshake
func isAcceptableTimeoutErr(err error) bool {
if errors.Is(err, context.DeadlineExceeded) {
return true
}
if ne := (net.Error)(nil); errors.As(err, &ne) && ne.Timeout() {
return true
}
if sqlErr := (Error{}); errors.As(err, &sqlErr) &&
(sqlErr.Number == 3980 || sqlErr.Message == "did not get cancellation confirmation from the server") {
return true
}
return false
}

func TestQueryTimeout(t *testing.T) {
conn, logger := open(t)
defer conn.Close()
Expand All @@ -2276,8 +2302,11 @@ func TestQueryTimeout(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), latency+2000*time.Millisecond)
defer cancel()
_, err := conn.ExecContext(ctx, "waitfor delay '00:00:20'")
if err != context.DeadlineExceeded {
t.Errorf("ExecContext expected to fail with DeadlineExceeded but it returned %v", err)
if err == nil {
t.Fatal("ExecContext expected to fail but succeeded")
}
if !isAcceptableTimeoutErr(err) {
t.Fatalf("wrong kind of error for query timeout: %T: %v", err, err)
}

// connection should be usable after timeout
Expand All @@ -2301,14 +2330,13 @@ func TestLoginTimeout(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), latency+200*time.Millisecond)
defer cancel()
_, err := conn.ExecContext(ctx, "waitfor delay '00:00:03'")
t.Logf("Got error type %v: %s ", reflect.TypeOf(err), err.Error())
if oe, ok := err.(*net.OpError); ok {
if !oe.Timeout() {
t.Fatalf("Got non-timeout error %s", oe.Error())
}
// The type of error is not guaranteed so just look for "timeout"
} else if err != context.DeadlineExceeded && !strings.Contains(err.Error(), "timeout") {
t.Fatalf("wrong kind of error for login or query timeout: %+v", err)
if err == nil {
t.Fatal("ExecContext expected to fail but succeeded")
}
// With very low latency, the login completes before the deadline and
// this degenerates into a query-timeout scenario.
if !isAcceptableTimeoutErr(err) {
t.Fatalf("wrong kind of error for login or query timeout: %T: %v", err, err)
Comment thread
dlevy-msft-sql marked this conversation as resolved.
}

// connection should be usable after timeout
Expand Down
60 changes: 60 additions & 0 deletions tds.go
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,28 @@ func getTLSConn(conn *timeoutConn, p msdsn.Config, alpnSeq string) (tlsConn *tls
return tlsConn, nil
}

func preloginTimeout(ctx context.Context, connTimeout time.Duration) (time.Duration, error) {
if err := ctx.Err(); err != nil {
return 0, err
}

deadline, ok := ctx.Deadline()
if !ok {
return connTimeout, nil
}
Comment thread
dlevy-msft-sql marked this conversation as resolved.
Comment thread
dlevy-msft-sql marked this conversation as resolved.

ctxTimeout := time.Until(deadline)
if ctxTimeout <= 0 {
return 0, context.DeadlineExceeded
}

if connTimeout == 0 || ctxTimeout < connTimeout {
return ctxTimeout, nil
}

return connTimeout, nil
}

func connect(ctx context.Context, c *Connector, logger ContextLogger, p msdsn.Config) (res *tdsSession, err error) {
var cbt *integratedauth.ChannelBindings
isTransportEncrypted := false
Expand Down Expand Up @@ -1206,8 +1228,46 @@ initiate_connection:
return nil, err
}

// Ensure the prelogin read respects the context deadline so connect()
// does not hang indefinitely when the server never responds.
// We temporarily reduce toconn.timeout rather than using SetReadDeadline
// because timeoutConn.Read() calls SetDeadline(now+timeout) on every
// read, which would overwrite a SetReadDeadline value.
origTimeout := toconn.timeout
toconn.timeout, err = preloginTimeout(ctx, origTimeout)
if err != nil {
toconn.Close()
Comment thread
dlevy-msft-sql marked this conversation as resolved.
Outdated
return nil, err
}
Comment thread
dlevy-msft-sql marked this conversation as resolved.

// If the context has no deadline but is cancelable and connTimeout is 0,
// the read could block indefinitely. Watch ctx.Done() and close the
// connection to unblock readPrelogin on cancellation.
Comment thread
dlevy-msft-sql marked this conversation as resolved.
Outdated
cancelDone := make(chan struct{})
go func() {
select {
case <-ctx.Done():
toconn.Close()
case <-cancelDone:
}
}()

fields, err = readPrelogin(outbuf)
close(cancelDone)

// Restore the original timeout for subsequent reads.
toconn.timeout = origTimeout

// If the context was canceled, the watcher goroutine may have closed
// the connection. Return the context error regardless of whether the
// read itself succeeded to avoid using a potentially closed conn.
if ctxErr := ctx.Err(); ctxErr != nil {
toconn.Close()
return nil, ctxErr
}

if err != nil {
Comment thread
dlevy-msft-sql marked this conversation as resolved.
toconn.Close()
return nil, err
}

Expand Down
Loading