Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/release-notes/release-notes-0.21.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@

## RPC Additions

* The `SendOnion` RPC is now fully [idempotent](
https://github.com/lightningnetwork/lnd/pull/10473), providing a critical
reliability improvement for external payment orchestrators (such as a remote
`ChannelRouter`). Callers can now safely retry a `SendOnion` request after a
network timeout or ambiguous error without risking a duplicate payment. If a
request with the same `attempt_id` has already been processed, the RPC will
now return a `DUPLICATE_HTLC` error (gRPC status code `AlreadyExists`),
serving as a definitive acknowledgment
that the dispatch was received. This allows clients to build more resilient
payment-sending logic.

* [Added support for coordinator-based MuSig2 signing
patterns](https://github.com/lightningnetwork/lnd/pull/10436) with two new
RPCs: `MuSig2RegisterCombinedNonce` allows registering a pre-aggregated
Expand Down
5 changes: 5 additions & 0 deletions htlcswitch/switch.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,11 @@ func (s *Switch) CleanStore(keepPids map[uint64]struct{}) error {
return s.attemptStore.CleanStore(keepPids)
}

// AttemptStore returns the attempt store held by the Switch, i.e. the same
// store that CleanStore delegates to. It exposes the store to callers outside
// of the Switch through the AttemptStore interface.
func (s *Switch) AttemptStore() AttemptStore {
	return s.attemptStore
}

// SendHTLC is used by other subsystems which don't belong to the htlc switch
// package in order to send the htlc update. The attemptID used MUST be unique
// for this HTLC, and MUST be used only once, otherwise the switch might reject
Expand Down
4 changes: 4 additions & 0 deletions itest/list_on_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,10 @@ var allTestCases = []*lntest.TestCase{
Name: "send onion twice",
TestFunc: testSendOnionTwice,
},
{
Name: "send onion concurrency",
TestFunc: testSendOnionConcurrency,
},
{
Name: "track onion",
TestFunc: testTrackOnion,
Expand Down
165 changes: 151 additions & 14 deletions itest/lnd_sendonion_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package itest

import (
"context"
"sync"

"github.com/btcsuite/btcd/btcec/v2"
"github.com/btcsuite/btcd/btcutil"
sphinx "github.com/lightningnetwork/lightning-onion"
Expand All @@ -9,9 +12,12 @@ import (
"github.com/lightningnetwork/lnd/lnrpc/invoicesrpc"
"github.com/lightningnetwork/lnd/lnrpc/switchrpc"
"github.com/lightningnetwork/lnd/lntest"
"github.com/lightningnetwork/lnd/lntest/rpc"
"github.com/lightningnetwork/lnd/lntypes"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/stretchr/testify/require"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

// testSendOnion tests the basic success case for the SendOnion RPC. It
Expand Down Expand Up @@ -178,13 +184,18 @@ func testSendOnionTwice(ht *lntest.HarnessTest) {
// While the first onion is still in-flight, we'll send the same onion
// again with the same attempt ID. This should error as our Switch will
// detect duplicate ADDs for *in-flight* HTLCs.
resp = alice.RPC.SendOnion(sendReq)
ht.Logf("SendOnion resp: %+v, code: %v", resp, resp.ErrorCode)
require.False(ht, resp.Success, "expected failure on onion send")
require.Equal(ht, resp.ErrorCode,
switchrpc.ErrorCode_DUPLICATE_HTLC,
ctxt, cancel := context.WithTimeout(context.Background(),
rpc.DefaultTimeout)
defer cancel()

_, err := alice.RPC.Switch.SendOnion(ctxt, sendReq)
require.Error(ht, err, "expected failure on onion send")

// Check that we get the expected gRPC error.
s, ok := status.FromError(err)
require.True(ht, ok, "expected gRPC status error")
require.Equal(ht, codes.AlreadyExists, s.Code(),
"unexpected error code")
require.Equal(ht, resp.ErrorMessage, htlcswitch.ErrDuplicateAdd.Error())

// Dave settles the invoice.
dave.RPC.SettleInvoice(preimage[:])
Expand All @@ -203,14 +214,140 @@ func testSendOnionTwice(ht *lntest.HarnessTest) {
require.Equal(ht, preimage[:], trackResp.Preimage)

// Now that the original HTLC attempt has settled, we'll send the same
// onion again with the same attempt ID.
//
// NOTE: Currently, this does not error. When we make SendOnion fully
// duplicate safe, this should be updated to assert an error is
// returned.
resp = alice.RPC.SendOnion(sendReq)
require.True(ht, resp.Success, "expected successful onion send")
require.Empty(ht, resp.ErrorMessage, "unexpected failure to send onion")
// onion again with the same attempt ID. Confirm that this is also
// prevented.
ctxt, cancel = context.WithTimeout(context.Background(),
rpc.DefaultTimeout)
defer cancel()

_, err = alice.RPC.Switch.SendOnion(ctxt, sendReq)
require.Error(ht, err, "expected failure on onion send")

// Check that we get the expected gRPC error.
s, ok = status.FromError(err)
require.True(ht, ok, "expected gRPC status error")
require.Equal(ht, codes.AlreadyExists, s.Code(),
"unexpected error code")
}

// testSendOnionConcurrency fires a burst of identical SendOnion requests, all
// carrying the same attempt ID, at Alice in parallel. This mimics a client
// that crashed and is blindly retrying. Exactly one request must be accepted,
// every other one must be rejected with the gRPC AlreadyExists code, and Bob's
// invoice must end up settled.
func testSendOnionConcurrency(ht *lntest.HarnessTest) {
	const (
		chanAmt               = btcutil.Amount(100000)
		numNodes              = 2
		paymentAmt            = 10000
		numConcurrentRequests = 50
	)

	// Spin up Alice and Bob, without any extra node arguments, and open a
	// single channel between them.
	cfgs := make([][]string, numNodes)
	openParams := lntest.OpenChannelParams{Amt: chanAmt}
	chanPoints, nodes := ht.CreateSimpleNetwork(cfgs, openParams)
	alice, bob := nodes[0], nodes[1]
	defer ht.CloseChannel(alice, chanPoints[0])

	// Alice has to see the channel in her graph before we go on.
	chanEdge := ht.AssertChannelInGraph(alice, chanPoints[0])

	// Bob issues a single invoice so that he is expecting the payment.
	_, rHashes, invoices := ht.CreatePayReqs(bob, paymentAmt, 1)
	paymentHash := rHashes[0]

	// Ask Alice for a route to Bob, then attach an MPP record carrying the
	// invoice's payment address and the total amount to the last hop.
	routes := alice.RPC.QueryRoutes(&lnrpc.QueryRoutesRequest{
		PubKey: bob.PubKeyStr,
		Amt:    paymentAmt,
	})
	route := routes.Routes[0]
	lastHop := route.Hops[len(route.Hops)-1]
	lastHop.MppRecord = &lnrpc.MPPRecord{
		PaymentAddr:  invoices[0].PaymentAddr,
		TotalAmtMsat: int64(lnwire.NewMSatFromSatoshis(paymentAmt)),
	}

	// Wrap the route into an onion packet.
	onionResp := alice.RPC.BuildOnion(&switchrpc.BuildOnionRequest{
		Route:       route,
		PaymentHash: paymentHash,
	})

	// Every goroutine below shares this one request, so the attempt ID is
	// identical across all calls.
	sendReq := &switchrpc.SendOnionRequest{
		FirstHopChanId: chanEdge.ChannelId,
		Amount:         route.TotalAmtMsat,
		Timelock:       route.TotalTimeLock,
		PaymentHash:    paymentHash,
		OnionBlob:      onionResp.OnionBlob,
		AttemptId:      42,
	}

	// The results channel is buffered for the whole burst, so no sender
	// ever blocks on it.
	results := make(chan error, numConcurrentRequests)

	var wg sync.WaitGroup
	wg.Add(numConcurrentRequests)

	// sendOnce issues a single SendOnion call under its own timeout and
	// reports the raw error (nil on success).
	sendOnce := func() {
		defer wg.Done()

		ctxt, cancel := context.WithTimeout(
			context.Background(), rpc.DefaultTimeout,
		)
		defer cancel()

		_, err := alice.RPC.Switch.SendOnion(ctxt, sendReq)
		results <- err
	}

	// Kick off the retry storm.
	for i := 0; i < numConcurrentRequests; i++ {
		go sendOnce()
	}

	// All senders are done once Wait returns, so closing the channel here
	// is safe and lets the range loop below terminate.
	wg.Wait()
	close(results)

	// Tally the outcomes: one dispatch should win, the rest should be
	// flagged as duplicates.
	var successCount, duplicateCount int
	for err := range results {
		// No error means the dispatch went through.
		if err == nil {
			successCount++
			continue
		}

		// Anything else has to be a gRPC status error.
		st, isStatus := status.FromError(err)
		if !isStatus {
			ht.Fatalf("unexpected error from SendOnion: %v, "+
				"code: %v", st.Err().Error(), st.Code())
		}

		// AlreadyExists is the only acceptable rejection.
		switch st.Code() {
		case codes.AlreadyExists:
			duplicateCount++

		default:
			ht.Fatalf("unexpected error from SendOnion: %v, "+
				"code: %v", st.Err().Error(), st.Code())
		}
	}

	// Exactly one winner, everybody else a duplicate.
	require.Equal(ht, 1, successCount, "expected exactly one success")
	require.Equal(ht, numConcurrentRequests-1, duplicateCount,
		"expected all other attempts to be duplicates")

	// Finally, Bob should see his invoice settled.
	ht.AssertInvoiceSettled(bob, invoices[0].PaymentAddr)
}

// testTrackOnion exercises the SwitchRPC server's TrackOnion endpoint,
Expand Down
4 changes: 4 additions & 0 deletions lnrpc/switchrpc/config_active.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ type Config struct {
// be dispatched.
HtlcDispatcher routing.PaymentAttemptDispatcher

// AttemptStore provides the means by which the RPC server can manage
// the state of an HTLC attempt, including initializing and failing it.
AttemptStore htlcswitch.AttemptStore

// ChannelInfoAccessor defines an interface for accessing channel
// information necessary for dispatching payment attempts, specifically
// methods for fetching links by public key.
Expand Down
4 changes: 4 additions & 0 deletions lnrpc/switchrpc/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ func createNewSubServer(configRegistry lnrpc.SubServerConfigDispatcher) (
return nil, nil, fmt.Errorf("route processor must be set to " +
"create SwitchRPC")
}
if config.AttemptStore == nil {
return nil, nil, fmt.Errorf("attempt store must be set to " +
"create SwitchRPC")
}

return New(config)
}
Expand Down
19 changes: 19 additions & 0 deletions lnrpc/switchrpc/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,22 @@ func (m *mockRouteProcessor) UnmarshallRoute(route *lnrpc.Route) (

return m.unmarshallRoute, m.unmarshallErr
}

// mockAttemptStore is a test double for htlcswitch.AttemptStore. Only
// InitAttempt and FailPendingAttempt are overridden here; every other method
// of the interface is forwarded to the embedded AttemptStore value.
type mockAttemptStore struct {
	htlcswitch.AttemptStore

	// initErr is the canned result of InitAttempt.
	initErr error

	// failErr is the canned result of FailPendingAttempt.
	failErr error
}

// InitAttempt disregards attemptID and reports the configured initErr.
func (m *mockAttemptStore) InitAttempt(attemptID uint64) error {
	return m.initErr
}

// FailPendingAttempt disregards both of its arguments and reports the
// configured failErr.
func (m *mockAttemptStore) FailPendingAttempt(
	attemptID uint64, reason *htlcswitch.LinkError) error {

	return m.failErr
}
31 changes: 28 additions & 3 deletions lnrpc/switchrpc/switch.proto
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,34 @@ option go_package = "github.com/lightningnetwork/lnd/lnrpc/switchrpc";
// subsystem of the daemon.
service Switch {
/*
SendOnion attempts to make a payment via the specified onion. This
method differs from SendPayment in that the instance need not be aware of
the full details of the payment route.
SendOnion provides an idempotent API for dispatching a pre-formed onion
packet, which is the primary entry point for a remote router.

To safely handle network failures, a client can and should retry this RPC
after a timeout or disconnection. Retries MUST use the exact same
attempt_id to allow the server to correctly detect duplicate requests.

A client interacting with this RPC must handle four distinct categories of
outcomes, communicated via gRPC status codes:

1. SUCCESS (gRPC code OK): A definitive confirmation that the HTLC has
been successfully dispatched. The client can proceed to track the
payment's final result via the `TrackOnion` RPC.

2. DUPLICATE ACKNOWLEDGMENT (gRPC code AlreadyExists): A definitive
acknowledgment that a request with the same attempt_id has already
been successfully processed. A retrying client should interpret this
as a success and proceed to tracking the payment's result.

3. AMBIGUOUS FAILURE (gRPC code Unavailable or DeadlineExceeded): An
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps we should add that context cancelled is also such an error and that this should be the fallback if an error can't be classified in terms of the other cases (we could make this the last point), since it's always safe to retry this RPC.

ambiguous error occurred (e.g., the server is shutting down or the
client timed out). The state of the HTLC dispatch is unknown. The
client MUST retry the exact same request to resolve the ambiguity.

4. DEFINITIVE FAILURE (gRPC code FailedPrecondition, InvalidArgument, etc.):
A definitive failure is a guarantee that the HTLC was not and will not be
dispatched. The client should fail the attempt and may retry with a new
route and/or new attempt_id.
*/
rpc SendOnion (SendOnionRequest) returns (SendOnionResponse);

Expand Down
3 changes: 2 additions & 1 deletion lnrpc/switchrpc/switch.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
},
"/v2/switch/onion": {
"post": {
"summary": "SendOnion attempts to make a payment via the specified onion. This\nmethod differs from SendPayment in that the instance need not be aware of\nthe full details of the payment route.",
"summary": "SendOnion provides an idempotent API for dispatching a pre-formed onion\npacket, which is the primary entry point for a remote router.",
"description": "To safely handle network failures, a client can and should retry this RPC\nafter a timeout or disconnection. Retries MUST use the exact same\nattempt_id to allow the server to correctly detect duplicate requests.\n\nA client interacting with this RPC must handle four distinct categories of\noutcomes, communicated via gRPC status codes:\n\n1. SUCCESS (gRPC code OK): A definitive confirmation that the HTLC has\nbeen successfully dispatched. The client can proceed to track the\npayment's final result via the `TrackOnion` RPC.\n\n2. DUPLICATE ACKNOWLEDGMENT (gRPC code AlreadyExists): A definitive\nacknowledgment that a request with the same attempt_id has already\nbeen successfully processed. A retrying client should interpret this\nas a success and proceed to tracking the payment's result.\n\n3. AMBIGUOUS FAILURE (gRPC code Unavailable or DeadlineExceeded): An\nambiguous error occurred (e.g., the server is shutting down or the\nclient timed out). The state of the HTLC dispatch is unknown. The\nclient MUST retry the exact same request to resolve the ambiguity.\n\n4. DEFINITIVE FAILURE (gRPC code FailedPrecondition, InvalidArgument, etc.):\nA definitive failure is a guarantee that the HTLC was not and will not be\ndispatched. The client should fail the attempt and may retry with a new\nroute and/or new attempt_id.",
"operationId": "Switch_SendOnion",
"responses": {
"200": {
Expand Down
Loading
Loading