diff --git a/docs/release-notes/release-notes-0.21.0.md b/docs/release-notes/release-notes-0.21.0.md index 8dc5aba7c3..2650ac7d5d 100644 --- a/docs/release-notes/release-notes-0.21.0.md +++ b/docs/release-notes/release-notes-0.21.0.md @@ -67,6 +67,16 @@ ## RPC Additions +* The `SendOnion` RPC is now fully [idempotent]( + https://github.com/lightningnetwork/lnd/pull/10473), providing a critical + reliability improvement for external payment orchestrators (such as a remote + `ChannelRouter`). Callers can now safely retry a `SendOnion` request after a + network timeout or ambiguous error without risking a duplicate payment. If a + request with the same `attempt_id` has already been processed, the RPC will + now return a `DUPLICATE_HTLC` error, serving as a definitive acknowledgment + that the dispatch was received. This allows clients to build more resilient + payment-sending logic. + * [Added support for coordinator-based MuSig2 signing patterns](https://github.com/lightningnetwork/lnd/pull/10436) with two new RPCs: `MuSig2RegisterCombinedNonce` allows registering a pre-aggregated diff --git a/htlcswitch/switch.go b/htlcswitch/switch.go index c80a280f41..8b7a26e207 100644 --- a/htlcswitch/switch.go +++ b/htlcswitch/switch.go @@ -561,6 +561,11 @@ func (s *Switch) CleanStore(keepPids map[uint64]struct{}) error { return s.attemptStore.CleanStore(keepPids) } +// AttemptStore provides access to the Switch's underlying attempt store. +func (s *Switch) AttemptStore() AttemptStore { + return s.attemptStore +} + // SendHTLC is used by other subsystems which aren't belong to htlc switch // package in order to send the htlc update. The attemptID used MUST be unique // for this HTLC, and MUST be used only once, otherwise the switch might reject diff --git a/itest/list_on_test.go b/itest/list_on_test.go index a26ff06287..79b6403280 100644 --- a/itest/list_on_test.go +++ b/itest/list_on_test.go @@ -767,6 +767,10 @@ var allTestCases = []*lntest.TestCase{ Name: "send onion twice", TestFunc: testSendOnionTwice, }, + { + Name: "send onion concurrency", + TestFunc: testSendOnionConcurrency, + }, { Name: "track onion", TestFunc: testTrackOnion, diff --git a/itest/lnd_sendonion_test.go b/itest/lnd_sendonion_test.go index bff161eeaa..f45839533d 100644 --- a/itest/lnd_sendonion_test.go +++ b/itest/lnd_sendonion_test.go @@ -1,6 +1,9 @@ package itest import ( + "context" + "sync" + "github.com/btcsuite/btcd/btcec/v2" "github.com/btcsuite/btcd/btcutil" sphinx "github.com/lightningnetwork/lightning-onion" @@ -9,9 +12,12 @@ import ( "github.com/lightningnetwork/lnd/lnrpc/invoicesrpc" "github.com/lightningnetwork/lnd/lnrpc/switchrpc" "github.com/lightningnetwork/lnd/lntest" + "github.com/lightningnetwork/lnd/lntest/rpc" "github.com/lightningnetwork/lnd/lntypes" "github.com/lightningnetwork/lnd/lnwire" "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" ) // testSendOnion tests the basic success case for the SendOnion RPC. It @@ -178,13 +184,18 @@ func testSendOnionTwice(ht *lntest.HarnessTest) { // While the first onion is still in-flight, we'll send the same onion // again with the same attempt ID. This should error as our Switch will // detect duplicate ADDs for *in-flight* HTLCs. - resp = alice.RPC.SendOnion(sendReq) - ht.Logf("SendOnion resp: %+v, code: %v", resp, resp.ErrorCode) - require.False(ht, resp.Success, "expected failure on onion send") - require.Equal(ht, resp.ErrorCode, - switchrpc.ErrorCode_DUPLICATE_HTLC, + ctxt, cancel := context.WithTimeout(context.Background(), + rpc.DefaultTimeout) + defer cancel() + + _, err := alice.RPC.Switch.SendOnion(ctxt, sendReq) + require.Error(ht, err, "expected failure on onion send") + + // Check that we get the expected gRPC error. + s, ok := status.FromError(err) + require.True(ht, ok, "expected gRPC status error") + require.Equal(ht, codes.AlreadyExists, s.Code(), "unexpected error code") - require.Equal(ht, resp.ErrorMessage, htlcswitch.ErrDuplicateAdd.Error()) // Dave settles the invoice. dave.RPC.SettleInvoice(preimage[:]) @@ -203,14 +214,140 @@ func testSendOnionTwice(ht *lntest.HarnessTest) { require.Equal(ht, preimage[:], trackResp.Preimage) // Now that the original HTLC attempt has settled, we'll send the same - // onion again with the same attempt ID. - // - // NOTE: Currently, this does not error. When we make SendOnion fully - // duplicate safe, this should be updated to assert an error is - // returned. - resp = alice.RPC.SendOnion(sendReq) - require.True(ht, resp.Success, "expected successful onion send") - require.Empty(ht, resp.ErrorMessage, "unexpected failure to send onion") + // onion again with the same attempt ID. Confirm that this is also + // prevented. + ctxt, cancel = context.WithTimeout(context.Background(), + rpc.DefaultTimeout) + defer cancel() + + _, err = alice.RPC.Switch.SendOnion(ctxt, sendReq) + require.Error(ht, err, "expected failure on onion send") + + // Check that we get the expected gRPC error. + s, ok = status.FromError(err) + require.True(ht, ok, "expected gRPC status error") + require.Equal(ht, codes.AlreadyExists, s.Code(), + "unexpected error code") +} + +// testSendOnionConcurrency simulates a client that crashes and attempts to +// retry a payment with the same attempt ID concurrently. This test provides a +// strong guarantee that the SendOnion RPC is idempotent and correctly prevents +// duplicate payment attempts from succeeding. +func testSendOnionConcurrency(ht *lntest.HarnessTest) { + // Create a two-node context consisting of Alice and Bob. + const chanAmt = btcutil.Amount(100000) + const numNodes = 2 + nodeCfgs := make([][]string, numNodes) + chanPoints, nodes := ht.CreateSimpleNetwork( + nodeCfgs, lntest.OpenChannelParams{Amt: chanAmt}, + ) + alice, bob := nodes[0], nodes[1] + defer ht.CloseChannel(alice, chanPoints[0]) + + // Make sure Alice knows about the channel. + aliceBobChan := ht.AssertChannelInGraph(alice, chanPoints[0]) + + const paymentAmt = 10000 + + // Request an invoice from Bob so he is expecting payment. + _, rHashes, invoices := ht.CreatePayReqs(bob, paymentAmt, 1) + paymentHash := rHashes[0] + + // Query for a route to pay from Alice to Bob. + routesReq := &lnrpc.QueryRoutesRequest{ + PubKey: bob.PubKeyStr, + Amt: paymentAmt, + } + routes := alice.RPC.QueryRoutes(routesReq) + route := routes.Routes[0] + finalHop := route.Hops[len(route.Hops)-1] + finalHop.MppRecord = &lnrpc.MPPRecord{ + PaymentAddr: invoices[0].PaymentAddr, + TotalAmtMsat: int64(lnwire.NewMSatFromSatoshis(paymentAmt)), + } + + // Construct the onion for the route. + onionReq := &switchrpc.BuildOnionRequest{ + Route: route, + PaymentHash: paymentHash, + } + onionResp := alice.RPC.BuildOnion(onionReq) + + // Create the SendOnion request that all goroutines will use. + // The AttemptId MUST be the same for all calls. + sendReq := &switchrpc.SendOnionRequest{ + FirstHopChanId: aliceBobChan.ChannelId, + Amount: route.TotalAmtMsat, + Timelock: route.TotalTimeLock, + PaymentHash: paymentHash, + OnionBlob: onionResp.OnionBlob, + AttemptId: 42, + } + + const numConcurrentRequests = 50 + var wg sync.WaitGroup + wg.Add(numConcurrentRequests) + + // Use channels to collect the results from each goroutine. + resultsChan := make(chan error, + numConcurrentRequests) + + // Launch all requests concurrently to simulate a retry storm. + for i := 0; i < numConcurrentRequests; i++ { + go func() { + defer wg.Done() + ctxt, cancel := context.WithTimeout( + context.Background(), + rpc.DefaultTimeout, + ) + defer cancel() + + _, err := alice.RPC.Switch.SendOnion(ctxt, sendReq) + resultsChan <- err + }() + } + + wg.Wait() + close(resultsChan) + + // We expect exactly one successful dispatch and the rest to be + // rejected as duplicates. + successCount := 0 + duplicateCount := 0 + + for err := range resultsChan { + // A nil error indicates a successful dispatch. + if err == nil { + successCount++ + continue + } + + // For non-nil errors, we should receive a gRPC status error. + s, ok := status.FromError(err) + if !ok { + // If it's not a gRPC status error, it's an unexpected + // condition. + ht.Fatalf("unexpected error from SendOnion: %v, "+ + "code: %v", s.Err().Error(), s.Code()) + } + + // Check if the error code indicates a duplicate acknowledgment. + if s.Code() == codes.AlreadyExists { + duplicateCount++ + } else { + ht.Fatalf("unexpected error from SendOnion: %v, "+ + "code: %v", s.Err().Error(), s.Code()) + } + } + + // Confirm that only a single dispatch succeeds. + require.Equal(ht, 1, successCount, "expected exactly one success") + require.Equal(ht, numConcurrentRequests-1, duplicateCount, + "expected all other attempts to be duplicates") + + // The invoice should eventually show as settled for Bob. + ht.AssertInvoiceSettled(bob, invoices[0].PaymentAddr) } // testTrackOnion exercises the SwitchRPC server's TrackOnion endpoint, diff --git a/lnrpc/switchrpc/config_active.go b/lnrpc/switchrpc/config_active.go index 851461d2a4..af3691f0bc 100644 --- a/lnrpc/switchrpc/config_active.go +++ b/lnrpc/switchrpc/config_active.go @@ -36,6 +36,10 @@ type Config struct { // be dispatched. HtlcDispatcher routing.PaymentAttemptDispatcher + // AttemptStore provides the means by which the RPC server can manage + // the state of an HTLC attempt, including initializing and failing it. + AttemptStore htlcswitch.AttemptStore + // ChannelInfoAccessor defines an interface for accessing channel // information necessary for dispatching payment attempts, specifically // methods for fetching links by public key. diff --git a/lnrpc/switchrpc/driver.go b/lnrpc/switchrpc/driver.go index c8ceb03e68..6e43de3807 100644 --- a/lnrpc/switchrpc/driver.go +++ b/lnrpc/switchrpc/driver.go @@ -49,6 +49,10 @@ func createNewSubServer(configRegistry lnrpc.SubServerConfigDispatcher) ( return nil, nil, fmt.Errorf("route processor must be set to " + "create SwitchRPC") } + if config.AttemptStore == nil { + return nil, nil, fmt.Errorf("attempt store must be set to " + + "create SwitchRPC") + } return New(config) } diff --git a/lnrpc/switchrpc/mock.go b/lnrpc/switchrpc/mock.go index 0fc66c849d..95d727f60d 100644 --- a/lnrpc/switchrpc/mock.go +++ b/lnrpc/switchrpc/mock.go @@ -72,3 +72,22 @@ func (m *mockRouteProcessor) UnmarshallRoute(route *lnrpc.Route) ( return m.unmarshallRoute, m.unmarshallErr } + +// mockAttemptStore is a mock implementation of the AttemptStore interface. +type mockAttemptStore struct { + htlcswitch.AttemptStore + initErr error + failErr error +} + +// InitAttempt returns the mocked initErr. +func (m *mockAttemptStore) InitAttempt(attemptID uint64) error { + return m.initErr +} + +// FailPendingAttempt returns the mocked failErr. +func (m *mockAttemptStore) FailPendingAttempt(attemptID uint64, + reason *htlcswitch.LinkError) error { + + return m.failErr +} diff --git a/lnrpc/switchrpc/switch.proto b/lnrpc/switchrpc/switch.proto index 06fa60fbb6..bfff1c0c0a 100644 --- a/lnrpc/switchrpc/switch.proto +++ b/lnrpc/switchrpc/switch.proto @@ -10,9 +10,34 @@ option go_package = "github.com/lightningnetwork/lnd/lnrpc/switchrpc"; // subsystem of the daemon. service Switch { /* - SendOnion attempts to make a payment via the specified onion. This - method differs from SendPayment in that the instance need not be aware of - the full details of the payment route. + SendOnion provides an idempotent API for dispatching a pre-formed onion + packet, which is the primary entry point for a remote router. + + To safely handle network failures, a client can and should retry this RPC + after a timeout or disconnection. Retries MUST use the exact same + attempt_id to allow the server to correctly detect duplicate requests. + + A client interacting with this RPC must handle four distinct categories of + outcomes, communicated via gRPC status codes: + + 1. SUCCESS (gRPC code OK): A definitive confirmation that the HTLC has + been successfully dispatched. The client can proceed to track the + payment's final result via the `TrackOnion` RPC. + + 2. DUPLICATE ACKNOWLEDGMENT (gRPC code AlreadyExists): A definitive + acknowledgment that a request with the same attempt_id has already + been successfully processed. A retrying client should interpret this + as a success and proceed to tracking the payment's result. + + 3. AMBIGUOUS FAILURE (gRPC code Unavailable or DeadlineExceeded): An + ambiguous error occurred (e.g., the server is shutting down or the + client timed out). The state of the HTLC dispatch is unknown. The + client MUST retry the exact same request to resolve the ambiguity. + + 4. DEFINITIVE FAILURE (gRPC code FailedPrecondition, InvalidArgument, etc.): + A definitive failure is a guarantee that the HTLC was not and will not be + dispatched. The client should fail the attempt and may retry with a new + route and/or new attempt_id. */ rpc SendOnion (SendOnionRequest) returns (SendOnionResponse); diff --git a/lnrpc/switchrpc/switch.swagger.json b/lnrpc/switchrpc/switch.swagger.json index 21cc6a9599..1c4523e9c7 100644 --- a/lnrpc/switchrpc/switch.swagger.json +++ b/lnrpc/switchrpc/switch.swagger.json @@ -52,7 +52,8 @@ }, "/v2/switch/onion": { "post": { - "summary": "SendOnion attempts to make a payment via the specified onion. This\nmethod differs from SendPayment in that the instance need not be aware of\nthe full details of the payment route.", + "summary": "SendOnion provides an idempotent API for dispatching a pre-formed onion\npacket, which is the primary entry point for a remote router.", + "description": "To safely handle network failures, a client can and should retry this RPC\nafter a timeout or disconnection. Retries MUST use the exact same\nattempt_id to allow the server to correctly detect duplicate requests.\n\nA client interacting with this RPC must handle four distinct categories of\noutcomes, communicated via gRPC status codes:\n\n1. SUCCESS (gRPC code OK): A definitive confirmation that the HTLC has\nbeen successfully dispatched. The client can proceed to track the\npayment's final result via the `TrackOnion` RPC.\n\n2. DUPLICATE ACKNOWLEDGMENT (gRPC code AlreadyExists): A definitive\nacknowledgment that a request with the same attempt_id has already\nbeen successfully processed. A retrying client should interpret this\nas a success and proceed to tracking the payment's result.\n\n3. AMBIGUOUS FAILURE (gRPC code Unavailable or DeadlineExceeded): An\nambiguous error occurred (e.g., the server is shutting down or the\nclient timed out). The state of the HTLC dispatch is unknown. The\nclient MUST retry the exact same request to resolve the ambiguity.\n\n4. DEFINITIVE FAILURE (gRPC code FailedPrecondition, InvalidArgument, etc.):\nA definitive failure is a guarantee that the HTLC was not and will not be\ndispatched. The client should fail the attempt and may retry with a new\nroute and/or new attempt_id.", "operationId": "Switch_SendOnion", "responses": { "200": { diff --git a/lnrpc/switchrpc/switch_grpc.pb.go b/lnrpc/switchrpc/switch_grpc.pb.go index cd06275a97..977eba1e29 100644 --- a/lnrpc/switchrpc/switch_grpc.pb.go +++ b/lnrpc/switchrpc/switch_grpc.pb.go @@ -18,9 +18,34 @@ const _ = grpc.SupportPackageIsVersion7 // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. type SwitchClient interface { - // SendOnion attempts to make a payment via the specified onion. This - // method differs from SendPayment in that the instance need not be aware of - // the full details of the payment route. + // SendOnion provides an idempotent API for dispatching a pre-formed onion + // packet, which is the primary entry point for a remote router. + // + // To safely handle network failures, a client can and should retry this RPC + // after a timeout or disconnection. Retries MUST use the exact same + // attempt_id to allow the server to correctly detect duplicate requests. + // + // A client interacting with this RPC must handle four distinct categories of + // outcomes, communicated via gRPC status codes: + // + // 1. SUCCESS (gRPC code OK): A definitive confirmation that the HTLC has + // been successfully dispatched. The client can proceed to track the + // payment's final result via the `TrackOnion` RPC. + // + // 2. DUPLICATE ACKNOWLEDGMENT (gRPC code AlreadyExists): A definitive + // acknowledgment that a request with the same attempt_id has already + // been successfully processed. A retrying client should interpret this + // as a success and proceed to tracking the payment's result. + // + // 3. AMBIGUOUS FAILURE (gRPC code Unavailable or DeadlineExceeded): An + // ambiguous error occurred (e.g., the server is shutting down or the + // client timed out). The state of the HTLC dispatch is unknown. The + // client MUST retry the exact same request to resolve the ambiguity. + // + // 4. DEFINITIVE FAILURE (gRPC code FailedPrecondition, InvalidArgument, etc.): + // A definitive failure is a guarantee that the HTLC was not and will not be + // dispatched. The client should fail the attempt and may retry with a new + // route and/or new attempt_id. SendOnion(ctx context.Context, in *SendOnionRequest, opts ...grpc.CallOption) (*SendOnionResponse, error) // TrackOnion allows callers to query whether or not a payment dispatched via // SendOnion succeeded or failed. @@ -78,9 +103,34 @@ func (c *switchClient) BuildOnion(ctx context.Context, in *BuildOnionRequest, op // All implementations must embed UnimplementedSwitchServer // for forward compatibility type SwitchServer interface { - // SendOnion attempts to make a payment via the specified onion. This - // method differs from SendPayment in that the instance need not be aware of - // the full details of the payment route. + // SendOnion provides an idempotent API for dispatching a pre-formed onion + // packet, which is the primary entry point for a remote router. + // + // To safely handle network failures, a client can and should retry this RPC + // after a timeout or disconnection. Retries MUST use the exact same + // attempt_id to allow the server to correctly detect duplicate requests. + // + // A client interacting with this RPC must handle four distinct categories of + // outcomes, communicated via gRPC status codes: + // + // 1. SUCCESS (gRPC code OK): A definitive confirmation that the HTLC has + // been successfully dispatched. The client can proceed to track the + // payment's final result via the `TrackOnion` RPC. + // + // 2. DUPLICATE ACKNOWLEDGMENT (gRPC code AlreadyExists): A definitive + // acknowledgment that a request with the same attempt_id has already + // been successfully processed. A retrying client should interpret this + // as a success and proceed to tracking the payment's result. + // + // 3. AMBIGUOUS FAILURE (gRPC code Unavailable or DeadlineExceeded): An + // ambiguous error occurred (e.g., the server is shutting down or the + // client timed out). The state of the HTLC dispatch is unknown. The + // client MUST retry the exact same request to resolve the ambiguity. + // + // 4. DEFINITIVE FAILURE (gRPC code FailedPrecondition, InvalidArgument, etc.): + // A definitive failure is a guarantee that the HTLC was not and will not be + // dispatched. The client should fail the attempt and may retry with a new + // route and/or new attempt_id. SendOnion(context.Context, *SendOnionRequest) (*SendOnionResponse, error) // TrackOnion allows callers to query whether or not a payment dispatched via // SendOnion succeeded or failed. diff --git a/lnrpc/switchrpc/switch_server.go b/lnrpc/switchrpc/switch_server.go index 2f54a11908..0db05f5b08 100644 --- a/lnrpc/switchrpc/switch_server.go +++ b/lnrpc/switchrpc/switch_server.go @@ -236,39 +236,203 @@ func (r *ServerShell) CreateSubServer( return subServer, macPermissions, nil } -// SendOnion handles the incoming request to send a payment using a -// preconstructed onion blob provided by the caller. +// SendOnion provides an idempotent API for dispatching a pre-formed onion +// packet. This RPC is the primary entry point for a remote router that wishes +// to forward a payment through this lnd instance. +// +// To safely handle network failures, a client can and should retry this RPC +// after a timeout or disconnection. Retries MUST use the exact same +// attempt id to allow the server to correctly detect duplicate requests. +// +// The server guarantees safety against duplicate attempts by following a +// "write-ahead" style approach. Upon receiving a request, it first writes a +// durable record of the intent to dispatch the payment, marking the attempt as +// PENDING. Only after this record is secured does it proceed with validation +// and dispatch. If any of these subsequent steps fail, the server synchronously +// transitions the PENDING record to a final FAILED state, ensuring attempts are +// not "orphaned" in an initialized but not actually dispatched state. +// +// A client interacting with this RPC must handle four distinct categories of +// outcomes: +// +// 1. SUCCESS: This is a definitive confirmation that the HTLC has been +// successfully dispatched and is now in-flight. The client can proceed to track +// the payment's final result via the `TrackOnion` RPC. +// +// 2. DUPLICATE ACKNOWLEDGMENT: This is not a payment-level failure. It is a +// definitive acknowledgment that a request with the same attempt id has already +// been successfully processed. A retrying client should interpret this as a +// success for the attempt and proceed to tracking the payment's result. +// +// 3. AMBIGUOUS FAILURE (e.g, gRPC `codes.Unavailable` status): +// This error is returned if the server fails during the critical initial step +// of writing the PENDING record. In this state, the server cannot be certain +// whether the HTLC was dispatched. The client MUST retry the *exact same +// request* with the original attempt id to resolve this ambiguity. A client +// MUST NOT treat this error as a definitive failure and move on to a new +// attempt ID; doing so is the definition of a "leaked attempt" and risks a +// duplicate payment if the original ambiguous attempt succeeded. +// +// 4. DEFINITIVE FAILURE (any other error): For all other failures (e.g., +// invalid parameters, local policy violations), this RPC will return a +// definitive error. A definitive failure is a guarantee that the HTLC was not +// and will not be dispatched. To prevent orphaned records, the server is +// responsible for transitioning the PENDING attempt to a FAILED state. func (s *Server) SendOnion(_ context.Context, req *SendOnionRequest) (*SendOnionResponse, error) { + // First, we initialize the attempt in our persistent store. This serves + // as a durable record of our intent to send and gates the attempt id + // for concurrent callers, allowing them to safely retry. + // + // NOTE: This durable write MUST happen before any dynamic validation + // (e.g., liquidity, peer connectivity). This ordering guarantees that a + // client's retry of a timed-out request will receive a stable acknowle- + // dgement as duplicate, rather than a new, misleading transient error. + // This prevents the client from incorrectly concluding the original + // attempt failed, which would risk a duplicate attempt. + attemptID := req.AttemptId + err := s.cfg.AttemptStore.InitAttempt(attemptID) + if err != nil { + // A record for this attempt ID already exists. This indicates a + // client-side retry of a request that was already successfully + // registered. We return a distinct error to signal that the + // dispatch request is acknowledged and that the caller can + // safely proceed to track the result. + if errors.Is(err, htlcswitch.ErrPaymentIDAlreadyExists) { + log.Debugf("Attempt id=%v already exists", attemptID) + + return nil, status.Errorf(codes.AlreadyExists, + "payment with attempt ID %d already exists", + attemptID) + } + + // If we receive an initialization error, we'll return the error + // directly to the caller so they can handle the ambiguity. + // + // TODO(calvin): actually transport the error signal across the + // rpc boundary possibly using grpc st.WithDetails(). + log.Errorf("Unable to initialize attempt id=%d: %v", attemptID, + err) + + return nil, status.Errorf(codes.Unavailable, "unable to "+ + "initialize attempt id=%d: %v", attemptID, err) + } + + // Perform all RPC-level pre-dispatch checks. + chanID, htlcAdd, validationErr := s.validateAndPrepareOnion(req) + + // If request validation fails, we transition the attempt from an + // initialized to a terminally failed state to prevent an orphaned + // attempt. An orphaned PENDING record would cause any subsequent + // TrackOnion call for this attempt to hang indefinitely. + if validationErr != nil { + log.Warnf("Validation failed for attempt %d: %v. Failing "+ + "pending attempt.", req.AttemptId, validationErr) + + s.rollbackAttempt(req.AttemptId, "validation failure") + + // Return the original, more specific validation error to the + // client. + return nil, validationErr + } + + log.Debugf("Dispatching HTLC attempt(id=%v, amt=%v) for payment=%x "+ + "via channel=%s", req.AttemptId, req.Amount, + htlcAdd.PaymentHash, chanID) + + // With the attempt initialized, we now dispatch the HTLC. + dispatchErr := s.cfg.HtlcDispatcher.SendHTLC(chanID, attemptID, htlcAdd) + if dispatchErr != nil { + // The core dispatch logic failed definitively. We'll + // transition the attempt from an initialized to a terminally + // failed state to prevent an orphaned attempt. + log.Warnf("Dispatch failed for attempt %d: %v. Failing "+ + "pending attempt.", req.AttemptId, dispatchErr) + + s.rollbackAttempt(req.AttemptId, "dispatch failure") + + // Translate the internal dispatch error into a gRPC status + // with rich details for the client. + message, code := translateErrorForRPC(dispatchErr) + + return &SendOnionResponse{ + Success: false, + ErrorMessage: message, + ErrorCode: code, + }, nil + } + + // The onion attempt was successfully dispatched. + return &SendOnionResponse{Success: true}, nil +} + +// rollbackAttempt is a helper which transitions the given attempt from an +// initialized (PENDING) state to a FAILED state in the persistent store. +func (s *Server) rollbackAttempt(attemptID uint64, context string) { + // We use a generic failure reason, as this is an internal rollback. + // The original, more specific error is returned to the client. + failReason := &lnwire.FailTemporaryNodeFailure{} + + err := s.cfg.AttemptStore.FailPendingAttempt( + attemptID, htlcswitch.NewLinkError(failReason), + ) + if err != nil { + log.Errorf("Unable to roll back attempt %d after %s: %v", + attemptID, context, err) + } +} + +// validateAndPrepareOnion performs the pre-checks and preparation for a +// SendOnion request. It returns the channel ID, the HTLC to be sent, and any +// validation error. +func (s *Server) validateAndPrepareOnion(req *SendOnionRequest) ( + lnwire.ShortChannelID, *lnwire.UpdateAddHTLC, error) { + + var chanID lnwire.ShortChannelID + var htlcAdd *lnwire.UpdateAddHTLC + var err error + if len(req.OnionBlob) != lnwire.OnionPacketSize { - return nil, status.Errorf(codes.InvalidArgument, + err = status.Errorf( + codes.InvalidArgument, "onion blob size=%d does not match expected %d bytes", - len(req.OnionBlob), lnwire.OnionPacketSize) + len(req.OnionBlob), lnwire.OnionPacketSize, + ) + + return chanID, htlcAdd, err } if len(req.PaymentHash) == 0 { - return nil, status.Error(codes.InvalidArgument, - "payment hash is required") + err = status.Error( + codes.InvalidArgument, "payment hash is required") + + return chanID, htlcAdd, err } if req.Amount <= 0 { - return nil, status.Error(codes.InvalidArgument, - "amount must be greater than zero") + err = status.Error(codes.InvalidArgument, + "amount must be greater than zero", + ) + + return chanID, htlcAdd, err } var ( amount = lnwire.MilliSatoshi(req.Amount) pubkeySet = len(req.FirstHopPubkey) != 0 channelIDSet = req.FirstHopChanId != 0 - chanID lnwire.ShortChannelID ) switch { case pubkeySet == channelIDSet: - return nil, status.Error(codes.InvalidArgument, + err = status.Error( + codes.InvalidArgument, "must specify exactly one of first_hop_pubkey or "+ - "first_hop_chan_id") + "first_hop_chan_id", + ) + + return chanID, htlcAdd, err case channelIDSet: // Case 1: The caller provided the first hop chan id directly. @@ -277,35 +441,44 @@ func (s *Server) SendOnion(_ context.Context, case pubkeySet: // Case 2: Convert the first hop pubkey into a format usable by // the forwarding subsystem. - firstHop, err := btcec.ParsePubKey(req.FirstHopPubkey) - if err != nil { - return nil, status.Errorf(codes.InvalidArgument, + firstHop, parseErr := btcec.ParsePubKey(req.FirstHopPubkey) + if parseErr != nil { + err = status.Errorf(codes.InvalidArgument, "invalid first hop pubkey=%x: %v", - req.FirstHopPubkey, err) + req.FirstHopPubkey, parseErr) + + return chanID, htlcAdd, err } // Find an eligible channel ID for the given first-hop pubkey. chanID, err = s.findEligibleChannelID(firstHop, amount) if err != nil { - return nil, status.Errorf(codes.Internal, + err = status.Errorf(codes.FailedPrecondition, "unable to find eligible channel for "+ "pubkey=%x: %v", firstHop.SerializeCompressed(), err) + + return chanID, htlcAdd, err } } - hash, err := lntypes.MakeHash(req.PaymentHash) - if err != nil { - return nil, status.Errorf(codes.InvalidArgument, - "invalid payment_hash=%x: %v", req.PaymentHash, err) + hash, parseErr := lntypes.MakeHash(req.PaymentHash) + if parseErr != nil { + err = status.Errorf(codes.InvalidArgument, + "invalid payment_hash=%x: %v", req.PaymentHash, + parseErr) + + return chanID, htlcAdd, err } var blindingPoint lnwire.BlindingPointRecord if len(req.BlindingPoint) > 0 { - pubkey, err := btcec.ParsePubKey(req.BlindingPoint) - if err != nil { - return nil, status.Errorf(codes.InvalidArgument, - "invalid blinding point: %v", err) + pubkey, parseErr := btcec.ParsePubKey(req.BlindingPoint) + if parseErr != nil { + err = status.Errorf(codes.InvalidArgument, + "invalid blinding point: %v", parseErr) + + return chanID, htlcAdd, err } blindingPoint = tlv.SomeRecordT( @@ -318,7 +491,7 @@ func (s *Server) SendOnion(_ context.Context, // Craft an HTLC packet to send to the htlcswitch. The metadata within // this packet will be used to route the payment through the network, // starting with the first-hop. - htlcAdd := &lnwire.UpdateAddHTLC{ + htlcAdd = &lnwire.UpdateAddHTLC{ Amount: amount, Expiry: req.Timelock, PaymentHash: hash, @@ -328,21 +501,7 @@ func (s *Server) SendOnion(_ context.Context, ExtraData: lnwire.ExtraOpaqueData(req.ExtraData), } - log.Debugf("Dispatching HTLC attempt(id=%v, amt=%v) for payment=%v "+ - "via channel=%s", req.AttemptId, req.Amount, hash, chanID) - - // Send the HTLC to the first hop directly by way of the HTLCSwitch. - err = s.cfg.HtlcDispatcher.SendHTLC(chanID, req.AttemptId, htlcAdd) - if err != nil { - message, code := translateErrorForRPC(err) - return &SendOnionResponse{ - Success: false, - ErrorMessage: message, - ErrorCode: code, - }, nil - } - - return &SendOnionResponse{Success: true}, nil + return chanID, htlcAdd, err } // findEligibleChannelID attempts to find an eligible channel based on the diff --git a/lnrpc/switchrpc/switch_server_test.go b/lnrpc/switchrpc/switch_server_test.go index 87fcde057d..d0dd7fde69 100644 --- a/lnrpc/switchrpc/switch_server_test.go +++ b/lnrpc/switchrpc/switch_server_test.go @@ -137,18 +137,13 @@ func TestSendOnion(t *testing.T) { setup: func(t *testing.T, s *Server, req *SendOnionRequest) { - // Mock a duplicate error from the dispatcher, - // which is the new way to signal a duplicate - // attempt for the same ID. - payer, ok := s.cfg.HtlcDispatcher.(*mockPayer) + // Mock a duplicate error from the attempt + // store. + store, ok := s.cfg.AttemptStore.(*mockAttemptStore) require.True(t, ok) - payer.sendErr = htlcswitch.ErrDuplicateAdd - }, - expectedResponse: &SendOnionResponse{ - Success: false, - ErrorMessage: htlcswitch.ErrDuplicateAdd.Error(), - ErrorCode: ErrorCode_DUPLICATE_HTLC, + store.initErr = htlcswitch.ErrPaymentIDAlreadyExists }, + expectedErrCode: codes.AlreadyExists, }, } @@ -160,6 +155,7 @@ func TestSendOnion(t *testing.T) { // isolation. server, _, err := New(&Config{ HtlcDispatcher: &mockPayer{}, + AttemptStore: &mockAttemptStore{}, }) require.NoError(t, err) diff --git a/subrpcserver_config.go b/subrpcserver_config.go index 4350f67c24..7c1cda4aef 100644 --- a/subrpcserver_config.go +++ b/subrpcserver_config.go @@ -331,6 +331,10 @@ func (s *subRPCServerConfigs) PopulateDependencies(cfg *Config, reflect.ValueOf(routerBackend), ) + subCfgValue.FieldByName("AttemptStore").Set( + reflect.ValueOf(htlcSwitch.AttemptStore()), + ) + case *watchtowerrpc.Config: subCfgValue := extractReflectValue(subCfg)