@@ -3871,44 +3871,68 @@ fn test_single_channel_multiple_mpp() {
 	// `ChannelManager::claim_funds` we'd end up hanging waiting to apply a
 	// `ChannelMonitorUpdate` until after it completed. See the commit which introduced this test
 	// for more info.
-	let chanmon_cfgs = create_chanmon_cfgs(7);
-	let node_cfgs = create_node_cfgs(7, &chanmon_cfgs);
-	let node_chanmgrs = create_node_chanmgrs(7, &node_cfgs, &[None; 7]);
-	let mut nodes = create_network(7, &node_cfgs, &node_chanmgrs);
-
-	let node_5_id = nodes[5].node.get_our_node_id();
-	let node_6_id = nodes[6].node.get_our_node_id();
-
-	// Send an MPP payment in four parts along the path shown from top to bottom
-	//    0
-	// 1 2 3 4
-	//    5
-	//    6
+	let chanmon_cfgs = create_chanmon_cfgs(9);
+	let node_cfgs = create_node_cfgs(9, &chanmon_cfgs);
+	let node_chanmgrs = create_node_chanmgrs(9, &node_cfgs, &[None; 9]);
+	let mut nodes = create_network(9, &node_cfgs, &node_chanmgrs);
+
+	let node_7_id = nodes[7].node.get_our_node_id();
+	let node_8_id = nodes[8].node.get_our_node_id();
+
+	// Send an MPP payment in six parts along the path shown from top to bottom
+	//       0
+	// 1 2 3 4 5 6
+	//       7
+	//       8
+	//
+	// We can in theory reproduce this issue with fewer channels/HTLCs, but getting this test
+	// robust is rather challenging. We rely on having the main test thread wait on locks held by
+	// the background `claim_funds` thread, which unlock when the `claim_funds` thread completes a
+	// single `ChannelMonitorUpdate`.
+	// This thread calls `get_and_clear_pending_msg_events()` and `handle_revoke_and_ack()`, both
+	// of which require `ChannelManager` locks, but we have to make sure this thread gets a chance
+	// to be blocked on the mutexes before we let the background thread wake `claim_funds`, so
+	// that the mutex can switch to this main thread.
+	// This relies on our locks being fair, but also on our threads getting runtime during the
+	// test run, which can be pretty competitive. Thus we do a dumb dance to be as conservative as
+	// possible - we have a background thread which completes a `ChannelMonitorUpdate` (by sending
+	// into the `write_blocker` mpsc) but it doesn't run until an mpsc channel sends from this
+	// main thread to the background thread, and then we let it sleep a while before we send the
+	// `ChannelMonitorUpdate` unblocker.
+	// Further, we give ourselves two chances each time, needing 4 HTLCs just to unlock our two
+	// `ChannelManager` calls. We then need a few remaining HTLCs to actually trigger the bug, so
+	// we use 6 HTLCs.
+	const MAX_THREAD_INIT_TIME: std::time::Duration = std::time::Duration::from_secs(1);
 
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 100_000, 0);
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 100_000, 0);
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 3, 100_000, 0);
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 4, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 1, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 2, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 3, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 4, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 5, 6, 1_000_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 0, 5, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 0, 6, 100_000, 0);
 
-	let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[6], 30_000_000);
+	create_announced_chan_between_nodes_with_value(&nodes, 1, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 2, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 3, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 4, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 5, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 6, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 7, 8, 1_000_000, 0);
 
-	send_along_route_with_secret(&nodes[0], route, &[&[&nodes[1], &nodes[5], &nodes[6]], &[&nodes[2], &nodes[5], &nodes[6]], &[&nodes[3], &nodes[5], &nodes[6]], &[&nodes[4], &nodes[5], &nodes[6]]], 30_000_000, payment_hash, payment_secret);
+	let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[8], 50_000_000);
+
+	send_along_route_with_secret(&nodes[0], route, &[&[&nodes[1], &nodes[7], &nodes[8]], &[&nodes[2], &nodes[7], &nodes[8]], &[&nodes[3], &nodes[7], &nodes[8]], &[&nodes[4], &nodes[7], &nodes[8]], &[&nodes[5], &nodes[7], &nodes[8]], &[&nodes[6], &nodes[7], &nodes[8]]], 50_000_000, payment_hash, payment_secret);
 
 	let (do_a_write, blocker) = std::sync::mpsc::sync_channel(0);
-	*nodes[6].chain_monitor.write_blocker.lock().unwrap() = Some(blocker);
+	*nodes[8].chain_monitor.write_blocker.lock().unwrap() = Some(blocker);
 
 	// Until we have std::thread::scoped we have to unsafe { turn off the borrow checker }.
 	// We do this by casting a pointer to a `TestChannelManager` to a pointer to a
 	// `TestChannelManager` with different (in this case 'static) lifetime.
 	// This is even suggested in the second example at
 	// https://doc.rust-lang.org/std/mem/fn.transmute.html#examples
 	let claim_node: &'static TestChannelManager<'static, 'static> =
-		unsafe { std::mem::transmute(nodes[6].node as &TestChannelManager) };
+		unsafe { std::mem::transmute(nodes[8].node as &TestChannelManager) };
 	let thrd = std::thread::spawn(move || {
 		// Initiate the claim in a background thread as it will immediately block waiting on the
 		// `write_blocker` we set above.
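The thread dance described in the comments above relies on zero-capacity `std::sync::mpsc::sync_channel`s, which rendezvous on every `send`/`recv` pair: a `send` into a bound-0 channel blocks until the other side calls `recv`. A minimal, self-contained sketch of the pattern, with illustrative names (`start_bg`, the simulated `write_blocker`) rather than the harness's actual helpers:

```rust
use std::sync::mpsc::sync_channel;
use std::thread;
use std::time::Duration;

fn main() {
    // Zero-capacity channels act as rendezvous points: every send() blocks
    // until the matching recv(), so the two threads proceed in lockstep.
    let (start_bg, block_bg) = sync_channel::<()>(0);
    let (do_a_write, write_blocker) = sync_channel::<()>(0);

    let bg = thread::spawn(move || {
        // Do nothing until the main thread explicitly starts us...
        block_bg.recv().unwrap();
        // ...then give the main thread time to reach its blocking call
        // before releasing one "write".
        thread::sleep(Duration::from_millis(100));
        do_a_write.send(()).unwrap();
    });

    start_bg.send(()).unwrap();
    // Stand-in for the call that stays blocked until the background thread
    // allows one update through.
    write_blocker.recv().unwrap();
    bg.join().unwrap();
}
```

Because the channel has no buffer, the main thread knows the background thread has started before it makes its own blocking call, mirroring how the test starts `thrd2`/`thrd3` immediately before entering `get_htlc_update_msgs` and `handle_revoke_and_ack`.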
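The `unsafe { transmute(...) }` in the hunk above exists only to give the borrow a `'static` lifetime so it can move into the spawned thread; as the comment notes, this predates scoped threads. For comparison, a sketch of how the same borrow-across-a-thread shape looks with `std::thread::scope` (stable since Rust 1.63) — an alternative for reference, not what the diff does:

```rust
use std::thread;

fn main() {
    let nodes = vec![String::from("node")];
    // std::thread::scope guarantees the spawned thread is joined before
    // `nodes` can be dropped, so the borrow is sound without any transmute.
    thread::scope(|s| {
        s.spawn(|| {
            println!("claiming via {}", nodes[0]);
        });
    });
}
```

The scope enforces at compile time the invariant the test otherwise upholds manually via `thrd.join()`.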
@@ -3924,85 +3948,126 @@ fn test_single_channel_multiple_mpp() {
 		// `claim_funds` is holding. Thus, we release a second write after a small sleep in the
 		// background to give `claim_funds` a chance to step forward, unblocking
 		// `get_and_clear_pending_msg_events`.
-	const MAX_THREAD_INIT_TIME: std::time::Duration = std::time::Duration::from_millis(10);
 	let do_a_write_background = do_a_write.clone();
+	let (start_thrd2, block_thrd2) = std::sync::mpsc::sync_channel(0);
 	let thrd2 = std::thread::spawn(move || {
+		block_thrd2.recv().unwrap();
+		std::thread::sleep(MAX_THREAD_INIT_TIME);
+		do_a_write_background.send(()).unwrap();
 		std::thread::sleep(MAX_THREAD_INIT_TIME);
 		do_a_write_background.send(()).unwrap();
 	});
-	let first_updates = get_htlc_update_msgs(&nodes[6], &nodes[5].node.get_our_node_id());
+	start_thrd2.send(()).unwrap();
+	let first_updates = get_htlc_update_msgs(&nodes[8], &nodes[7].node.get_our_node_id());
 	thrd2.join().unwrap();
 
-	nodes[5].node.peer_disconnected(nodes[1].node.get_our_node_id());
-	nodes[5].node.peer_disconnected(nodes[2].node.get_our_node_id());
-	nodes[5].node.peer_disconnected(nodes[3].node.get_our_node_id());
-	nodes[5].node.peer_disconnected(nodes[4].node.get_our_node_id());
-
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &first_updates.update_fulfill_htlcs[0]);
-	check_added_monitors(&nodes[5], 1);
-	expect_payment_forwarded!(nodes[5], nodes[1], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_commitment_signed(node_6_id, &first_updates.commitment_signed);
-	check_added_monitors(&nodes[5], 1);
-	let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
+	// Disconnect node 7 from all its peers so it doesn't bother to fail the HTLCs back
+	nodes[7].node.peer_disconnected(nodes[1].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[3].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[4].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[5].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[6].node.get_our_node_id());
+
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &first_updates.update_fulfill_htlcs[0]);
+	check_added_monitors(&nodes[7], 1);
+	expect_payment_forwarded!(nodes[7], nodes[1], nodes[8], Some(1000), false, false);
+	nodes[7].node.handle_commitment_signed(node_8_id, &first_updates.commitment_signed);
+	check_added_monitors(&nodes[7], 1);
+	let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
 
 	// Now, handle the `revoke_and_ack` from node 7. Note that `claim_funds` is still blocked on
 	// our peer lock, so we have to release a write to let it process.
+	// After this call completes, the channel would previously have locked up, unable to make
+	// any further progress.
 	let do_a_write_background = do_a_write.clone();
+	let (start_thrd3, block_thrd3) = std::sync::mpsc::sync_channel(0);
 	let thrd3 = std::thread::spawn(move || {
+		block_thrd3.recv().unwrap();
+		std::thread::sleep(MAX_THREAD_INIT_TIME);
+		do_a_write_background.send(()).unwrap();
 		std::thread::sleep(MAX_THREAD_INIT_TIME);
 		do_a_write_background.send(()).unwrap();
 	});
-	nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
+	start_thrd3.send(()).unwrap();
+	nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
 	thrd3.join().unwrap();
+	assert!(!thrd.is_finished());
 
 	let thrd4 = std::thread::spawn(move || {
-		std::thread::sleep(MAX_THREAD_INIT_TIME);
 		do_a_write.send(()).unwrap();
 		do_a_write.send(()).unwrap();
 	});
 
 	thrd4.join().unwrap();
 	thrd.join().unwrap();
 
-	expect_payment_claimed!(nodes[6], payment_hash, 30_000_000);
+	expect_payment_claimed!(nodes[8], payment_hash, 50_000_000);
 
-	// At the end, we should have 5 ChannelMonitorUpdates - 4 for HTLC claims, and one for the
+	// At the end, we should have 7 ChannelMonitorUpdates - 6 for HTLC claims, and one for the
 	// above `revoke_and_ack`.
-	check_added_monitors(&nodes[6], 5);
-
-	// Now drive everything to the end, at least as far as node 6 is concerned...
-	*nodes[6].chain_monitor.write_blocker.lock().unwrap() = None;
-	nodes[6].node.handle_commitment_signed(node_5_id, &cs);
-	check_added_monitors(&nodes[6], 1);
-
-	let (updates, raa) = get_updates_and_revoke(&nodes[6], &nodes[5].node.get_our_node_id());
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[0]);
-	expect_payment_forwarded!(nodes[5], nodes[2], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[1]);
-	expect_payment_forwarded!(nodes[5], nodes[3], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_commitment_signed(node_6_id, &updates.commitment_signed);
-	nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
-	check_added_monitors(&nodes[5], 4);
-
-	let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
-
-	nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
-	nodes[6].node.handle_commitment_signed(node_5_id, &cs);
-	check_added_monitors(&nodes[6], 2);
-
-	let (updates, raa) = get_updates_and_revoke(&nodes[6], &nodes[5].node.get_our_node_id());
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[0]);
-	expect_payment_forwarded!(nodes[5], nodes[4], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_commitment_signed(node_6_id, &updates.commitment_signed);
-	nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
-	check_added_monitors(&nodes[5], 3);
-
-	let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
-	nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
-	nodes[6].node.handle_commitment_signed(node_5_id, &cs);
-	check_added_monitors(&nodes[6], 2);
-
-	let raa = get_event_msg!(nodes[6], MessageSendEvent::SendRevokeAndACK, node_5_id);
-	nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
-	check_added_monitors(&nodes[5], 1);
+	check_added_monitors(&nodes[8], 7);
+
+	// Now drive everything to the end, at least as far as node 8 is concerned...
+	*nodes[8].chain_monitor.write_blocker.lock().unwrap() = None;
+	nodes[8].node.handle_commitment_signed(node_7_id, &cs);
+	check_added_monitors(&nodes[8], 1);
+
+	let (updates, raa) = get_updates_and_revoke(&nodes[8], &nodes[7].node.get_our_node_id());
+
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[0]);
+	expect_payment_forwarded!(nodes[7], nodes[2], nodes[8], Some(1000), false, false);
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[1]);
+	expect_payment_forwarded!(nodes[7], nodes[3], nodes[8], Some(1000), false, false);
+	let mut next_source = 4;
+	if let Some(update) = updates.update_fulfill_htlcs.get(2) {
+		nodes[7].node.handle_update_fulfill_htlc(node_8_id, update);
+		expect_payment_forwarded!(nodes[7], nodes[4], nodes[8], Some(1000), false, false);
+		next_source += 1;
+	}
+
+	nodes[7].node.handle_commitment_signed(node_8_id, &updates.commitment_signed);
+	nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
+	if updates.update_fulfill_htlcs.get(2).is_some() {
+		check_added_monitors(&nodes[7], 5);
+	} else {
+		check_added_monitors(&nodes[7], 4);
+	}
+
+	let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
+
+	nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
+	nodes[8].node.handle_commitment_signed(node_7_id, &cs);
+	check_added_monitors(&nodes[8], 2);
+
+	let (updates, raa) = get_updates_and_revoke(&nodes[8], &node_7_id);
+
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[0]);
+	expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
+	next_source += 1;
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[1]);
+	expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
+	next_source += 1;
+	if let Some(update) = updates.update_fulfill_htlcs.get(2) {
+		nodes[7].node.handle_update_fulfill_htlc(node_8_id, update);
+		expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
+		next_source += 1;
+	}
+
+	nodes[7].node.handle_commitment_signed(node_8_id, &updates.commitment_signed);
+	nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
+	if updates.update_fulfill_htlcs.get(2).is_some() {
+		check_added_monitors(&nodes[7], 5);
+	} else {
+		check_added_monitors(&nodes[7], 4);
+	}
+
+	let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
+	nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
+	nodes[8].node.handle_commitment_signed(node_7_id, &cs);
+	check_added_monitors(&nodes[8], 2);
+
+	let raa = get_event_msg!(nodes[8], MessageSendEvent::SendRevokeAndACK, node_7_id);
+	nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
+	check_added_monitors(&nodes[7], 1);
 }
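Judging from its use in this test, `write_blocker` is a test-harness hook on the chain monitor: while it holds `Some(receiver)`, each `ChannelMonitorUpdate` persist stalls until the test sends one token, and setting it back to `None` lets writes flow freely. A rough sketch of that gating technique, assuming a hypothetical `Persist` trait rather than LDK's real persistence interface:

```rust
use std::sync::mpsc::Receiver;
use std::sync::Mutex;

/// Stand-in for the real monitor-persistence interface (hypothetical).
trait Persist {
    fn persist_update(&self);
}

/// Wraps a persister so each update stalls until the test sends one token.
struct BlockingPersister<P: Persist> {
    inner: P,
    write_blocker: Mutex<Option<Receiver<()>>>,
}

impl<P: Persist> Persist for BlockingPersister<P> {
    fn persist_update(&self) {
        let blocker = self.write_blocker.lock().unwrap();
        if let Some(rx) = blocker.as_ref() {
            // One token per update: the test decides exactly when, and how
            // many, monitor updates may complete.
            rx.recv().unwrap();
        }
        drop(blocker);
        self.inner.persist_update();
    }
}
```

Each completed update consumes exactly one `do_a_write.send(())`, which is how the test meters out `ChannelMonitorUpdate`s one at a time while `claim_funds` runs in the background thread.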