
Commit 8a370bd

f maybe fix test on ci?
1 parent c6bbb99 commit 8a370bd

File tree

1 file changed: +148 -76 lines changed

lightning/src/ln/chanmon_update_fail_tests.rs

@@ -3864,51 +3864,77 @@ fn test_claim_to_closed_channel_blocks_claimed_event() {
 #[test]
 #[cfg(feature = "std")]
 fn test_single_channel_multiple_mpp() {
+	use std::sync::atomic::{AtomicBool, Ordering};
+
 	// Test what happens when we attempt to claim an MPP with many parts that came to us through
 	// the same channel with a synchronous persistence interface which has very high latency.
 	//
 	// Previously, if a `revoke_and_ack` came in while we were still running in
 	// `ChannelManager::claim_payment` we'd end up hanging waiting to apply a
 	// `ChannelMonitorUpdate` until after it completed. See the commit which introduced this test
 	// for more info.
-	let chanmon_cfgs = create_chanmon_cfgs(7);
-	let node_cfgs = create_node_cfgs(7, &chanmon_cfgs);
-	let node_chanmgrs = create_node_chanmgrs(7, &node_cfgs, &[None; 7]);
-	let mut nodes = create_network(7, &node_cfgs, &node_chanmgrs);
-
-	let node_5_id = nodes[5].node.get_our_node_id();
-	let node_6_id = nodes[6].node.get_our_node_id();
-
-	// Send an MPP payment in four parts along the path shown from top to bottom
-	//      0
-	// 1 2 3 4
-	//      5
-	//      6
+	let chanmon_cfgs = create_chanmon_cfgs(9);
+	let node_cfgs = create_node_cfgs(9, &chanmon_cfgs);
+	let node_chanmgrs = create_node_chanmgrs(9, &node_cfgs, &[None; 9]);
+	let mut nodes = create_network(9, &node_cfgs, &node_chanmgrs);
+
+	let node_7_id = nodes[7].node.get_our_node_id();
+	let node_8_id = nodes[8].node.get_our_node_id();
+
+	// Send an MPP payment in six parts along the path shown from top to bottom
+	//        0
+	// 1 2 3 4 5 6
+	//        7
+	//        8
+	//
+	// We can in theory reproduce this issue with fewer channels/HTLCs, but getting this test
+	// robust is rather challenging. We rely on having the main test thread wait on locks held in
+	// the background `claim_funds` thread and unlocking when the `claim_funds` thread completes a
+	// single `ChannelMonitorUpdate`.
+	// This thread calls `get_and_clear_pending_msg_events()` and `handle_revoke_and_ack()`, both
+	// of which require `ChannelManager` locks, but we have to make sure this thread gets a chance
+	// to be blocked on the mutexes before we let the background thread wake `claim_funds` so that
+	// the mutex can switch to this main thread.
+	// This relies on our locks being fair, but also on our threads getting runtime during the test
+	// run, which can be pretty competitive. Thus we do a dumb dance to be as conservative as
+	// possible - we have a background thread which completes a `ChannelMonitorUpdate` (by sending
+	// into the `write_blocker` mpsc) but it doesn't run until a mpsc channel sends from this main
+	// thread to the background thread, and then we let it sleep a while before we send the
+	// `ChannelMonitorUpdate` unblocker.
+	// Further, we give ourselves two chances each time, needing 4 HTLCs just to unlock our two
+	// `ChannelManager` calls. We then need a few remaining HTLCs to actually trigger the bug, so
+	// we use 6 HTLCs.
+	const MAX_THREAD_INIT_TIME: std::time::Duration = std::time::Duration::from_secs(1);
 
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 100_000, 0);
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 100_000, 0);
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 3, 100_000, 0);
 	create_announced_chan_between_nodes_with_value(&nodes, 0, 4, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 1, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 2, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 3, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 4, 5, 100_000, 0);
-	create_announced_chan_between_nodes_with_value(&nodes, 5, 6, 1_000_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 0, 5, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 0, 6, 100_000, 0);
 
-	let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[6], 30_000_000);
+	create_announced_chan_between_nodes_with_value(&nodes, 1, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 2, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 3, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 4, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 5, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 6, 7, 100_000, 0);
+	create_announced_chan_between_nodes_with_value(&nodes, 7, 8, 1_000_000, 0);
 
-	send_along_route_with_secret(&nodes[0], route, &[&[&nodes[1], &nodes[5], &nodes[6]], &[&nodes[2], &nodes[5], &nodes[6]], &[&nodes[3], &nodes[5], &nodes[6]], &[&nodes[4], &nodes[5], &nodes[6]]], 30_000_000, payment_hash, payment_secret);
+	let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[8], 50_000_000);
+
+	send_along_route_with_secret(&nodes[0], route, &[&[&nodes[1], &nodes[7], &nodes[8]], &[&nodes[2], &nodes[7], &nodes[8]], &[&nodes[3], &nodes[7], &nodes[8]], &[&nodes[4], &nodes[7], &nodes[8]], &[&nodes[5], &nodes[7], &nodes[8]], &[&nodes[6], &nodes[7], &nodes[8]]], 50_000_000, payment_hash, payment_secret);
 
 	let (do_a_write, blocker) = std::sync::mpsc::sync_channel(0);
-	*nodes[6].chain_monitor.write_blocker.lock().unwrap() = Some(blocker);
+	*nodes[8].chain_monitor.write_blocker.lock().unwrap() = Some(blocker);
 
 	// Until we have std::thread::scoped we have to unsafe { turn off the borrow checker }.
 	// We do this by casting a pointer to a `TestChannelManager` to a pointer to a
 	// `TestChannelManager` with different (in this case 'static) lifetime.
 	// This is even suggested in the second example at
 	// https://doc.rust-lang.org/std/mem/fn.transmute.html#examples
 	let claim_node: &'static TestChannelManager<'static, 'static> =
-		unsafe { std::mem::transmute(nodes[6].node as &TestChannelManager) };
+		unsafe { std::mem::transmute(nodes[8].node as &TestChannelManager) };
 	let thrd = std::thread::spawn(move || {
 		// Initiate the claim in a background thread as it will immediately block waiting on the
 		// `write_blocker` we set above.
@@ -3924,85 +3950,131 @@ fn test_single_channel_multiple_mpp() {
 	// `claim_funds` is holding. Thus, we release a second write after a small sleep in the
 	// background to give `claim_funds` a chance to step forward, unblocking
 	// `get_and_clear_pending_msg_events`.
-	const MAX_THREAD_INIT_TIME: std::time::Duration = std::time::Duration::from_millis(10);
 	let do_a_write_background = do_a_write.clone();
+	let block_thrd2 = AtomicBool::new(true);
+	let block_thrd2_read: &'static AtomicBool = unsafe { std::mem::transmute(&block_thrd2) };
 	let thrd2 = std::thread::spawn(move || {
+		while block_thrd2_read.load(Ordering::Acquire) {
+			std::thread::yield_now();
+		}
+		std::thread::sleep(MAX_THREAD_INIT_TIME);
+		do_a_write_background.send(()).unwrap();
 		std::thread::sleep(MAX_THREAD_INIT_TIME);
 		do_a_write_background.send(()).unwrap();
 	});
-	let first_updates = get_htlc_update_msgs(&nodes[6], &nodes[5].node.get_our_node_id());
+	block_thrd2.store(false, Ordering::Release);
+	let first_updates = get_htlc_update_msgs(&nodes[8], &nodes[7].node.get_our_node_id());
 	thrd2.join().unwrap();
 
-	nodes[5].node.peer_disconnected(nodes[1].node.get_our_node_id());
-	nodes[5].node.peer_disconnected(nodes[2].node.get_our_node_id());
-	nodes[5].node.peer_disconnected(nodes[3].node.get_our_node_id());
-	nodes[5].node.peer_disconnected(nodes[4].node.get_our_node_id());
-
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &first_updates.update_fulfill_htlcs[0]);
-	check_added_monitors(&nodes[5], 1);
-	expect_payment_forwarded!(nodes[5], nodes[1], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_commitment_signed(node_6_id, &first_updates.commitment_signed);
-	check_added_monitors(&nodes[5], 1);
-	let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
+	// Disconnect node 7 from all its peers so it doesn't bother to fail the HTLCs back
+	nodes[7].node.peer_disconnected(nodes[1].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[3].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[4].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[5].node.get_our_node_id());
+	nodes[7].node.peer_disconnected(nodes[6].node.get_our_node_id());
+
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &first_updates.update_fulfill_htlcs[0]);
+	check_added_monitors(&nodes[7], 1);
+	expect_payment_forwarded!(nodes[7], nodes[1], nodes[8], Some(1000), false, false);
+	nodes[7].node.handle_commitment_signed(node_8_id, &first_updates.commitment_signed);
+	check_added_monitors(&nodes[7], 1);
+	let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
 
 	// Now, handle the `revoke_and_ack` from node 5. Note that `claim_funds` is still blocked on
 	// our peer lock, so we have to release a write to let it process.
+	// After this call completes, the channel previously would be locked up and should not be able
+	// to make further progress.
 	let do_a_write_background = do_a_write.clone();
+	let block_thrd3 = AtomicBool::new(true);
+	let block_thrd3_read: &'static AtomicBool = unsafe { std::mem::transmute(&block_thrd3) };
 	let thrd3 = std::thread::spawn(move || {
+		while block_thrd3_read.load(Ordering::Acquire) {
+			std::thread::yield_now();
+		}
+		std::thread::sleep(MAX_THREAD_INIT_TIME);
+		do_a_write_background.send(()).unwrap();
 		std::thread::sleep(MAX_THREAD_INIT_TIME);
 		do_a_write_background.send(()).unwrap();
 	});
-	nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
+	block_thrd3.store(false, Ordering::Release);
+	nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
 	thrd3.join().unwrap();
+	assert!(!thrd.is_finished());
 
 	let thrd4 = std::thread::spawn(move || {
-		std::thread::sleep(MAX_THREAD_INIT_TIME);
 		do_a_write.send(()).unwrap();
 		do_a_write.send(()).unwrap();
 	});
 
 	thrd4.join().unwrap();
 	thrd.join().unwrap();
 
-	expect_payment_claimed!(nodes[6], payment_hash, 30_000_000);
+	expect_payment_claimed!(nodes[8], payment_hash, 50_000_000);
 
-	// At the end, we should have 5 ChannelMonitorUpdates - 4 for HTLC claims, and one for the
+	// At the end, we should have 7 ChannelMonitorUpdates - 6 for HTLC claims, and one for the
 	// above `revoke_and_ack`.
-	check_added_monitors(&nodes[6], 5);
-
-	// Now drive everything to the end, at least as far as node 6 is concerned...
-	*nodes[6].chain_monitor.write_blocker.lock().unwrap() = None;
-	nodes[6].node.handle_commitment_signed(node_5_id, &cs);
-	check_added_monitors(&nodes[6], 1);
-
-	let (updates, raa) = get_updates_and_revoke(&nodes[6], &nodes[5].node.get_our_node_id());
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[0]);
-	expect_payment_forwarded!(nodes[5], nodes[2], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[1]);
-	expect_payment_forwarded!(nodes[5], nodes[3], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_commitment_signed(node_6_id, &updates.commitment_signed);
-	nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
-	check_added_monitors(&nodes[5], 4);
-
-	let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
-
-	nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
-	nodes[6].node.handle_commitment_signed(node_5_id, &cs);
-	check_added_monitors(&nodes[6], 2);
-
-	let (updates, raa) = get_updates_and_revoke(&nodes[6], &nodes[5].node.get_our_node_id());
-	nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[0]);
-	expect_payment_forwarded!(nodes[5], nodes[4], nodes[6], Some(1000), false, false);
-	nodes[5].node.handle_commitment_signed(node_6_id, &updates.commitment_signed);
-	nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
-	check_added_monitors(&nodes[5], 3);
-
-	let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
-	nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
-	nodes[6].node.handle_commitment_signed(node_5_id, &cs);
-	check_added_monitors(&nodes[6], 2);
-
-	let raa = get_event_msg!(nodes[6], MessageSendEvent::SendRevokeAndACK, node_5_id);
-	nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
-	check_added_monitors(&nodes[5], 1);
+	check_added_monitors(&nodes[8], 7);
+
+	// Now drive everything to the end, at least as far as node 7 is concerned...
+	*nodes[8].chain_monitor.write_blocker.lock().unwrap() = None;
+	nodes[8].node.handle_commitment_signed(node_7_id, &cs);
+	check_added_monitors(&nodes[8], 1);
+
+	let (updates, raa) = get_updates_and_revoke(&nodes[8], &nodes[7].node.get_our_node_id());
+
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[0]);
+	expect_payment_forwarded!(nodes[7], nodes[2], nodes[8], Some(1000), false, false);
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[1]);
+	expect_payment_forwarded!(nodes[7], nodes[3], nodes[8], Some(1000), false, false);
+	let mut next_source = 4;
+	if let Some(update) = updates.update_fulfill_htlcs.get(2) {
+		nodes[7].node.handle_update_fulfill_htlc(node_8_id, update);
+		expect_payment_forwarded!(nodes[7], nodes[4], nodes[8], Some(1000), false, false);
+		next_source += 1;
+	}
+
+	nodes[7].node.handle_commitment_signed(node_8_id, &updates.commitment_signed);
+	nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
+	if updates.update_fulfill_htlcs.get(2).is_some() {
+		check_added_monitors(&nodes[7], 5);
+	} else {
+		check_added_monitors(&nodes[7], 4);
+	}
+
+	let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
+
+	nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
+	nodes[8].node.handle_commitment_signed(node_7_id, &cs);
+	check_added_monitors(&nodes[8], 2);
+
+	let (updates, raa) = get_updates_and_revoke(&nodes[8], &node_7_id);
+
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[0]);
+	expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
+	next_source += 1;
+	nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[1]);
+	expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
+	next_source += 1;
+	if let Some(update) = updates.update_fulfill_htlcs.get(2) {
+		nodes[7].node.handle_update_fulfill_htlc(node_8_id, update);
+		expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
+	}
+
+	nodes[7].node.handle_commitment_signed(node_8_id, &updates.commitment_signed);
+	nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
+	if updates.update_fulfill_htlcs.get(2).is_some() {
+		check_added_monitors(&nodes[7], 5);
+	} else {
+		check_added_monitors(&nodes[7], 4);
+	}
+
+	let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
+	nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
+	nodes[8].node.handle_commitment_signed(node_7_id, &cs);
+	check_added_monitors(&nodes[8], 2);
+
+	let raa = get_event_msg!(nodes[8], MessageSendEvent::SendRevokeAndACK, node_7_id);
+	nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
+	check_added_monitors(&nodes[7], 1);
 }
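
The substantive change in this commit is the synchronization pattern: the old version gave each background writer a fixed 10ms head start (`from_millis(10)`), which is racy on slow CI machines, while the new version parks each background thread behind an `AtomicBool` gate that the main thread releases only once it is about to make its blocking call, and bumps the sleep to a full second. Below is a minimal, self-contained sketch of that gate-plus-rendezvous idea; it borrows the names `do_a_write`, `blocker`, and `MAX_THREAD_INIT_TIME` from the test, but everything else (including using a `static` to avoid the test's transmute-to-'static trick) is illustrative rather than taken from the LDK harness:

	use std::sync::atomic::{AtomicBool, Ordering};
	use std::sync::mpsc;
	use std::time::Duration;

	const MAX_THREAD_INIT_TIME: Duration = Duration::from_secs(1);

	fn main() {
		// Zero-capacity channel: send() blocks until the receiver takes the
		// value, standing in for the test's `write_blocker` on monitor writes.
		let (do_a_write, blocker) = mpsc::sync_channel::<()>(0);

		// Gate the helper thread on an AtomicBool so it cannot run ahead of
		// the main thread, no matter how the scheduler orders thread startup.
		static BLOCK_HELPER: AtomicBool = AtomicBool::new(true);

		let helper = std::thread::spawn(move || {
			while BLOCK_HELPER.load(Ordering::Acquire) {
				std::thread::yield_now();
			}
			// Only after the gate opens does the clock start, giving the main
			// thread MAX_THREAD_INIT_TIME to park on its blocking call below.
			std::thread::sleep(MAX_THREAD_INIT_TIME);
			do_a_write.send(()).unwrap(); // releases one blocked "write"
		});

		BLOCK_HELPER.store(false, Ordering::Release);
		// Stands in for the main-thread call that blocks until a write completes.
		blocker.recv().unwrap();
		helper.join().unwrap();
	}

Note the division of labor: the gate guarantees ordering (the helper cannot fire early), while the sleep only makes the intended interleaving likely. The test still relies on the main thread reaching its blocking call within `MAX_THREAD_INIT_TIME`, so raising it from 10ms to 1s trades test runtime for robustness on contended CI runners.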
