From 7d023c2d4ab5c4a5318c44702ac02f2a4031090e Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Tue, 21 Jan 2025 17:38:54 -0300 Subject: [PATCH 01/33] refactor: initial removal of discovery code from net move discovery to its own folder and files --- crates/networking/p2p/discv4/discv4.rs | 684 +++++++++++ crates/networking/p2p/discv4/helpers.rs | 31 + crates/networking/p2p/discv4/lookup.rs | 200 ++++ .../p2p/{discv4.rs => discv4/messages.rs} | 32 +- crates/networking/p2p/discv4/mod.rs | 5 + crates/networking/p2p/discv4/requests.rs | 123 ++ crates/networking/p2p/kademlia.rs | 11 +- crates/networking/p2p/net.rs | 1007 +---------------- 8 files changed, 1072 insertions(+), 1021 deletions(-) create mode 100644 crates/networking/p2p/discv4/discv4.rs create mode 100644 crates/networking/p2p/discv4/helpers.rs create mode 100644 crates/networking/p2p/discv4/lookup.rs rename crates/networking/p2p/{discv4.rs => discv4/messages.rs} (97%) create mode 100644 crates/networking/p2p/discv4/mod.rs create mode 100644 crates/networking/p2p/discv4/requests.rs diff --git a/crates/networking/p2p/discv4/discv4.rs b/crates/networking/p2p/discv4/discv4.rs new file mode 100644 index 0000000000..73551946fb --- /dev/null +++ b/crates/networking/p2p/discv4/discv4.rs @@ -0,0 +1,684 @@ +use crate::{ + bootnode::BootNode, + discv4::{ + helpers::{get_expiration, is_expired, time_now_unix, time_since_in_hs}, + messages::{Message, NeighborsMessage, Packet}, + requests::{ping, pong}, + }, + handle_peer_as_initiator, + kademlia::MAX_NODES_PER_BUCKET, + rlpx::message::Message as RLPxMessage, + types::Node, + KademliaTable, MAX_DISC_PACKET_SIZE, +}; +use ethrex_core::H512; +use ethrex_storage::Store; +use futures::try_join; +use k256::ecdsa::SigningKey; +use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; +use tokio::{ + net::UdpSocket, + sync::{broadcast, Mutex}, +}; +use tracing::debug; + +use super::lookup::DiscoveryLookupHandler; + +#[derive(Debug, Clone)] +pub struct Discv4 { + local_node: Node, + udp_addr: SocketAddr, + signer: SigningKey, + storage: Store, + table: Arc>, + tx_broadcaster_send: broadcast::Sender<(tokio::task::Id, Arc)>, + udp_socket: Arc, + revalidation_interval_seconds: u64, +} + +pub enum DiscoveryError { + UnexpectedError, +} + +const REVALIDATION_INTERVAL_IN_SECONDS: u64 = 30; // this is just an arbitrary number, maybe we should get this from a cfg or cli param +const PROOF_EXPIRATION_IN_HS: u16 = 12; + +impl Discv4 { + pub fn new( + local_node: Node, + signer: SigningKey, + storage: Store, + table: Arc>, + tx_broadcaster_send: broadcast::Sender<(tokio::task::Id, Arc)>, + udp_socket: Arc, + ) -> Self { + Self { + local_node, + signer, + storage, + table, + tx_broadcaster_send, + udp_addr: SocketAddr::new(local_node.ip, local_node.udp_port), + udp_socket, + revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, + } + } + + pub fn with_revalidation_interval_of(self, seconds: u64) -> Self { + Self { + revalidation_interval_seconds: seconds, + ..self + } + } + + pub fn with_lookup_interval_of(self, minutes: u64) -> Self { + Self { + revalidation_interval_seconds: minutes, + ..self + } + } + + pub async fn start_discovery_service( + &self, + bootnodes: Vec, + ) -> Result<(), DiscoveryError> { + let server_handler = tokio::spawn({ + let clone = self.clone(); + async move { + clone.handle_messages().await; + } + }); + self.load_bootnodes(bootnodes).await; + + let revalidation_handler = tokio::spawn({ + let clone = self.clone(); + async move { + 
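+                // Periodically re-ping the least recently pinged peers, adjust their
+                // liveness score based on whether they answered, and replace peers
+                // whose liveness drops to zero (see start_revalidation_task below).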
clone.start_revalidation_task().await; + } + }); + + // a first initial lookup runs without waiting for the interval + // so we need to allow some time to the pinged peers to ping us back and acknowledge us + let self_clone = self.clone(); + let lookup_handler = tokio::spawn(async move { + DiscoveryLookupHandler::new( + self_clone.local_node, + self_clone.signer, + self_clone.udp_socket, + self_clone.table, + ) + .start_lookup_task() + .await + }); + + let result = try_join!(server_handler, revalidation_handler, lookup_handler); + + if result.is_ok() { + Ok(()) + } else { + Err(DiscoveryError::UnexpectedError) + } + } + + async fn load_bootnodes(&self, bootnodes: Vec) { + for bootnode in bootnodes { + self.table.lock().await.insert_node(Node { + ip: bootnode.socket_address.ip(), + udp_port: bootnode.socket_address.port(), + // TODO: udp port can differ from tcp port. + // see https://github.com/lambdaclass/ethrex/issues/905 + tcp_port: bootnode.socket_address.port(), + node_id: bootnode.node_id, + }); + let ping_hash = ping( + &self.udp_socket, + self.udp_addr, + bootnode.socket_address, + &self.signer, + ) + .await; + self.table + .lock() + .await + .update_peer_ping(bootnode.node_id, ping_hash); + } + } + + async fn handle_messages(&self) { + let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; + + loop { + let (read, from) = self.udp_socket.recv_from(&mut buf).await.unwrap(); + debug!("Received {read} bytes from {from}"); + + let packet = Packet::decode(&buf[..read]); + if packet.is_err() { + debug!("Could not decode packet: {:?}", packet.err().unwrap()); + continue; + } + let packet = packet.unwrap(); + + let msg = packet.get_message(); + debug!("Message: {:?} from {}", msg, packet.get_node_id()); + + match msg { + Message::Ping(msg) => { + if is_expired(msg.expiration) { + debug!("Ignoring ping as it is expired."); + continue; + }; + let ping_hash = packet.get_hash(); + pong(&self.udp_socket, from, ping_hash, &self.signer).await; + let node = { + let table = self.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(peer) = node { + // send a a ping to get an endpoint proof + if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { + let hash = + ping(&self.udp_socket, self.udp_addr, from, &self.signer).await; + if let Some(hash) = hash { + self.table + .lock() + .await + .update_peer_ping(peer.node.node_id, Some(hash)); + } + } + } else { + // send a ping to get the endpoint proof from our end + let (peer, inserted_to_table) = { + let mut table = self.table.lock().await; + table.insert_node(Node { + ip: from.ip(), + udp_port: from.port(), + tcp_port: 0, + node_id: packet.get_node_id(), + }) + }; + let hash = ping(&self.udp_socket, self.udp_addr, from, &self.signer).await; + if let Some(hash) = hash { + if inserted_to_table && peer.is_some() { + let peer = peer.unwrap(); + self.table + .lock() + .await + .update_peer_ping(peer.node.node_id, Some(hash)); + } + } + } + } + Message::Pong(msg) => { + let table = self.table.clone(); + if is_expired(msg.expiration) { + debug!("Ignoring pong as it is expired."); + continue; + } + let peer = { + let table = table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(peer) = peer { + if peer.last_ping_hash.is_none() { + debug!("Discarding pong as the node did not send a previous ping"); + continue; + } + if peer.last_ping_hash.unwrap() == msg.ping_hash { + table.lock().await.pong_answered(peer.node.node_id); + + let mut msg_buf = vec![0; read - 32]; + 
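+                        // The endpoint proof is now complete: drop the 32-byte packet
+                        // hash, keep the remaining payload, and initiate an RLPx
+                        // connection to the verified peer on a separate task.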
buf[32..read].clone_into(&mut msg_buf); + let signer = self.signer.clone(); + let storage = self.storage.clone(); + let broadcaster = self.tx_broadcaster_send.clone(); + tokio::spawn(async move { + handle_peer_as_initiator( + signer, + &msg_buf, + &peer.node, + storage, + table, + broadcaster, + ) + .await; + }); + } else { + debug!( + "Discarding pong as the hash did not match the last corresponding ping" + ); + } + } else { + debug!("Discarding pong as it is not a known node"); + } + } + Message::FindNode(msg) => { + if is_expired(msg.expiration) { + debug!("Ignoring find node msg as it is expired."); + continue; + }; + let node = { + let table = self.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(node) = node { + if node.is_proven { + let nodes = { + let table = self.table.lock().await; + table.get_closest_nodes(msg.target) + }; + let nodes_chunks = nodes.chunks(4); + let expiration = get_expiration(20); + debug!("Sending neighbors!"); + // we are sending the neighbors in 4 different messages as not to exceed the + // maximum packet size + for nodes in nodes_chunks { + let neighbors = Message::Neighbors(NeighborsMessage::new( + nodes.to_vec(), + expiration, + )); + let mut buf = Vec::new(); + neighbors.encode_with_header(&mut buf, &self.signer); + let _ = self.udp_socket.send_to(&buf, from).await; + } + } else { + debug!("Ignoring find node message as the node isn't proven!"); + } + } else { + debug!("Ignoring find node message as it is not a known node"); + } + } + Message::Neighbors(neighbors_msg) => { + if is_expired(neighbors_msg.expiration) { + debug!("Ignoring neighbor msg as it is expired."); + continue; + }; + + let mut nodes_to_insert = None; + let mut table = self.table.lock().await; + if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { + if let Some(req) = &mut node.find_node_request { + if time_now_unix().saturating_sub(req.sent_at) >= 60 { + debug!("Ignoring neighbors message as the find_node request expires after one minute"); + node.find_node_request = None; + continue; + } + let nodes = &neighbors_msg.nodes; + let nodes_sent = req.nodes_sent + nodes.len(); + + if nodes_sent <= MAX_NODES_PER_BUCKET { + debug!("Storing neighbors in our table!"); + req.nodes_sent = nodes_sent; + nodes_to_insert = Some(nodes.clone()); + if let Some(tx) = &req.tx { + let _ = tx.send(nodes.clone()); + } + } else { + debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); + } + + if nodes_sent == MAX_NODES_PER_BUCKET { + debug!("Neighbors request has been fulfilled"); + node.find_node_request = None; + } + } + } else { + debug!("Ignoring neighbor msg as it is not a known node"); + } + + if let Some(nodes) = nodes_to_insert { + for node in nodes { + let (peer, inserted_to_table) = table.insert_node(node); + if inserted_to_table && peer.is_some() { + let peer = peer.unwrap(); + let node_addr = SocketAddr::new(peer.node.ip, peer.node.udp_port); + let ping_hash = + ping(&self.udp_socket, self.udp_addr, node_addr, &self.signer) + .await; + table.update_peer_ping(peer.node.node_id, ping_hash); + }; + } + } + } + _ => {} + } + } + } + + async fn start_revalidation_task(&self) { + let mut interval = + tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); + // peers we have pinged in the previous iteration + let mut previously_pinged_peers: HashSet = HashSet::default(); + + // first tick starts immediately + interval.tick().await; + + loop { + interval.tick().await; + debug!("Running 
peer revalidation"); + + // first check that the peers we ping have responded + for node_id in previously_pinged_peers { + let mut table = self.table.lock().await; + let peer = table.get_by_node_id_mut(node_id).unwrap(); + + if let Some(has_answered) = peer.revalidation { + if has_answered { + peer.increment_liveness(); + } else { + peer.decrement_liveness(); + } + } + + peer.revalidation = None; + + if peer.liveness == 0 { + let new_peer = table.replace_peer(node_id); + if let Some(new_peer) = new_peer { + let ping_hash = ping( + &self.udp_socket, + self.udp_addr, + SocketAddr::new(new_peer.node.ip, new_peer.node.udp_port), + &self.signer, + ) + .await; + table.update_peer_ping(new_peer.node.node_id, ping_hash); + } + } + } + + // now send a ping to the least recently pinged peers + // this might be too expensive to run if our table is filled + // maybe we could just pick them randomly + let peers = self.table.lock().await.get_least_recently_pinged_peers(3); + previously_pinged_peers = HashSet::default(); + for peer in peers { + let ping_hash = ping( + &self.udp_socket, + self.udp_addr, + SocketAddr::new(peer.node.ip, peer.node.udp_port), + &self.signer, + ) + .await; + let mut table = self.table.lock().await; + table.update_peer_ping_with_revalidation(peer.node.node_id, ping_hash); + previously_pinged_peers.insert(peer.node.node_id); + + debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); + } + + debug!("Peer revalidation finished"); + } + } +} + +// #[cfg(test)] +// mod tests { +// use super::*; +// use crate::{kademlia::bucket_number, node_id_from_signing_key, MAX_MESSAGES_TO_BROADCAST}; +// use ethrex_storage::EngineType; +// use rand::rngs::OsRng; +// use std::{ +// collections::HashSet, +// net::{IpAddr, Ipv4Addr}, +// }; +// use tokio::time::sleep; + +// async fn insert_random_node_on_custom_bucket( +// table: Arc>, +// bucket_idx: usize, +// ) { +// let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); +// let node = Node { +// ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), +// tcp_port: 0, +// udp_port: 0, +// node_id, +// }; +// table +// .lock() +// .await +// .insert_node_on_custom_bucket(node, bucket_idx); +// } + +// async fn fill_table_with_random_nodes(table: Arc>) { +// for i in 0..256 { +// for _ in 0..16 { +// insert_random_node_on_custom_bucket(table.clone(), i).await; +// } +// } +// } + +// async fn start_mock_discovery_server(udp_port: u16, should_start_server: bool) -> Discv4 { +// let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); +// let signer = SigningKey::random(&mut OsRng); +// let udp_socket = Arc::new(UdpSocket::bind(addr).await.unwrap()); +// let node_id = node_id_from_signing_key(&signer); +// let storage = +// Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); +// let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); +// let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( +// tokio::task::Id, +// Arc, +// )>(MAX_MESSAGES_TO_BROADCAST); + +// let discv4 = Discv4::new(); +// if should_start_server { +// tokio::spawn(disv4.handle_messages()); +// } +// } + +// /// connects two mock servers by pinging a to b +// async fn connect_servers(server_a: &mut MockServer, server_b: &mut MockServer) { +// let ping_hash = ping( +// &server_a.udp_socket, +// server_a.addr, +// server_b.addr, +// &server_a.signer, +// ) +// .await; +// { +// let mut table = server_a.table.lock().await; +// table.insert_node(Node { +// ip: server_b.addr.ip(), +// udp_port: 
server_b.addr.port(), +// tcp_port: 0, +// node_id: server_b.node_id, +// }); +// table.update_peer_ping(server_b.node_id, ping_hash); +// } +// // allow some time for the server to respond +// sleep(Duration::from_secs(1)).await; +// } + +// #[tokio::test] +// /** This is a end to end test on the discovery server, the idea is as follows: +// * - We'll start two discovery servers (`a` & `b`) to ping between each other +// * - We'll make `b` ping `a`, and validate that the connection is right +// * - Then we'll wait for a revalidation where we expect everything to be the same +// * - We'll do this five 5 more times +// * - Then we'll stop server `a` so that it doesn't respond to re-validations +// * - We expect server `b` to remove node `a` from its table after 3 re-validations +// * To make this run faster, we'll change the revalidation time to be every 2secs +// */ +// async fn discovery_server_revalidation() { +// let mut server_a = start_mock_discovery_server(7998, true).await; +// let mut server_b = start_mock_discovery_server(7999, true).await; + +// connect_servers(&mut server_a, &mut server_b).await; + +// // start revalidation server +// tokio::spawn(peers_revalidation( +// server_b.addr, +// server_b.udp_socket.clone(), +// server_b.table.clone(), +// server_b.signer.clone(), +// 2, +// )); + +// for _ in 0..5 { +// sleep(Duration::from_millis(2500)).await; +// // by now, b should've send a revalidation to a +// let table = server_b.table.lock().await; +// let node = table.get_by_node_id(server_a.node_id).unwrap(); +// assert!(node.revalidation.is_some()); +// } + +// // make sure that `a` has responded too all the re-validations +// // we can do that by checking the liveness +// { +// let table = server_b.table.lock().await; +// let node = table.get_by_node_id(server_a.node_id).unwrap(); +// assert_eq!(node.liveness, 6); +// } + +// // now, stopping server `a` is not trivial +// // so we'll instead change its port, so that no one responds +// { +// let mut table = server_b.table.lock().await; +// let node = table.get_by_node_id_mut(server_a.node_id).unwrap(); +// node.node.udp_port = 0; +// } + +// // now the liveness field should start decreasing until it gets to 0 +// // which should happen in 3 re-validations +// for _ in 0..2 { +// sleep(Duration::from_millis(2500)).await; +// let table = server_b.table.lock().await; +// let node = table.get_by_node_id(server_a.node_id).unwrap(); +// assert!(node.revalidation.is_some()); +// } +// sleep(Duration::from_millis(2500)).await; + +// // finally, `a`` should not exist anymore +// let table = server_b.table.lock().await; +// assert!(table.get_by_node_id(server_a.node_id).is_none()); +// } + +// #[tokio::test] +// /** This test tests the lookup function, the idea is as follows: +// * - We'll start two discovery servers (`a` & `b`) that will connect between each other +// * - We'll insert random nodes to the server `a`` to fill its table +// * - We'll forcedly run `lookup` and validate that a `find_node` request was sent +// * by checking that new nodes have been inserted to the table +// * +// * This test for only one lookup, and not recursively. +// */ +// async fn discovery_server_lookup() { +// let mut server_a = start_mock_discovery_server(8000, true).await; +// let mut server_b = start_mock_discovery_server(8001, true).await; + +// fill_table_with_random_nodes(server_a.table.clone()).await; + +// // before making the connection, remove a node from the `b` bucket. 
Otherwise it won't be added +// let b_bucket = bucket_number(server_a.node_id, server_b.node_id); +// let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] +// .node +// .node_id; +// server_a +// .table +// .lock() +// .await +// .replace_peer_on_custom_bucket(node_id_to_remove, b_bucket); + +// connect_servers(&mut server_a, &mut server_b).await; + +// // now we are going to run a lookup with us as the target +// let closets_peers_to_b_from_a = server_a +// .table +// .lock() +// .await +// .get_closest_nodes(server_b.node_id); +// let nodes_to_ask = server_b +// .table +// .lock() +// .await +// .get_closest_nodes(server_b.node_id); + +// lookup( +// server_b.udp_socket.clone(), +// server_b.table.clone(), +// &server_b.signer, +// server_b.node_id, +// &mut HashSet::default(), +// &nodes_to_ask, +// ) +// .await; + +// // find_node sent, allow some time for `a` to respond +// sleep(Duration::from_secs(2)).await; + +// // now all peers should've been inserted +// for peer in closets_peers_to_b_from_a { +// let table = server_b.table.lock().await; +// assert!(table.get_by_node_id(peer.node_id).is_some()); +// } +// } + +// #[tokio::test] +// /** This test tests the lookup function, the idea is as follows: +// * - We'll start four discovery servers (`a`, `b`, `c` & `d`) +// * - `a` will be connected to `b`, `b` will be connected to `c` and `c` will be connected to `d`. +// * - The server `d` will have its table filled with mock nodes +// * - We'll run a recursive lookup on server `a` and we expect to end with `b`, `c`, `d` and its mock nodes +// */ +// async fn discovery_server_recursive_lookup() { +// let mut server_a = start_mock_discovery_server(8002, true).await; +// let mut server_b = start_mock_discovery_server(8003, true).await; +// let mut server_c = start_mock_discovery_server(8004, true).await; +// let mut server_d = start_mock_discovery_server(8005, true).await; + +// connect_servers(&mut server_a, &mut server_b).await; +// connect_servers(&mut server_b, &mut server_c).await; +// connect_servers(&mut server_c, &mut server_d).await; + +// // now we fill the server_d table with 3 random nodes +// // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) +// // and so we will have to wait for the timeout on each node, which will only slow down the test +// for _ in 0..3 { +// insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; +// } + +// let mut expected_peers = vec![]; +// expected_peers.extend( +// server_b +// .table +// .lock() +// .await +// .get_closest_nodes(server_a.node_id), +// ); +// expected_peers.extend( +// server_c +// .table +// .lock() +// .await +// .get_closest_nodes(server_a.node_id), +// ); +// expected_peers.extend( +// server_d +// .table +// .lock() +// .await +// .get_closest_nodes(server_a.node_id), +// ); + +// // we'll run a recursive lookup closest to the server itself +// recursive_lookup( +// server_a.udp_socket.clone(), +// server_a.table.clone(), +// server_a.signer.clone(), +// server_a.node_id, +// server_a.node_id, +// ) +// .await; + +// for peer in expected_peers { +// assert!(server_a +// .table +// .lock() +// .await +// .get_by_node_id(peer.node_id) +// .is_some()); +// } +// } +// } diff --git a/crates/networking/p2p/discv4/helpers.rs b/crates/networking/p2p/discv4/helpers.rs new file mode 100644 index 0000000000..de5e4e006b --- /dev/null +++ b/crates/networking/p2p/discv4/helpers.rs @@ -0,0 +1,31 @@ +use std::time::{Duration, SystemTime, 
UNIX_EPOCH}; + +//TODO remove unwraps +pub fn get_expiration(seconds: u64) -> u64 { + (SystemTime::now() + Duration::from_secs(seconds)) + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() +} + +pub fn is_expired(expiration: u64) -> bool { + // this cast to a signed integer is needed as the rlp decoder doesn't take into account the sign + // otherwise a potential negative expiration would pass since it would take 2^64. + (expiration as i64) + < SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() as i64 +} + +pub fn time_since_in_hs(time: u64) -> u64 { + let time = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(time); + SystemTime::now().duration_since(time).unwrap().as_secs() / 3600 +} + +pub fn time_now_unix() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() +} diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs new file mode 100644 index 0000000000..11d122b381 --- /dev/null +++ b/crates/networking/p2p/discv4/lookup.rs @@ -0,0 +1,200 @@ +use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; + +use ethrex_core::H512; +use k256::ecdsa::SigningKey; +use rand::rngs::OsRng; +use tokio::{net::UdpSocket, sync::Mutex, try_join}; +use tracing::debug; + +use crate::{ + kademlia::{bucket_number, MAX_NODES_PER_BUCKET}, + node_id_from_signing_key, + types::Node, + KademliaTable, +}; + +use super::requests::find_node_and_wait_for_response; + +const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; // same as above + +#[derive(Clone, Debug)] +pub struct DiscoveryLookupHandler { + local_node: Node, + signer: SigningKey, + udp_socket: Arc, + table: Arc>, + lookup_interval_minutes: u64, + seen_peers: HashSet, + asked_peers: HashSet, +} + +impl DiscoveryLookupHandler { + pub fn new( + local_node: Node, + signer: SigningKey, + udp_socket: Arc, + table: Arc>, + ) -> Self { + Self { + local_node, + signer, + udp_socket, + table, + lookup_interval_minutes: PEERS_RANDOM_LOOKUP_TIME_IN_MIN, + seen_peers: HashSet::new(), + asked_peers: HashSet::new(), + } + } + + pub async fn start_lookup_task(&self) { + let mut interval = tokio::time::interval(Duration::from_secs(self.lookup_interval_minutes)); + + loop { + // Notice that the first tick is immediate, + // so as soon as the server starts we'll do a lookup with the seeder nodes. 
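+            // Each iteration spawns one lookup targeting our own node id plus three
+            // targeting random node ids; all of them are awaited before the next tick.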
+ interval.tick().await; + + debug!("Starting lookup"); + + let mut handlers = vec![]; + + // lookup closest to our pub key + let self_clone = self.clone(); + handlers.push(tokio::spawn(async move { + self_clone + .recursive_lookup(self_clone.local_node.node_id) + .await; + })); + + // lookup closest to 3 random keys + for _ in 0..3 { + let random_pub_key = SigningKey::random(&mut OsRng); + let self_clone = self.clone(); + handlers.push(tokio::spawn(async move { + self_clone + .recursive_lookup(node_id_from_signing_key(&random_pub_key)) + .await + })) + } + + for handle in handlers { + let _ = try_join!(handle); + } + + debug!("Lookup finished"); + } + } + + async fn recursive_lookup(&self, target: H512) { + let mut asked_peers = HashSet::default(); + // lookups start with the closest from our table + let closest_nodes = self.table.lock().await.get_closest_nodes(target); + let mut seen_peers: HashSet = HashSet::default(); + + seen_peers.insert(self.local_node.node_id); + for node in &closest_nodes { + seen_peers.insert(node.node_id); + } + + let mut peers_to_ask: Vec = closest_nodes; + + loop { + let (nodes_found, queries) = self.lookup(target, &mut asked_peers, &peers_to_ask).await; + + // only push the peers that have not been seen + // that is those who have not been yet pushed, which also accounts for + // those peers that were in the array but have been replaced for closer peers + for node in nodes_found { + if !seen_peers.contains(&node.node_id) { + seen_peers.insert(node.node_id); + self.peers_to_ask_push(&mut peers_to_ask, target, node); + } + } + + // the lookup finishes when there are no more queries to do + // that happens when we have asked all the peers + if queries == 0 { + break; + } + } + } + + fn peers_to_ask_push(&self, peers_to_ask: &mut Vec, target: H512, node: Node) { + let distance = bucket_number(target, node.node_id); + + if peers_to_ask.len() < MAX_NODES_PER_BUCKET { + peers_to_ask.push(node); + return; + } + + // replace this node for the one whose distance to the target is the highest + let (mut idx_to_replace, mut highest_distance) = (None, 0); + + for (i, peer) in peers_to_ask.iter().enumerate() { + let current_distance = bucket_number(peer.node_id, target); + + if distance < current_distance && current_distance >= highest_distance { + highest_distance = current_distance; + idx_to_replace = Some(i); + } + } + + if let Some(idx) = idx_to_replace { + peers_to_ask[idx] = node; + } + } + + async fn lookup( + &self, + target: H512, + asked_peers: &mut HashSet, + nodes_to_ask: &Vec, + ) -> (Vec, u32) { + // ask FIND_NODE as much as three times + let alpha = 3; + let mut queries = 0; + let mut nodes = vec![]; + + for node in nodes_to_ask { + if !asked_peers.contains(&node.node_id) { + #[allow(unused_assignments)] + let mut rx = None; + { + let mut table = self.table.lock().await; + let peer = table.get_by_node_id_mut(node.node_id); + if let Some(peer) = peer { + // if the peer has an ongoing find_node request, don't query + if peer.find_node_request.is_some() { + continue; + } + let (tx, receiver) = tokio::sync::mpsc::unbounded_channel::>(); + peer.new_find_node_request_with_sender(tx); + rx = Some(receiver); + } else { + // if peer isn't inserted to table, don't query + continue; + } + } + + queries += 1; + asked_peers.insert(node.node_id); + + let mut found_nodes = find_node_and_wait_for_response( + &self.udp_socket, + SocketAddr::new(node.ip, node.udp_port), + &self.signer, + target, + &mut rx.unwrap(), + ) + .await; + nodes.append(&mut found_nodes); + } + + if 
queries == alpha { + break; + } + } + + (nodes, queries) + } +} diff --git a/crates/networking/p2p/discv4.rs b/crates/networking/p2p/discv4/messages.rs similarity index 97% rename from crates/networking/p2p/discv4.rs rename to crates/networking/p2p/discv4/messages.rs index e457e2d122..5d64b739bc 100644 --- a/crates/networking/p2p/discv4.rs +++ b/crates/networking/p2p/discv4/messages.rs @@ -1,5 +1,3 @@ -use std::time::{Duration, SystemTime, UNIX_EPOCH}; - use crate::types::{Endpoint, Node, NodeRecord}; use bytes::BufMut; use ethrex_core::{H256, H512, H520}; @@ -12,35 +10,7 @@ use ethrex_rlp::{ use k256::ecdsa::{RecoveryId, Signature, SigningKey, VerifyingKey}; use sha3::{Digest, Keccak256}; -//todo add tests -pub fn get_expiration(seconds: u64) -> u64 { - (SystemTime::now() + Duration::from_secs(seconds)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() -} - -pub fn is_expired(expiration: u64) -> bool { - // this cast to a signed integer is needed as the rlp decoder doesn't take into account the sign - // otherwise a potential negative expiration would pass since it would take 2^64. - (expiration as i64) - < SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64 -} - -pub fn time_since_in_hs(time: u64) -> u64 { - let time = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(time); - SystemTime::now().duration_since(time).unwrap().as_secs() / 3600 -} - -pub fn time_now_unix() -> u64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() -} +use super::helpers::time_now_unix; #[derive(Debug, PartialEq)] pub enum PacketDecodeErr { diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs new file mode 100644 index 0000000000..3f7c39e4a6 --- /dev/null +++ b/crates/networking/p2p/discv4/mod.rs @@ -0,0 +1,5 @@ +pub(crate) mod discv4; +pub(self) mod helpers; +pub(crate) mod lookup; +pub(crate) mod messages; +pub(crate) mod requests; diff --git a/crates/networking/p2p/discv4/requests.rs b/crates/networking/p2p/discv4/requests.rs new file mode 100644 index 0000000000..b39262a5ce --- /dev/null +++ b/crates/networking/p2p/discv4/requests.rs @@ -0,0 +1,123 @@ +use std::{ + net::SocketAddr, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use ethrex_core::{H256, H512}; +use k256::ecdsa::SigningKey; +use tokio::net::UdpSocket; + +use crate::{ + kademlia::MAX_NODES_PER_BUCKET, + types::{Endpoint, Node}, +}; + +use super::messages::{FindNodeMessage, Message, PingMessage, PongMessage}; + +// Sends a ping to the addr +/// # Returns +/// an optional hash corresponding to the message header hash to account if the send was successful +pub async fn ping( + socket: &UdpSocket, + // TODO replace this with our node, so we can fill the tcp port + local_addr: SocketAddr, + to_addr: SocketAddr, + signer: &SigningKey, +) -> Option { + let mut buf = Vec::new(); + + let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + + // TODO: this should send our advertised TCP port + let from = Endpoint { + ip: local_addr.ip(), + udp_port: local_addr.port(), + tcp_port: 0, + }; + let to = Endpoint { + ip: to_addr.ip(), + udp_port: to_addr.port(), + tcp_port: 0, + }; + + let ping = Message::Ping(PingMessage::new(from, to, expiration)); + ping.encode_with_header(&mut buf, signer); + let res = socket.send_to(&buf, to_addr).await; + + if res.is_err() { + return None; + } + let bytes_sent = res.unwrap(); + + if bytes_sent == buf.len() { + return 
Some(H256::from_slice(&buf[0..32])); + } + + None +} + +pub async fn pong(socket: &UdpSocket, to_addr: SocketAddr, ping_hash: H256, signer: &SigningKey) { + let mut buf = Vec::new(); + + let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + + let to = Endpoint { + ip: to_addr.ip(), + udp_port: to_addr.port(), + tcp_port: 0, + }; + let pong = Message::Pong(PongMessage::new(to, ping_hash, expiration)); + + pong.encode_with_header(&mut buf, signer); + let _ = socket.send_to(&buf, to_addr).await; +} + +pub async fn find_node_and_wait_for_response( + socket: &UdpSocket, + to_addr: SocketAddr, + signer: &SigningKey, + target_node_id: H512, + request_receiver: &mut tokio::sync::mpsc::UnboundedReceiver>, +) -> Vec { + let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + + let msg = Message::FindNode(FindNodeMessage::new(target_node_id, expiration)); + + let mut buf = Vec::new(); + msg.encode_with_header(&mut buf, signer); + let res = socket.send_to(&buf, to_addr).await; + + let mut nodes = vec![]; + + if res.is_err() { + return nodes; + } + + loop { + // wait as much as 5 seconds for the response + match tokio::time::timeout(Duration::from_secs(5), request_receiver.recv()).await { + Ok(Some(mut found_nodes)) => { + nodes.append(&mut found_nodes); + if nodes.len() == MAX_NODES_PER_BUCKET { + return nodes; + }; + } + Ok(None) => { + return nodes; + } + Err(_) => { + // timeout expired + return nodes; + } + } + } +} diff --git a/crates/networking/p2p/kademlia.rs b/crates/networking/p2p/kademlia.rs index 6a8b4182bd..f9f5ae4a98 100644 --- a/crates/networking/p2p/kademlia.rs +++ b/crates/networking/p2p/kademlia.rs @@ -1,7 +1,5 @@ use crate::{ - discv4::{time_now_unix, FindNodeRequest}, - peer_channels::PeerChannels, - rlpx::p2p::Capability, + discv4::messages::FindNodeRequest, peer_channels::PeerChannels, rlpx::p2p::Capability, types::Node, }; use ethrex_core::{H256, H512, U256}; @@ -350,6 +348,13 @@ pub fn bucket_number(node_id_1: H512, node_id_2: H512) -> usize { distance.bits().saturating_sub(1) } +pub fn time_now_unix() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() +} + #[derive(Debug, Clone)] pub struct PeerData { pub node: Node, diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index ad0387ad0a..c3c42c2d8c 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -1,24 +1,14 @@ -use std::{ - collections::HashSet, - net::SocketAddr, - sync::Arc, - time::{Duration, SystemTime, UNIX_EPOCH}, -}; +use std::{net::SocketAddr, sync::Arc}; use bootnode::BootNode; -use discv4::{ - get_expiration, is_expired, time_now_unix, time_since_in_hs, FindNodeMessage, Message, - NeighborsMessage, Packet, PingMessage, PongMessage, -}; -use ethrex_core::{H256, H512}; +use discv4::discv4::Discv4; +use ethrex_core::H512; use ethrex_storage::Store; use k256::{ ecdsa::SigningKey, elliptic_curve::{sec1::ToEncodedPoint, PublicKey}, }; pub use kademlia::KademliaTable; -use kademlia::{bucket_number, MAX_NODES_PER_BUCKET}; -use rand::rngs::OsRng; use rlpx::{connection::RLPxConnection, message::Message as RLPxMessage}; use tokio::{ net::{TcpSocket, TcpStream, UdpSocket}, @@ -26,7 +16,7 @@ use tokio::{ try_join, }; use tracing::{debug, error, info}; -use types::{Endpoint, Node}; +use types::Node; pub mod bootnode; pub(crate) mod discv4; @@ -64,14 +54,25 @@ pub async fn start_network( 
tokio::task::Id, Arc, )>(MAX_MESSAGES_TO_BROADCAST); - let discovery_handle = tokio::spawn(discover_peers( - udp_addr, + + let udp_socket = UdpSocket::bind(udp_addr).await.unwrap(); + + let discv4 = Discv4::new( + Node { + ip: udp_addr.ip(), + node_id: node_id_from_signing_key(&signer), + udp_port: udp_addr.port(), + tcp_port: tcp_addr.port(), + }, signer.clone(), storage.clone(), peer_table.clone(), - bootnodes, channel_broadcast_send_end.clone(), - )); + Arc::new(udp_socket), + ); + let discovery_handle = + tokio::spawn(async move { discv4.start_discovery_service(bootnodes).await }); + let server_handle = tokio::spawn(serve_requests( tcp_addr, signer.clone(), @@ -80,683 +81,7 @@ pub async fn start_network( channel_broadcast_send_end, )); - try_join!(discovery_handle, server_handle).unwrap(); -} - -async fn discover_peers( - udp_addr: SocketAddr, - signer: SigningKey, - storage: Store, - table: Arc>, - bootnodes: Vec, - connection_broadcast: broadcast::Sender<(tokio::task::Id, Arc)>, -) { - let udp_socket = Arc::new(UdpSocket::bind(udp_addr).await.unwrap()); - - let server_handler = tokio::spawn(discover_peers_server( - udp_addr, - udp_socket.clone(), - storage, - table.clone(), - signer.clone(), - connection_broadcast, - )); - let revalidation_handler = tokio::spawn(peers_revalidation( - udp_addr, - udp_socket.clone(), - table.clone(), - signer.clone(), - REVALIDATION_INTERVAL_IN_SECONDS as u64, - )); - - discovery_startup( - udp_addr, - udp_socket.clone(), - table.clone(), - signer.clone(), - bootnodes, - ) - .await; - - // a first initial lookup runs without waiting for the interval - // so we need to allow some time to the pinged peers to ping us back and acknowledge us - tokio::time::sleep(Duration::from_secs(10)).await; - let lookup_handler = tokio::spawn(peers_lookup( - udp_socket.clone(), - table.clone(), - signer.clone(), - node_id_from_signing_key(&signer), - PEERS_RANDOM_LOOKUP_TIME_IN_MIN as u64 * 60, - )); - - try_join!(server_handler, revalidation_handler, lookup_handler).unwrap(); -} - -async fn discover_peers_server( - udp_addr: SocketAddr, - udp_socket: Arc, - storage: Store, - table: Arc>, - signer: SigningKey, - tx_broadcaster_send: broadcast::Sender<(tokio::task::Id, Arc)>, -) { - let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; - - loop { - let (read, from) = udp_socket.recv_from(&mut buf).await.unwrap(); - debug!("Received {read} bytes from {from}"); - - let packet = Packet::decode(&buf[..read]); - if packet.is_err() { - debug!("Could not decode packet: {:?}", packet.err().unwrap()); - continue; - } - let packet = packet.unwrap(); - - let msg = packet.get_message(); - debug!("Message: {:?} from {}", msg, packet.get_node_id()); - - match msg { - Message::Ping(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring ping as it is expired."); - continue; - }; - let ping_hash = packet.get_hash(); - pong(&udp_socket, from, ping_hash, &signer).await; - let node = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(peer) = node { - // send a a ping to get an endpoint proof - if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { - let hash = ping(&udp_socket, udp_addr, from, &signer).await; - if let Some(hash) = hash { - table - .lock() - .await - .update_peer_ping(peer.node.node_id, Some(hash)); - } - } - } else { - // send a ping to get the endpoint proof from our end - let (peer, inserted_to_table) = { - let mut table = table.lock().await; - table.insert_node(Node { - ip: from.ip(), - udp_port: 
from.port(), - tcp_port: 0, - node_id: packet.get_node_id(), - }) - }; - let hash = ping(&udp_socket, udp_addr, from, &signer).await; - if let Some(hash) = hash { - if inserted_to_table && peer.is_some() { - let peer = peer.unwrap(); - table - .lock() - .await - .update_peer_ping(peer.node.node_id, Some(hash)); - } - } - } - } - Message::Pong(msg) => { - let table = table.clone(); - if is_expired(msg.expiration) { - debug!("Ignoring pong as it is expired."); - continue; - } - let peer = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(peer) = peer { - if peer.last_ping_hash.is_none() { - debug!("Discarding pong as the node did not send a previous ping"); - continue; - } - if peer.last_ping_hash.unwrap() == msg.ping_hash { - table.lock().await.pong_answered(peer.node.node_id); - - let mut msg_buf = vec![0; read - 32]; - buf[32..read].clone_into(&mut msg_buf); - let signer = signer.clone(); - let storage = storage.clone(); - let broadcaster = tx_broadcaster_send.clone(); - tokio::spawn(async move { - handle_peer_as_initiator( - signer, - &msg_buf, - &peer.node, - storage, - table, - broadcaster, - ) - .await; - }); - } else { - debug!( - "Discarding pong as the hash did not match the last corresponding ping" - ); - } - } else { - debug!("Discarding pong as it is not a known node"); - } - } - Message::FindNode(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring find node msg as it is expired."); - continue; - }; - let node = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(node) = node { - if node.is_proven { - let nodes = { - let table = table.lock().await; - table.get_closest_nodes(msg.target) - }; - let nodes_chunks = nodes.chunks(4); - let expiration = get_expiration(20); - debug!("Sending neighbors!"); - // we are sending the neighbors in 4 different messages as not to exceed the - // maximum packet size - for nodes in nodes_chunks { - let neighbors = discv4::Message::Neighbors(NeighborsMessage::new( - nodes.to_vec(), - expiration, - )); - let mut buf = Vec::new(); - neighbors.encode_with_header(&mut buf, &signer); - udp_socket.send_to(&buf, from).await.unwrap(); - } - } else { - debug!("Ignoring find node message as the node isn't proven!"); - } - } else { - debug!("Ignoring find node message as it is not a known node"); - } - } - Message::Neighbors(neighbors_msg) => { - if is_expired(neighbors_msg.expiration) { - debug!("Ignoring neighbor msg as it is expired."); - continue; - }; - - let mut nodes_to_insert = None; - let mut table = table.lock().await; - if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { - if let Some(req) = &mut node.find_node_request { - if time_now_unix().saturating_sub(req.sent_at) >= 60 { - debug!("Ignoring neighbors message as the find_node request expires after one minute"); - node.find_node_request = None; - continue; - } - let nodes = &neighbors_msg.nodes; - let nodes_sent = req.nodes_sent + nodes.len(); - - if nodes_sent <= MAX_NODES_PER_BUCKET { - debug!("Storing neighbors in our table!"); - req.nodes_sent = nodes_sent; - nodes_to_insert = Some(nodes.clone()); - if let Some(tx) = &req.tx { - let _ = tx.send(nodes.clone()); - } - } else { - debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); - } - - if nodes_sent == MAX_NODES_PER_BUCKET { - debug!("Neighbors request has been fulfilled"); - node.find_node_request = None; - } - } - } else { - debug!("Ignoring neighbor msg as it is 
not a known node"); - } - - if let Some(nodes) = nodes_to_insert { - for node in nodes { - let (peer, inserted_to_table) = table.insert_node(node); - if inserted_to_table && peer.is_some() { - let peer = peer.unwrap(); - let node_addr = SocketAddr::new(peer.node.ip, peer.node.udp_port); - let ping_hash = ping(&udp_socket, udp_addr, node_addr, &signer).await; - table.update_peer_ping(peer.node.node_id, ping_hash); - }; - } - } - } - _ => {} - } - } -} - -// this is just an arbitrary number, maybe we should get this from some kind of cfg -/// This is a really basic startup and should be improved when we have the nodes stored in the db -/// currently, since we are not storing nodes, the only way to have startup nodes is by providing -/// an array of bootnodes. -async fn discovery_startup( - udp_addr: SocketAddr, - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - bootnodes: Vec, -) { - for bootnode in bootnodes { - table.lock().await.insert_node(Node { - ip: bootnode.socket_address.ip(), - udp_port: bootnode.socket_address.port(), - // TODO: udp port can differ from tcp port. - // see https://github.com/lambdaclass/ethrex/issues/905 - tcp_port: bootnode.socket_address.port(), - node_id: bootnode.node_id, - }); - let ping_hash = ping(&udp_socket, udp_addr, bootnode.socket_address, &signer).await; - table - .lock() - .await - .update_peer_ping(bootnode.node_id, ping_hash); - } -} - -const REVALIDATION_INTERVAL_IN_SECONDS: usize = 30; // this is just an arbitrary number, maybe we should get this from some kind of cfg -const PROOF_EXPIRATION_IN_HS: usize = 12; - -/// Starts a tokio scheduler that: -/// - performs periodic revalidation of the current nodes (sends a ping to the old nodes). Currently this is configured to happen every [`REVALIDATION_INTERVAL_IN_MINUTES`] -/// -/// **Peer revalidation** -/// -/// Peers revalidation works in the following manner: -/// 1. Every `REVALIDATION_INTERVAL_IN_SECONDS` we ping the 3 least recently pinged peers -/// 2. In the next iteration we check if they have answered -/// - if they have: we increment the liveness field by one -/// - otherwise we decrement it by the current value / 3. -/// 3. 
If the liveness field is 0, then we delete it and insert a new one from the replacements table -/// -/// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#kademlia-table -async fn peers_revalidation( - udp_addr: SocketAddr, - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - interval_time_in_seconds: u64, -) { - let mut interval = tokio::time::interval(Duration::from_secs(interval_time_in_seconds)); - // peers we have pinged in the previous iteration - let mut previously_pinged_peers: HashSet = HashSet::default(); - - // first tick starts immediately - interval.tick().await; - - loop { - interval.tick().await; - debug!("Running peer revalidation"); - - // first check that the peers we ping have responded - for node_id in previously_pinged_peers { - let mut table = table.lock().await; - let peer = table.get_by_node_id_mut(node_id).unwrap(); - - if let Some(has_answered) = peer.revalidation { - if has_answered { - peer.increment_liveness(); - } else { - peer.decrement_liveness(); - } - } - - peer.revalidation = None; - - if peer.liveness == 0 { - let new_peer = table.replace_peer(node_id); - if let Some(new_peer) = new_peer { - let ping_hash = ping( - &udp_socket, - udp_addr, - SocketAddr::new(new_peer.node.ip, new_peer.node.udp_port), - &signer, - ) - .await; - table.update_peer_ping(new_peer.node.node_id, ping_hash); - } - } - } - - // now send a ping to the least recently pinged peers - // this might be too expensive to run if our table is filled - // maybe we could just pick them randomly - let peers = table.lock().await.get_least_recently_pinged_peers(3); - previously_pinged_peers = HashSet::default(); - for peer in peers { - let ping_hash = ping( - &udp_socket, - udp_addr, - SocketAddr::new(peer.node.ip, peer.node.udp_port), - &signer, - ) - .await; - let mut table = table.lock().await; - table.update_peer_ping_with_revalidation(peer.node.node_id, ping_hash); - previously_pinged_peers.insert(peer.node.node_id); - - debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); - } - - debug!("Peer revalidation finished"); - } -} - -const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: usize = 30; - -/// Starts a tokio scheduler that: -/// - performs random lookups to discover new nodes. Currently this is configure to run every `PEERS_RANDOM_LOOKUP_TIME_IN_MIN` -/// -/// **Random lookups** -/// -/// Random lookups work in the following manner: -/// 1. Every 30min we spawn three concurrent lookups: one closest to our pubkey -/// and three other closest to random generated pubkeys. -/// 2. Every lookup starts with the closest nodes from our table. -/// Each lookup keeps track of: -/// - Peers that have already been asked for nodes -/// - Peers that have been already seen -/// - Potential peers to query for nodes: a vector of up to 16 entries holding the closest peers to the pubkey. -/// This vector is initially filled with nodes from our table. -/// 3. We send a `find_node` to the closest 3 nodes (that we have not yet asked) from the pubkey. -/// 4. We wait for the neighbors response and pushed or replace those that are closer to the potential peers. -/// 5. We select three other nodes from the potential peers vector and do the same until one lookup -/// doesn't have any node to ask. 
-/// -/// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#recursive-lookup -async fn peers_lookup( - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - local_node_id: H512, - interval_time_in_seconds: u64, -) { - let mut interval = tokio::time::interval(Duration::from_secs(interval_time_in_seconds)); - - loop { - // Notice that the first tick is immediate, - // so as soon as the server starts we'll do a lookup with the seeder nodes. - interval.tick().await; - - debug!("Starting lookup"); - - let mut handlers = vec![]; - - // lookup closest to our pub key - handlers.push(tokio::spawn(recursive_lookup( - udp_socket.clone(), - table.clone(), - signer.clone(), - local_node_id, - local_node_id, - ))); - - // lookup closest to 3 random keys - for _ in 0..3 { - let random_pub_key = &SigningKey::random(&mut OsRng); - handlers.push(tokio::spawn(recursive_lookup( - udp_socket.clone(), - table.clone(), - signer.clone(), - node_id_from_signing_key(random_pub_key), - local_node_id, - ))); - } - - for handle in handlers { - let _ = try_join!(handle); - } - - debug!("Lookup finished"); - } -} - -async fn recursive_lookup( - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - target: H512, - local_node_id: H512, -) { - let mut asked_peers = HashSet::default(); - // lookups start with the closest from our table - let closest_nodes = table.lock().await.get_closest_nodes(target); - let mut seen_peers: HashSet = HashSet::default(); - - seen_peers.insert(local_node_id); - for node in &closest_nodes { - seen_peers.insert(node.node_id); - } - - let mut peers_to_ask: Vec = closest_nodes; - - loop { - let (nodes_found, queries) = lookup( - udp_socket.clone(), - table.clone(), - &signer, - target, - &mut asked_peers, - &peers_to_ask, - ) - .await; - - // only push the peers that have not been seen - // that is those who have not been yet pushed, which also accounts for - // those peers that were in the array but have been replaced for closer peers - for node in nodes_found { - if !seen_peers.contains(&node.node_id) { - seen_peers.insert(node.node_id); - peers_to_ask_push(&mut peers_to_ask, target, node); - } - } - - // the lookup finishes when there are no more queries to do - // that happens when we have asked all the peers - if queries == 0 { - break; - } - } -} - -async fn lookup( - udp_socket: Arc, - table: Arc>, - signer: &SigningKey, - target: H512, - asked_peers: &mut HashSet, - nodes_to_ask: &Vec, -) -> (Vec, u32) { - let alpha = 3; - let mut queries = 0; - let mut nodes = vec![]; - - for node in nodes_to_ask { - if !asked_peers.contains(&node.node_id) { - #[allow(unused_assignments)] - let mut rx = None; - { - let mut table = table.lock().await; - let peer = table.get_by_node_id_mut(node.node_id); - if let Some(peer) = peer { - // if the peer has an ongoing find_node request, don't query - if peer.find_node_request.is_some() { - continue; - } - let (tx, receiver) = tokio::sync::mpsc::unbounded_channel::>(); - peer.new_find_node_request_with_sender(tx); - rx = Some(receiver); - } else { - // if peer isn't inserted to table, don't query - continue; - } - } - - queries += 1; - asked_peers.insert(node.node_id); - - let mut found_nodes = find_node_and_wait_for_response( - &udp_socket, - SocketAddr::new(node.ip, node.udp_port), - signer, - target, - &mut rx.unwrap(), - ) - .await; - nodes.append(&mut found_nodes); - } - - if queries == alpha { - break; - } - } - - (nodes, queries) -} - -fn peers_to_ask_push(peers_to_ask: &mut Vec, target: H512, node: Node) { - let distance = 
bucket_number(target, node.node_id); - - if peers_to_ask.len() < MAX_NODES_PER_BUCKET { - peers_to_ask.push(node); - return; - } - - // replace this node for the one whose distance to the target is the highest - let (mut idx_to_replace, mut highest_distance) = (None, 0); - - for (i, peer) in peers_to_ask.iter().enumerate() { - let current_distance = bucket_number(peer.node_id, target); - - if distance < current_distance && current_distance >= highest_distance { - highest_distance = current_distance; - idx_to_replace = Some(i); - } - } - - if let Some(idx) = idx_to_replace { - peers_to_ask[idx] = node; - } -} - -/// Sends a ping to the addr -/// # Returns -/// an optional hash corresponding to the message header hash to account if the send was successful -async fn ping( - socket: &UdpSocket, - local_addr: SocketAddr, - to_addr: SocketAddr, - signer: &SigningKey, -) -> Option { - let mut buf = Vec::new(); - - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - - // TODO: this should send our advertised TCP port - let from = Endpoint { - ip: local_addr.ip(), - udp_port: local_addr.port(), - tcp_port: 0, - }; - let to = Endpoint { - ip: to_addr.ip(), - udp_port: to_addr.port(), - tcp_port: 0, - }; - - let ping: discv4::Message = discv4::Message::Ping(PingMessage::new(from, to, expiration)); - ping.encode_with_header(&mut buf, signer); - let res = socket.send_to(&buf, to_addr).await; - - if res.is_err() { - return None; - } - let bytes_sent = res.unwrap(); - - // sanity check to make sure the ping was well sent - // though idk if this is actually needed or if it might break other stuff - if bytes_sent == buf.len() { - return Some(H256::from_slice(&buf[0..32])); - } - - None -} - -async fn find_node_and_wait_for_response( - socket: &UdpSocket, - to_addr: SocketAddr, - signer: &SigningKey, - target_node_id: H512, - request_receiver: &mut tokio::sync::mpsc::UnboundedReceiver>, -) -> Vec { - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - - let msg: discv4::Message = - discv4::Message::FindNode(FindNodeMessage::new(target_node_id, expiration)); - - let mut buf = Vec::new(); - msg.encode_with_header(&mut buf, signer); - let res = socket.send_to(&buf, to_addr).await; - - let mut nodes = vec![]; - - if res.is_err() { - return nodes; - } - - loop { - // wait as much as 5 seconds for the response - match tokio::time::timeout(Duration::from_secs(5), request_receiver.recv()).await { - Ok(Some(mut found_nodes)) => { - nodes.append(&mut found_nodes); - if nodes.len() == MAX_NODES_PER_BUCKET { - return nodes; - }; - } - Ok(None) => { - return nodes; - } - Err(_) => { - // timeout expired - return nodes; - } - } - } -} - -async fn pong(socket: &UdpSocket, to_addr: SocketAddr, ping_hash: H256, signer: &SigningKey) { - let mut buf = Vec::new(); - - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - - let to = Endpoint { - ip: to_addr.ip(), - udp_port: to_addr.port(), - tcp_port: 0, - }; - let pong: discv4::Message = discv4::Message::Pong(PongMessage::new(to, ping_hash, expiration)); - - pong.encode_with_header(&mut buf, signer); - let _ = socket.send_to(&buf, to_addr).await; + let _ = try_join!(discovery_handle, server_handle).unwrap(); } async fn serve_requests( @@ -830,295 +155,3 @@ pub async fn periodically_show_peer_stats(peer_table: Arc>) interval.tick().await; } } - -#[cfg(test)] 
-mod tests { - use super::*; - use ethrex_storage::EngineType; - use kademlia::bucket_number; - use rand::rngs::OsRng; - use std::{ - collections::HashSet, - net::{IpAddr, Ipv4Addr}, - }; - use tokio::time::sleep; - - async fn insert_random_node_on_custom_bucket( - table: Arc>, - bucket_idx: usize, - ) { - let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); - let node = Node { - ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), - tcp_port: 0, - udp_port: 0, - node_id, - }; - table - .lock() - .await - .insert_node_on_custom_bucket(node, bucket_idx); - } - - async fn fill_table_with_random_nodes(table: Arc>) { - for i in 0..256 { - for _ in 0..16 { - insert_random_node_on_custom_bucket(table.clone(), i).await; - } - } - } - - struct MockServer { - pub addr: SocketAddr, - pub signer: SigningKey, - pub table: Arc>, - pub node_id: H512, - pub udp_socket: Arc, - } - - async fn start_mock_discovery_server(udp_port: u16, should_start_server: bool) -> MockServer { - let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); - let signer = SigningKey::random(&mut OsRng); - let udp_socket = Arc::new(UdpSocket::bind(addr).await.unwrap()); - let node_id = node_id_from_signing_key(&signer); - let storage = - Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); - let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); - let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( - tokio::task::Id, - Arc, - )>(MAX_MESSAGES_TO_BROADCAST); - if should_start_server { - tokio::spawn(discover_peers_server( - addr, - udp_socket.clone(), - storage.clone(), - table.clone(), - signer.clone(), - channel_broadcast_send_end, - )); - } - - MockServer { - addr, - signer, - table, - node_id, - udp_socket, - } - } - - /// connects two mock servers by pinging a to b - async fn connect_servers(server_a: &mut MockServer, server_b: &mut MockServer) { - let ping_hash = ping( - &server_a.udp_socket, - server_a.addr, - server_b.addr, - &server_a.signer, - ) - .await; - { - let mut table = server_a.table.lock().await; - table.insert_node(Node { - ip: server_b.addr.ip(), - udp_port: server_b.addr.port(), - tcp_port: 0, - node_id: server_b.node_id, - }); - table.update_peer_ping(server_b.node_id, ping_hash); - } - // allow some time for the server to respond - sleep(Duration::from_secs(1)).await; - } - - #[tokio::test] - /** This is a end to end test on the discovery server, the idea is as follows: - * - We'll start two discovery servers (`a` & `b`) to ping between each other - * - We'll make `b` ping `a`, and validate that the connection is right - * - Then we'll wait for a revalidation where we expect everything to be the same - * - We'll do this five 5 more times - * - Then we'll stop server `a` so that it doesn't respond to re-validations - * - We expect server `b` to remove node `a` from its table after 3 re-validations - * To make this run faster, we'll change the revalidation time to be every 2secs - */ - async fn discovery_server_revalidation() { - let mut server_a = start_mock_discovery_server(7998, true).await; - let mut server_b = start_mock_discovery_server(7999, true).await; - - connect_servers(&mut server_a, &mut server_b).await; - - // start revalidation server - tokio::spawn(peers_revalidation( - server_b.addr, - server_b.udp_socket.clone(), - server_b.table.clone(), - server_b.signer.clone(), - 2, - )); - - for _ in 0..5 { - sleep(Duration::from_millis(2500)).await; - // by now, b should've send a revalidation to a - let table = 
server_b.table.lock().await; - let node = table.get_by_node_id(server_a.node_id).unwrap(); - assert!(node.revalidation.is_some()); - } - - // make sure that `a` has responded too all the re-validations - // we can do that by checking the liveness - { - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.node_id).unwrap(); - assert_eq!(node.liveness, 6); - } - - // now, stopping server `a` is not trivial - // so we'll instead change its port, so that no one responds - { - let mut table = server_b.table.lock().await; - let node = table.get_by_node_id_mut(server_a.node_id).unwrap(); - node.node.udp_port = 0; - } - - // now the liveness field should start decreasing until it gets to 0 - // which should happen in 3 re-validations - for _ in 0..2 { - sleep(Duration::from_millis(2500)).await; - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.node_id).unwrap(); - assert!(node.revalidation.is_some()); - } - sleep(Duration::from_millis(2500)).await; - - // finally, `a`` should not exist anymore - let table = server_b.table.lock().await; - assert!(table.get_by_node_id(server_a.node_id).is_none()); - } - - #[tokio::test] - /** This test tests the lookup function, the idea is as follows: - * - We'll start two discovery servers (`a` & `b`) that will connect between each other - * - We'll insert random nodes to the server `a`` to fill its table - * - We'll forcedly run `lookup` and validate that a `find_node` request was sent - * by checking that new nodes have been inserted to the table - * - * This test for only one lookup, and not recursively. - */ - async fn discovery_server_lookup() { - let mut server_a = start_mock_discovery_server(8000, true).await; - let mut server_b = start_mock_discovery_server(8001, true).await; - - fill_table_with_random_nodes(server_a.table.clone()).await; - - // before making the connection, remove a node from the `b` bucket. Otherwise it won't be added - let b_bucket = bucket_number(server_a.node_id, server_b.node_id); - let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] - .node - .node_id; - server_a - .table - .lock() - .await - .replace_peer_on_custom_bucket(node_id_to_remove, b_bucket); - - connect_servers(&mut server_a, &mut server_b).await; - - // now we are going to run a lookup with us as the target - let closets_peers_to_b_from_a = server_a - .table - .lock() - .await - .get_closest_nodes(server_b.node_id); - let nodes_to_ask = server_b - .table - .lock() - .await - .get_closest_nodes(server_b.node_id); - - lookup( - server_b.udp_socket.clone(), - server_b.table.clone(), - &server_b.signer, - server_b.node_id, - &mut HashSet::default(), - &nodes_to_ask, - ) - .await; - - // find_node sent, allow some time for `a` to respond - sleep(Duration::from_secs(2)).await; - - // now all peers should've been inserted - for peer in closets_peers_to_b_from_a { - let table = server_b.table.lock().await; - assert!(table.get_by_node_id(peer.node_id).is_some()); - } - } - - #[tokio::test] - /** This test tests the lookup function, the idea is as follows: - * - We'll start four discovery servers (`a`, `b`, `c` & `d`) - * - `a` will be connected to `b`, `b` will be connected to `c` and `c` will be connected to `d`. 
- * - The server `d` will have its table filled with mock nodes - * - We'll run a recursive lookup on server `a` and we expect to end with `b`, `c`, `d` and its mock nodes - */ - async fn discovery_server_recursive_lookup() { - let mut server_a = start_mock_discovery_server(8002, true).await; - let mut server_b = start_mock_discovery_server(8003, true).await; - let mut server_c = start_mock_discovery_server(8004, true).await; - let mut server_d = start_mock_discovery_server(8005, true).await; - - connect_servers(&mut server_a, &mut server_b).await; - connect_servers(&mut server_b, &mut server_c).await; - connect_servers(&mut server_c, &mut server_d).await; - - // now we fill the server_d table with 3 random nodes - // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) - // and so we will have to wait for the timeout on each node, which will only slow down the test - for _ in 0..3 { - insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; - } - - let mut expected_peers = vec![]; - expected_peers.extend( - server_b - .table - .lock() - .await - .get_closest_nodes(server_a.node_id), - ); - expected_peers.extend( - server_c - .table - .lock() - .await - .get_closest_nodes(server_a.node_id), - ); - expected_peers.extend( - server_d - .table - .lock() - .await - .get_closest_nodes(server_a.node_id), - ); - - // we'll run a recursive lookup closest to the server itself - recursive_lookup( - server_a.udp_socket.clone(), - server_a.table.clone(), - server_a.signer.clone(), - server_a.node_id, - server_a.node_id, - ) - .await; - - for peer in expected_peers { - assert!(server_a - .table - .lock() - .await - .get_by_node_id(peer.node_id) - .is_some()); - } - } -} From dca6bea8857e8d4fd299e99b5c2e82e8ad7cca92 Mon Sep 17 00:00:00 2001 From: nicolau Date: Wed, 22 Jan 2025 10:48:33 -0300 Subject: [PATCH 02/33] refactor: avoid cloning self and move message handling to new function --- crates/networking/p2p/discv4/discv4.rs | 368 ++++++++++++------------- crates/networking/p2p/net.rs | 17 +- 2 files changed, 190 insertions(+), 195 deletions(-) diff --git a/crates/networking/p2p/discv4/discv4.rs b/crates/networking/p2p/discv4/discv4.rs index 73551946fb..97e9dd3a4d 100644 --- a/crates/networking/p2p/discv4/discv4.rs +++ b/crates/networking/p2p/discv4/discv4.rs @@ -28,11 +28,11 @@ use super::lookup::DiscoveryLookupHandler; pub struct Discv4 { local_node: Node, udp_addr: SocketAddr, + udp_socket: Arc, signer: SigningKey, storage: Store, table: Arc>, tx_broadcaster_send: broadcast::Sender<(tokio::task::Id, Arc)>, - udp_socket: Arc, revalidation_interval_seconds: u64, } @@ -79,39 +79,25 @@ impl Discv4 { } pub async fn start_discovery_service( - &self, + self: Arc, bootnodes: Vec, ) -> Result<(), DiscoveryError> { - let server_handler = tokio::spawn({ - let clone = self.clone(); - async move { - clone.handle_messages().await; - } - }); + let server_handle = tokio::spawn(self.clone().receive()); self.load_bootnodes(bootnodes).await; - let revalidation_handler = tokio::spawn({ - let clone = self.clone(); - async move { - clone.start_revalidation_task().await; - } - }); + let revalidation_handle = tokio::spawn(self.clone().start_revalidation_task()); // a first initial lookup runs without waiting for the interval // so we need to allow some time to the pinged peers to ping us back and acknowledge us - let self_clone = self.clone(); - let lookup_handler = tokio::spawn(async move { - DiscoveryLookupHandler::new( - self_clone.local_node, - 
self_clone.signer, - self_clone.udp_socket, - self_clone.table, - ) - .start_lookup_task() - .await - }); + let lookup_handler = DiscoveryLookupHandler::new( + self.local_node, + self.signer.clone(), + self.udp_socket.clone(), + self.table.clone(), + ); + let lookup_handle = tokio::spawn(async move { lookup_handler.start_lookup_task().await }); - let result = try_join!(server_handler, revalidation_handler, lookup_handler); + let result = try_join!(server_handle, revalidation_handle, lookup_handle); if result.is_ok() { Ok(()) @@ -144,7 +130,7 @@ impl Discv4 { } } - async fn handle_messages(&self) { + async fn receive(self: Arc) { let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; loop { @@ -158,193 +144,201 @@ impl Discv4 { } let packet = packet.unwrap(); - let msg = packet.get_message(); - debug!("Message: {:?} from {}", msg, packet.get_node_id()); + self.handle_message(packet, from, read, &buf).await; + } + } - match msg { - Message::Ping(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring ping as it is expired."); - continue; - }; - let ping_hash = packet.get_hash(); - pong(&self.udp_socket, from, ping_hash, &self.signer).await; - let node = { - let table = self.table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(peer) = node { - // send a a ping to get an endpoint proof - if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { - let hash = - ping(&self.udp_socket, self.udp_addr, from, &self.signer).await; - if let Some(hash) = hash { - self.table - .lock() - .await - .update_peer_ping(peer.node.node_id, Some(hash)); - } - } - } else { - // send a ping to get the endpoint proof from our end - let (peer, inserted_to_table) = { - let mut table = self.table.lock().await; - table.insert_node(Node { - ip: from.ip(), - udp_port: from.port(), - tcp_port: 0, - node_id: packet.get_node_id(), - }) - }; + async fn handle_message( + &self, + packet: Packet, + from: SocketAddr, + msg_len: usize, + msg_bytes: &[u8], + ) { + let msg = packet.get_message(); + debug!("Message: {:?} from {}", msg, packet.get_node_id()); + match msg { + Message::Ping(msg) => { + if is_expired(msg.expiration) { + debug!("Ignoring ping as it is expired."); + return; + }; + let ping_hash = packet.get_hash(); + pong(&self.udp_socket, from, ping_hash, &self.signer).await; + let node = { + let table = self.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(peer) = node { + // send a a ping to get an endpoint proof + if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { let hash = ping(&self.udp_socket, self.udp_addr, from, &self.signer).await; if let Some(hash) = hash { - if inserted_to_table && peer.is_some() { - let peer = peer.unwrap(); - self.table - .lock() - .await - .update_peer_ping(peer.node.node_id, Some(hash)); - } + self.table + .lock() + .await + .update_peer_ping(peer.node.node_id, Some(hash)); } } - } - Message::Pong(msg) => { - let table = self.table.clone(); - if is_expired(msg.expiration) { - debug!("Ignoring pong as it is expired."); - continue; - } - let peer = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() + } else { + // send a ping to get the endpoint proof from our end + let (peer, inserted_to_table) = { + let mut table = self.table.lock().await; + table.insert_node(Node { + ip: from.ip(), + udp_port: msg.from.udp_port, + tcp_port: msg.from.tcp_port, + node_id: packet.get_node_id(), + }) }; - if let Some(peer) = peer { - if 
peer.last_ping_hash.is_none() { - debug!("Discarding pong as the node did not send a previous ping"); - continue; + let hash = ping(&self.udp_socket, self.udp_addr, from, &self.signer).await; + if let Some(hash) = hash { + if inserted_to_table && peer.is_some() { + let peer = peer.unwrap(); + self.table + .lock() + .await + .update_peer_ping(peer.node.node_id, Some(hash)); } - if peer.last_ping_hash.unwrap() == msg.ping_hash { - table.lock().await.pong_answered(peer.node.node_id); - - let mut msg_buf = vec![0; read - 32]; - buf[32..read].clone_into(&mut msg_buf); - let signer = self.signer.clone(); - let storage = self.storage.clone(); - let broadcaster = self.tx_broadcaster_send.clone(); - tokio::spawn(async move { - handle_peer_as_initiator( - signer, - &msg_buf, - &peer.node, - storage, - table, - broadcaster, - ) - .await; - }); - } else { - debug!( + } + } + } + Message::Pong(msg) => { + let table = self.table.clone(); + if is_expired(msg.expiration) { + debug!("Ignoring pong as it is expired."); + return; + } + let peer = { + let table = table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(peer) = peer { + if peer.last_ping_hash.is_none() { + debug!("Discarding pong as the node did not send a previous ping"); + return; + } + if peer.last_ping_hash.unwrap() == msg.ping_hash { + table.lock().await.pong_answered(peer.node.node_id); + + let mut msg_buf = vec![0; msg_len - 32]; + msg_bytes[32..msg_len].clone_into(&mut msg_buf); + let signer = self.signer.clone(); + let storage = self.storage.clone(); + let broadcaster = self.tx_broadcaster_send.clone(); + tokio::spawn(async move { + handle_peer_as_initiator( + signer, + &msg_buf, + &peer.node, + storage, + table, + broadcaster, + ) + .await; + }); + } else { + debug!( "Discarding pong as the hash did not match the last corresponding ping" ); - } - } else { - debug!("Discarding pong as it is not a known node"); } + } else { + debug!("Discarding pong as it is not a known node"); } - Message::FindNode(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring find node msg as it is expired."); - continue; - }; - let node = { - let table = self.table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(node) = node { - if node.is_proven { - let nodes = { - let table = self.table.lock().await; - table.get_closest_nodes(msg.target) - }; - let nodes_chunks = nodes.chunks(4); - let expiration = get_expiration(20); - debug!("Sending neighbors!"); - // we are sending the neighbors in 4 different messages as not to exceed the - // maximum packet size - for nodes in nodes_chunks { - let neighbors = Message::Neighbors(NeighborsMessage::new( - nodes.to_vec(), - expiration, - )); - let mut buf = Vec::new(); - neighbors.encode_with_header(&mut buf, &self.signer); - let _ = self.udp_socket.send_to(&buf, from).await; - } - } else { - debug!("Ignoring find node message as the node isn't proven!"); + } + Message::FindNode(msg) => { + if is_expired(msg.expiration) { + debug!("Ignoring find node msg as it is expired."); + return; + }; + let node = { + let table = self.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(node) = node { + if node.is_proven { + let nodes = { + let table = self.table.lock().await; + table.get_closest_nodes(msg.target) + }; + let nodes_chunks = nodes.chunks(4); + let expiration = get_expiration(20); + debug!("Sending neighbors!"); + // we are sending the neighbors in 4 different messages as not to exceed the + // 
maximum packet size + for nodes in nodes_chunks { + let neighbors = Message::Neighbors(NeighborsMessage::new( + nodes.to_vec(), + expiration, + )); + let mut buf = Vec::new(); + neighbors.encode_with_header(&mut buf, &self.signer); + let _ = self.udp_socket.send_to(&buf, from).await; } } else { - debug!("Ignoring find node message as it is not a known node"); + debug!("Ignoring find node message as the node isn't proven!"); } + } else { + debug!("Ignoring find node message as it is not a known node"); } - Message::Neighbors(neighbors_msg) => { - if is_expired(neighbors_msg.expiration) { - debug!("Ignoring neighbor msg as it is expired."); - continue; - }; + } + Message::Neighbors(neighbors_msg) => { + if is_expired(neighbors_msg.expiration) { + debug!("Ignoring neighbor msg as it is expired."); + return; + }; - let mut nodes_to_insert = None; - let mut table = self.table.lock().await; - if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { - if let Some(req) = &mut node.find_node_request { - if time_now_unix().saturating_sub(req.sent_at) >= 60 { - debug!("Ignoring neighbors message as the find_node request expires after one minute"); - node.find_node_request = None; - continue; - } - let nodes = &neighbors_msg.nodes; - let nodes_sent = req.nodes_sent + nodes.len(); - - if nodes_sent <= MAX_NODES_PER_BUCKET { - debug!("Storing neighbors in our table!"); - req.nodes_sent = nodes_sent; - nodes_to_insert = Some(nodes.clone()); - if let Some(tx) = &req.tx { - let _ = tx.send(nodes.clone()); - } - } else { - debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); + let mut nodes_to_insert = None; + let mut table = self.table.lock().await; + if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { + if let Some(req) = &mut node.find_node_request { + if time_now_unix().saturating_sub(req.sent_at) >= 60 { + debug!("Ignoring neighbors message as the find_node request expires after one minute"); + node.find_node_request = None; + return; + } + let nodes = &neighbors_msg.nodes; + let nodes_sent = req.nodes_sent + nodes.len(); + + if nodes_sent <= MAX_NODES_PER_BUCKET { + debug!("Storing neighbors in our table!"); + req.nodes_sent = nodes_sent; + nodes_to_insert = Some(nodes.clone()); + if let Some(tx) = &req.tx { + let _ = tx.send(nodes.clone()); } + } else { + debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); + } - if nodes_sent == MAX_NODES_PER_BUCKET { - debug!("Neighbors request has been fulfilled"); - node.find_node_request = None; - } + if nodes_sent == MAX_NODES_PER_BUCKET { + debug!("Neighbors request has been fulfilled"); + node.find_node_request = None; } - } else { - debug!("Ignoring neighbor msg as it is not a known node"); } + } else { + debug!("Ignoring neighbor msg as it is not a known node"); + } - if let Some(nodes) = nodes_to_insert { - for node in nodes { - let (peer, inserted_to_table) = table.insert_node(node); - if inserted_to_table && peer.is_some() { - let peer = peer.unwrap(); - let node_addr = SocketAddr::new(peer.node.ip, peer.node.udp_port); - let ping_hash = - ping(&self.udp_socket, self.udp_addr, node_addr, &self.signer) - .await; - table.update_peer_ping(peer.node.node_id, ping_hash); - }; - } + if let Some(nodes) = nodes_to_insert { + for node in nodes { + let (peer, inserted_to_table) = table.insert_node(node); + if inserted_to_table && peer.is_some() { + let peer = peer.unwrap(); + let node_addr = SocketAddr::new(peer.node.ip, peer.node.udp_port); + let ping_hash = + 
ping(&self.udp_socket, self.udp_addr, node_addr, &self.signer) + .await; + table.update_peer_ping(peer.node.node_id, ping_hash); + }; } } - _ => {} } + _ => {} } } - async fn start_revalidation_task(&self) { + async fn start_revalidation_task(self: Arc) { let mut interval = tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); // peers we have pinged in the previous iteration diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index c3c42c2d8c..e8d26248c1 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -56,22 +56,23 @@ pub async fn start_network( )>(MAX_MESSAGES_TO_BROADCAST); let udp_socket = UdpSocket::bind(udp_addr).await.unwrap(); + let local_node = Node { + ip: udp_addr.ip(), + node_id: node_id_from_signing_key(&signer), + udp_port: udp_addr.port(), + tcp_port: tcp_addr.port(), + }; let discv4 = Discv4::new( - Node { - ip: udp_addr.ip(), - node_id: node_id_from_signing_key(&signer), - udp_port: udp_addr.port(), - tcp_port: tcp_addr.port(), - }, + local_node, signer.clone(), storage.clone(), peer_table.clone(), channel_broadcast_send_end.clone(), Arc::new(udp_socket), ); - let discovery_handle = - tokio::spawn(async move { discv4.start_discovery_service(bootnodes).await }); + let discv4 = Arc::new(discv4); + let discovery_handle = tokio::spawn(discv4.start_discovery_service(bootnodes)); let server_handle = tokio::spawn(serve_requests( tcp_addr, From 7c1bbf7247e87eef7e082ba91364d0a5f06aab1e Mon Sep 17 00:00:00 2001 From: nicolau Date: Wed, 22 Jan 2025 12:01:15 -0300 Subject: [PATCH 03/33] fix: ping and pong send tcp_ports from node --- crates/networking/p2p/discv4/discv4.rs | 104 ++++++++++------------- crates/networking/p2p/discv4/requests.rs | 36 ++++---- 2 files changed, 66 insertions(+), 74 deletions(-) diff --git a/crates/networking/p2p/discv4/discv4.rs b/crates/networking/p2p/discv4/discv4.rs index 97e9dd3a4d..38ca92fdb9 100644 --- a/crates/networking/p2p/discv4/discv4.rs +++ b/crates/networking/p2p/discv4/discv4.rs @@ -22,12 +22,11 @@ use tokio::{ }; use tracing::debug; -use super::lookup::DiscoveryLookupHandler; +use super::lookup::{DiscoveryLookupHandler, PEERS_RANDOM_LOOKUP_TIME_IN_MIN}; #[derive(Debug, Clone)] pub struct Discv4 { local_node: Node, - udp_addr: SocketAddr, udp_socket: Arc, signer: SigningKey, storage: Store, @@ -58,12 +57,12 @@ impl Discv4 { storage, table, tx_broadcaster_send, - udp_addr: SocketAddr::new(local_node.ip, local_node.udp_port), udp_socket, revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, } } + #[allow(unused)] pub fn with_revalidation_interval_of(self, seconds: u64) -> Self { Self { revalidation_interval_seconds: seconds, @@ -71,6 +70,7 @@ impl Discv4 { } } + #[allow(unused)] pub fn with_lookup_interval_of(self, minutes: u64) -> Self { Self { revalidation_interval_seconds: minutes, @@ -84,7 +84,6 @@ impl Discv4 { ) -> Result<(), DiscoveryError> { let server_handle = tokio::spawn(self.clone().receive()); self.load_bootnodes(bootnodes).await; - let revalidation_handle = tokio::spawn(self.clone().start_revalidation_task()); // a first initial lookup runs without waiting for the interval @@ -94,6 +93,7 @@ impl Discv4 { self.signer.clone(), self.udp_socket.clone(), self.table.clone(), + PEERS_RANDOM_LOOKUP_TIME_IN_MIN, ); let lookup_handle = tokio::spawn(async move { lookup_handler.start_lookup_task().await }); @@ -108,25 +108,15 @@ impl Discv4 { async fn load_bootnodes(&self, bootnodes: Vec) { for bootnode in bootnodes { - 
self.table.lock().await.insert_node(Node { + let node = Node { ip: bootnode.socket_address.ip(), udp_port: bootnode.socket_address.port(), // TODO: udp port can differ from tcp port. // see https://github.com/lambdaclass/ethrex/issues/905 tcp_port: bootnode.socket_address.port(), node_id: bootnode.node_id, - }); - let ping_hash = ping( - &self.udp_socket, - self.udp_addr, - bootnode.socket_address, - &self.signer, - ) - .await; - self.table - .lock() - .await - .update_peer_ping(bootnode.node_id, ping_hash); + }; + self.try_add_peer_and_ping(node).await; } } @@ -163,16 +153,23 @@ impl Discv4 { debug!("Ignoring ping as it is expired."); return; }; + let node = Node { + ip: from.ip(), + udp_port: msg.from.udp_port, + tcp_port: msg.from.tcp_port, + node_id: packet.get_node_id(), + }; let ping_hash = packet.get_hash(); - pong(&self.udp_socket, from, ping_hash, &self.signer).await; - let node = { + pong(&self.udp_socket, from, node, ping_hash, &self.signer).await; + let peer = { let table = self.table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; - if let Some(peer) = node { + if let Some(peer) = peer { // send a a ping to get an endpoint proof if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { - let hash = ping(&self.udp_socket, self.udp_addr, from, &self.signer).await; + let hash = + ping(&self.udp_socket, self.local_node, peer.node, &self.signer).await; if let Some(hash) = hash { self.table .lock() @@ -182,25 +179,7 @@ impl Discv4 { } } else { // send a ping to get the endpoint proof from our end - let (peer, inserted_to_table) = { - let mut table = self.table.lock().await; - table.insert_node(Node { - ip: from.ip(), - udp_port: msg.from.udp_port, - tcp_port: msg.from.tcp_port, - node_id: packet.get_node_id(), - }) - }; - let hash = ping(&self.udp_socket, self.udp_addr, from, &self.signer).await; - if let Some(hash) = hash { - if inserted_to_table && peer.is_some() { - let peer = peer.unwrap(); - self.table - .lock() - .await - .update_peer_ping(peer.node.node_id, Some(hash)); - } - } + self.try_add_peer_and_ping(node).await; } } Message::Pong(msg) => { @@ -301,7 +280,6 @@ impl Discv4 { let nodes_sent = req.nodes_sent + nodes.len(); if nodes_sent <= MAX_NODES_PER_BUCKET { - debug!("Storing neighbors in our table!"); req.nodes_sent = nodes_sent; nodes_to_insert = Some(nodes.clone()); if let Some(tx) = &req.tx { @@ -322,15 +300,9 @@ impl Discv4 { if let Some(nodes) = nodes_to_insert { for node in nodes { - let (peer, inserted_to_table) = table.insert_node(node); - if inserted_to_table && peer.is_some() { - let peer = peer.unwrap(); - let node_addr = SocketAddr::new(peer.node.ip, peer.node.udp_port); - let ping_hash = - ping(&self.udp_socket, self.udp_addr, node_addr, &self.signer) - .await; - table.update_peer_ping(peer.node.node_id, ping_hash); - }; + if node.node_id != self.local_node.node_id { + self.try_add_peer_and_ping(node).await; + } } } } @@ -338,6 +310,25 @@ impl Discv4 { } } + /// Attempts to add a node to the Kademlia table and send a ping if necessary. + /// + /// - If the node is **not found** in the table and there is enough space, it will be added, + /// and a ping message will be sent to verify connectivity. + /// - If the node is **already present**, no action is taken. 
+ pub async fn try_add_peer_and_ping(&self, node: Node) { + let (Some(peer), inserted_to_table) = self.table.lock().await.insert_node(node) else { + return; + }; + if inserted_to_table { + debug!("Node {:?} was inserted to kademlia table", node); + let ping_hash = ping(&self.udp_socket, self.local_node, node, &self.signer).await; + self.table + .lock() + .await + .update_peer_ping(peer.node.node_id, ping_hash); + }; + } + async fn start_revalidation_task(self: Arc) { let mut interval = tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); @@ -371,8 +362,8 @@ impl Discv4 { if let Some(new_peer) = new_peer { let ping_hash = ping( &self.udp_socket, - self.udp_addr, - SocketAddr::new(new_peer.node.ip, new_peer.node.udp_port), + self.local_node, + new_peer.node, &self.signer, ) .await; @@ -387,13 +378,8 @@ impl Discv4 { let peers = self.table.lock().await.get_least_recently_pinged_peers(3); previously_pinged_peers = HashSet::default(); for peer in peers { - let ping_hash = ping( - &self.udp_socket, - self.udp_addr, - SocketAddr::new(peer.node.ip, peer.node.udp_port), - &self.signer, - ) - .await; + let ping_hash = + ping(&self.udp_socket, self.local_node, peer.node, &self.signer).await; let mut table = self.table.lock().await; table.update_peer_ping_with_revalidation(peer.node.node_id, ping_hash); previously_pinged_peers.insert(peer.node.node_id); diff --git a/crates/networking/p2p/discv4/requests.rs b/crates/networking/p2p/discv4/requests.rs index b39262a5ce..1427eb6290 100644 --- a/crates/networking/p2p/discv4/requests.rs +++ b/crates/networking/p2p/discv4/requests.rs @@ -19,9 +19,8 @@ use super::messages::{FindNodeMessage, Message, PingMessage, PongMessage}; /// an optional hash corresponding to the message header hash to account if the send was successful pub async fn ping( socket: &UdpSocket, - // TODO replace this with our node, so we can fill the tcp port - local_addr: SocketAddr, - to_addr: SocketAddr, + local_node: Node, + to_node: Node, signer: &SigningKey, ) -> Option { let mut buf = Vec::new(); @@ -31,21 +30,22 @@ pub async fn ping( .unwrap() .as_secs(); - // TODO: this should send our advertised TCP port let from = Endpoint { - ip: local_addr.ip(), - udp_port: local_addr.port(), - tcp_port: 0, + ip: local_node.ip, + udp_port: local_node.udp_port, + tcp_port: local_node.tcp_port, }; let to = Endpoint { - ip: to_addr.ip(), - udp_port: to_addr.port(), - tcp_port: 0, + ip: to_node.ip, + udp_port: to_node.udp_port, + tcp_port: to_node.tcp_port, }; let ping = Message::Ping(PingMessage::new(from, to, expiration)); ping.encode_with_header(&mut buf, signer); - let res = socket.send_to(&buf, to_addr).await; + let res = socket + .send_to(&buf, SocketAddr::new(to_node.ip, to_node.udp_port)) + .await; if res.is_err() { return None; @@ -59,7 +59,13 @@ pub async fn ping( None } -pub async fn pong(socket: &UdpSocket, to_addr: SocketAddr, ping_hash: H256, signer: &SigningKey) { +pub async fn pong( + socket: &UdpSocket, + to_addr: SocketAddr, + node: Node, + ping_hash: H256, + signer: &SigningKey, +) { let mut buf = Vec::new(); let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) @@ -68,9 +74,9 @@ pub async fn pong(socket: &UdpSocket, to_addr: SocketAddr, ping_hash: H256, sign .as_secs(); let to = Endpoint { - ip: to_addr.ip(), - udp_port: to_addr.port(), - tcp_port: 0, + ip: node.ip, + udp_port: node.udp_port, + tcp_port: node.tcp_port, }; let pong = Message::Pong(PongMessage::new(to, ping_hash, expiration)); From 5038e166ebece08c7dcbebab7f1c97fd1909615b Mon 
Sep 17 00:00:00 2001 From: nicolau Date: Wed, 22 Jan 2025 13:49:22 -0300 Subject: [PATCH 04/33] refactor: messages errors type and remove old todos --- crates/networking/p2p/discv4/lookup.rs | 5 +++-- crates/networking/p2p/discv4/messages.rs | 6 +----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 11d122b381..26b02c38fa 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -15,7 +15,7 @@ use crate::{ use super::requests::find_node_and_wait_for_response; -const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; // same as above +pub const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; // same as above #[derive(Clone, Debug)] pub struct DiscoveryLookupHandler { @@ -34,13 +34,14 @@ impl DiscoveryLookupHandler { signer: SigningKey, udp_socket: Arc, table: Arc>, + lookup_interval_minutes: u64, ) -> Self { Self { local_node, signer, udp_socket, table, - lookup_interval_minutes: PEERS_RANDOM_LOOKUP_TIME_IN_MIN, + lookup_interval_minutes, seen_peers: HashSet::new(), asked_peers: HashSet::new(), } diff --git a/crates/networking/p2p/discv4/messages.rs b/crates/networking/p2p/discv4/messages.rs index 5d64b739bc..c497d0e877 100644 --- a/crates/networking/p2p/discv4/messages.rs +++ b/crates/networking/p2p/discv4/messages.rs @@ -21,7 +21,6 @@ pub enum PacketDecodeErr { InvalidSignature, } -#[allow(unused)] #[derive(Debug)] pub struct Packet { hash: H256, @@ -67,7 +66,7 @@ impl Packet { let node_id = H512::from_slice(&encoded.as_bytes()[1..]); let signature = H520::from_slice(signature_bytes); let message = Message::decode_with_type(packet_type, &encoded_msg[1..]) - .map_err(PacketDecodeErr::RLPDecodeError)?; + .map_err(|e| PacketDecodeErr::RLPDecodeError(e))?; Ok(Self { hash, @@ -98,10 +97,7 @@ impl Packet { #[derive(Debug, Eq, PartialEq)] // NOTE: All messages could have more fields than specified by the spec. // Those additional fields should be ignored, and the message must be accepted. -// TODO: remove when all variants are used -#[allow(dead_code)] pub(crate) enum Message { - /// A ping message. Should be responded to with a Pong message. 
Ping(PingMessage), Pong(PongMessage), FindNode(FindNodeMessage), From d9fbd938e9c1b9a63682e27d9c78fbbf78efe3b6 Mon Sep 17 00:00:00 2001 From: nicolau Date: Wed, 22 Jan 2025 14:04:55 -0300 Subject: [PATCH 05/33] refactor: lookups --- crates/networking/p2p/discv4/discv4.rs | 8 ++++-- crates/networking/p2p/discv4/lookup.rs | 38 +++++++++++++------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/crates/networking/p2p/discv4/discv4.rs b/crates/networking/p2p/discv4/discv4.rs index 38ca92fdb9..f6ecddf31e 100644 --- a/crates/networking/p2p/discv4/discv4.rs +++ b/crates/networking/p2p/discv4/discv4.rs @@ -79,12 +79,14 @@ impl Discv4 { } pub async fn start_discovery_service( - self: Arc, + &self, bootnodes: Vec, ) -> Result<(), DiscoveryError> { - let server_handle = tokio::spawn(self.clone().receive()); + let self_arc = Arc::new(self.clone()); + + let server_handle = tokio::spawn(self_arc.clone().receive()); self.load_bootnodes(bootnodes).await; - let revalidation_handle = tokio::spawn(self.clone().start_revalidation_task()); + let revalidation_handle = tokio::spawn(self_arc.clone().start_revalidation_task()); // a first initial lookup runs without waiting for the interval // so we need to allow some time to the pinged peers to ping us back and acknowledge us diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 26b02c38fa..f6b64a3353 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -15,7 +15,7 @@ use crate::{ use super::requests::find_node_and_wait_for_response; -pub const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; // same as above +pub const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; #[derive(Clone, Debug)] pub struct DiscoveryLookupHandler { @@ -24,8 +24,6 @@ pub struct DiscoveryLookupHandler { udp_socket: Arc, table: Arc>, lookup_interval_minutes: u64, - seen_peers: HashSet, - asked_peers: HashSet, } impl DiscoveryLookupHandler { @@ -42,13 +40,12 @@ impl DiscoveryLookupHandler { udp_socket, table, lookup_interval_minutes, - seen_peers: HashSet::new(), - asked_peers: HashSet::new(), } } pub async fn start_lookup_task(&self) { let mut interval = tokio::time::interval(Duration::from_secs(self.lookup_interval_minutes)); + let self_arc = Arc::new(self.clone()); loop { // Notice that the first tick is immediate, @@ -60,22 +57,18 @@ impl DiscoveryLookupHandler { let mut handlers = vec![]; // lookup closest to our pub key - let self_clone = self.clone(); - handlers.push(tokio::spawn(async move { - self_clone - .recursive_lookup(self_clone.local_node.node_id) - .await; - })); + handlers.push(tokio::spawn( + self_arc.clone().recursive_lookup(self.local_node.node_id), + )); // lookup closest to 3 random keys for _ in 0..3 { let random_pub_key = SigningKey::random(&mut OsRng); - let self_clone = self.clone(); - handlers.push(tokio::spawn(async move { - self_clone - .recursive_lookup(node_id_from_signing_key(&random_pub_key)) - .await - })) + handlers.push(tokio::spawn( + self_arc + .clone() + .recursive_lookup(node_id_from_signing_key(&random_pub_key)), + )) } for handle in handlers { @@ -86,7 +79,7 @@ impl DiscoveryLookupHandler { } } - async fn recursive_lookup(&self, target: H512) { + async fn recursive_lookup(self: Arc, target: H512) { let mut asked_peers = HashSet::default(); // lookups start with the closest from our table let closest_nodes = self.table.lock().await.get_closest_nodes(target); @@ -100,7 +93,10 @@ impl DiscoveryLookupHandler { let mut peers_to_ask: Vec = 
closest_nodes; loop { - let (nodes_found, queries) = self.lookup(target, &mut asked_peers, &peers_to_ask).await; + let (nodes_found, queries) = self + .clone() + .lookup(target, &mut asked_peers, &peers_to_ask) + .await; // only push the peers that have not been seen // that is those who have not been yet pushed, which also accounts for @@ -120,6 +116,9 @@ impl DiscoveryLookupHandler { } } + /** + * TODO explain what this does + */ fn peers_to_ask_push(&self, peers_to_ask: &mut Vec, target: H512, node: Node) { let distance = bucket_number(target, node.node_id); @@ -145,6 +144,7 @@ impl DiscoveryLookupHandler { } } + // TODO lookup comment async fn lookup( &self, target: H512, From 2540d398360013c606badb9dad70b7d807bf76e0 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Wed, 22 Jan 2025 18:55:29 -0300 Subject: [PATCH 06/33] refactor: merge with main --- crates/networking/p2p/discv4/helpers.rs | 34 + crates/networking/p2p/discv4/lookup.rs | 260 +++++ .../p2p/{discv4.rs => discv4/messages.rs} | 39 +- crates/networking/p2p/discv4/mod.rs | 789 +++++++++++++ crates/networking/p2p/kademlia.rs | 11 +- crates/networking/p2p/net.rs | 1036 +---------------- crates/networking/p2p/rlpx/connection.rs | 10 +- 7 files changed, 1128 insertions(+), 1051 deletions(-) create mode 100644 crates/networking/p2p/discv4/helpers.rs create mode 100644 crates/networking/p2p/discv4/lookup.rs rename crates/networking/p2p/{discv4.rs => discv4/messages.rs} (96%) create mode 100644 crates/networking/p2p/discv4/mod.rs diff --git a/crates/networking/p2p/discv4/helpers.rs b/crates/networking/p2p/discv4/helpers.rs new file mode 100644 index 0000000000..63b0edc9fa --- /dev/null +++ b/crates/networking/p2p/discv4/helpers.rs @@ -0,0 +1,34 @@ +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +pub fn get_expiration(seconds: u64) -> u64 { + (SystemTime::now() + Duration::from_secs(seconds)) + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() +} + +pub fn is_expired(expiration: u64) -> bool { + // this cast to a signed integer is needed as the rlp decoder doesn't take into account the sign + // otherwise a potential negative expiration would pass since it would take 2^64. + (expiration as i64) + < SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as i64 +} + +pub fn time_since_in_hs(time: u64) -> u64 { + let time = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(time); + SystemTime::now() + .duration_since(time) + .unwrap_or_default() + .as_secs() + / 3600 +} + +pub fn time_now_unix() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs() +} diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs new file mode 100644 index 0000000000..264beb5e9f --- /dev/null +++ b/crates/networking/p2p/discv4/lookup.rs @@ -0,0 +1,260 @@ +use super::{helpers::get_expiration, DiscoveryError, Message}; +use crate::{ + kademlia::{bucket_number, MAX_NODES_PER_BUCKET}, + node_id_from_signing_key, + types::Node, + KademliaTable, +}; +use ethrex_core::H512; +use k256::ecdsa::SigningKey; +use rand::rngs::OsRng; +use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; +use tokio::{net::UdpSocket, sync::Mutex}; +use tokio_util::task::TaskTracker; +use tracing::debug; + +/// Starts a tokio scheduler that: +/// - performs random lookups to discover new nodes. 
Currently this is configured to run every `PEERS_RANDOM_LOOKUP_TIME_IN_MIN` +/// +/// **Random lookups** +/// +/// Random lookups work in the following manner: +/// 1. Every 30min we spawn four concurrent lookups: one closest to our pubkey +/// and three others closest to randomly generated pubkeys. +/// 2. Every lookup starts with the closest nodes from our table. +/// Each lookup keeps track of: +/// - Peers that have already been asked for nodes +/// - Peers that have already been seen +/// - Potential peers to query for nodes: a vector of up to 16 entries holding the closest peers to the pubkey. +/// This vector is initially filled with nodes from our table. +/// 3. We send a `find_node` to the closest 3 nodes (that we have not yet asked) from the pubkey. +/// 4. We wait for the neighbors response and push or replace into the potential peers those that are closer to the target. +/// 5. We select three other nodes from the potential peers vector and do the same until the lookup +/// doesn't have any more nodes to ask. +/// +/// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#recursive-lookup +#[derive(Clone, Debug)] +pub struct Disv4LookupHandler { + local_node: Node, + signer: SigningKey, + udp_socket: Arc, + table: Arc>, + interval_minutes: u64, + tracker: TaskTracker, +} + +impl Disv4LookupHandler { + pub fn new( + local_node: Node, + signer: SigningKey, + udp_socket: Arc, + table: Arc>, + interval_minutes: u64, + tracker: TaskTracker, + ) -> Self { + Self { + local_node, + signer, + udp_socket, + table, + interval_minutes, + tracker, + } + } + + pub async fn start(&self, initial_interval_wait_seconds: u64) { + let mut interval = tokio::time::interval(Duration::from_secs(self.interval_minutes)); + tokio::time::sleep(Duration::from_secs(initial_interval_wait_seconds)).await; + + loop { + // Notice that the first tick is immediate, + // so as soon as the server starts we'll do a lookup with the seeder nodes.
+ interval.tick().await; + + debug!("Starting lookup"); + + self.tracker.spawn({ + let self_clone = self.clone(); + async move { + self_clone + .recursive_lookup(self_clone.local_node.node_id) + .await + } + }); + + // lookup closest to 3 random keys + for _ in 0..3 { + let random_pub_key = SigningKey::random(&mut OsRng); + self.tracker.spawn({ + let self_clone = self.clone(); + async move { + self_clone + .recursive_lookup(node_id_from_signing_key(&random_pub_key)) + .await + } + }); + } + + debug!("Lookup finished"); + } + } + + async fn recursive_lookup(&self, target: H512) { + let mut asked_peers = HashSet::default(); + // lookups start with the closest from our table + let closest_nodes = self.table.lock().await.get_closest_nodes(target); + let mut seen_peers: HashSet = HashSet::default(); + + seen_peers.insert(self.local_node.node_id); + for node in &closest_nodes { + seen_peers.insert(node.node_id); + } + + let mut peers_to_ask: Vec = closest_nodes; + + loop { + let (nodes_found, queries) = self + .clone() + .lookup(target, &mut asked_peers, &peers_to_ask) + .await; + + // only push the peers that have not been seen + // that is those who have not been yet pushed, which also accounts for + // those peers that were in the array but have been replaced for closer peers + for node in nodes_found { + if !seen_peers.contains(&node.node_id) { + seen_peers.insert(node.node_id); + self.peers_to_ask_push(&mut peers_to_ask, target, node); + } + } + + // the lookup finishes when there are no more queries to do + // that happens when we have asked all the peers + if queries == 0 { + break; + } + } + } + + async fn lookup( + &self, + target: H512, + asked_peers: &mut HashSet, + nodes_to_ask: &Vec, + ) -> (Vec, u32) { + // ask FIND_NODE as much as three times + let alpha = 3; + let mut queries = 0; + let mut nodes = vec![]; + + for node in nodes_to_ask { + if !asked_peers.contains(&node.node_id) { + #[allow(unused_assignments)] + let mut rx = None; + { + let mut table = self.table.lock().await; + let peer = table.get_by_node_id_mut(node.node_id); + if let Some(peer) = peer { + // if the peer has an ongoing find_node request, don't query + if peer.find_node_request.is_some() { + continue; + } + let (tx, receiver) = tokio::sync::mpsc::unbounded_channel::>(); + peer.new_find_node_request_with_sender(tx); + rx = Some(receiver); + } else { + // if peer isn't inserted to table, don't query + continue; + } + } + + queries += 1; + asked_peers.insert(node.node_id); + + if let Ok(mut found_nodes) = self + .find_node_and_wait_for_response(*node, target, &mut rx.unwrap()) + .await + { + nodes.append(&mut found_nodes); + } + } + + if queries == alpha { + break; + } + } + + (nodes, queries) + } + + /** + * TODO explain what this does + */ + fn peers_to_ask_push(&self, peers_to_ask: &mut Vec, target: H512, node: Node) { + let distance = bucket_number(target, node.node_id); + + if peers_to_ask.len() < MAX_NODES_PER_BUCKET { + peers_to_ask.push(node); + return; + } + + // replace this node for the one whose distance to the target is the highest + let (mut idx_to_replace, mut highest_distance) = (None, 0); + + for (i, peer) in peers_to_ask.iter().enumerate() { + let current_distance = bucket_number(peer.node_id, target); + + if distance < current_distance && current_distance >= highest_distance { + highest_distance = current_distance; + idx_to_replace = Some(i); + } + } + + if let Some(idx) = idx_to_replace { + peers_to_ask[idx] = node; + } + } + + async fn find_node_and_wait_for_response( + &self, + node: Node, + 
target_id: H512, + request_receiver: &mut tokio::sync::mpsc::UnboundedReceiver>, + ) -> Result, DiscoveryError> { + let expiration: u64 = get_expiration(20); + + let msg = Message::FindNode(super::FindNodeMessage::new(target_id, expiration)); + + let mut buf = Vec::new(); + msg.encode_with_header(&mut buf, &self.signer); + let bytes_sent = self + .udp_socket + .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) + .await + .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); + } + + let mut nodes = vec![]; + loop { + // wait as much as 5 seconds for the response + match tokio::time::timeout(Duration::from_secs(5), request_receiver.recv()).await { + Ok(Some(mut found_nodes)) => { + nodes.append(&mut found_nodes); + if nodes.len() == MAX_NODES_PER_BUCKET { + return Ok(nodes); + }; + } + Ok(None) => { + return Ok(nodes); + } + Err(_) => { + // timeout expired + return Ok(nodes); + } + } + } + } +} diff --git a/crates/networking/p2p/discv4.rs b/crates/networking/p2p/discv4/messages.rs similarity index 96% rename from crates/networking/p2p/discv4.rs rename to crates/networking/p2p/discv4/messages.rs index e457e2d122..342e386dfb 100644 --- a/crates/networking/p2p/discv4.rs +++ b/crates/networking/p2p/discv4/messages.rs @@ -1,5 +1,3 @@ -use std::time::{Duration, SystemTime, UNIX_EPOCH}; - use crate::types::{Endpoint, Node, NodeRecord}; use bytes::BufMut; use ethrex_core::{H256, H512, H520}; @@ -12,35 +10,7 @@ use ethrex_rlp::{ use k256::ecdsa::{RecoveryId, Signature, SigningKey, VerifyingKey}; use sha3::{Digest, Keccak256}; -//todo add tests -pub fn get_expiration(seconds: u64) -> u64 { - (SystemTime::now() + Duration::from_secs(seconds)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() -} - -pub fn is_expired(expiration: u64) -> bool { - // this cast to a signed integer is needed as the rlp decoder doesn't take into account the sign - // otherwise a potential negative expiration would pass since it would take 2^64. - (expiration as i64) - < SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64 -} - -pub fn time_since_in_hs(time: u64) -> u64 { - let time = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(time); - SystemTime::now().duration_since(time).unwrap().as_secs() / 3600 -} - -pub fn time_now_unix() -> u64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() -} +use super::helpers::time_now_unix; #[derive(Debug, PartialEq)] pub enum PacketDecodeErr { @@ -51,7 +21,6 @@ pub enum PacketDecodeErr { InvalidSignature, } -#[allow(unused)] #[derive(Debug)] pub struct Packet { hash: H256, @@ -126,10 +95,6 @@ impl Packet { } #[derive(Debug, Eq, PartialEq)] -// NOTE: All messages could have more fields than specified by the spec. -// Those additional fields should be ignored, and the message must be accepted. -// TODO: remove when all variants are used -#[allow(dead_code)] pub(crate) enum Message { /// A ping message. Should be responded to with a Pong message. 
Ping(PingMessage), @@ -380,8 +345,6 @@ impl PongMessage { } } - // TODO: remove when used - #[allow(unused)] pub fn with_enr_seq(self, enr_seq: u64) -> Self { Self { enr_seq: Some(enr_seq), diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs new file mode 100644 index 0000000000..de2998e91c --- /dev/null +++ b/crates/networking/p2p/discv4/mod.rs @@ -0,0 +1,789 @@ +pub(self) mod helpers; +mod lookup; +pub(super) mod messages; + +use crate::{ + bootnode::BootNode, + handle_peer_as_initiator, + kademlia::MAX_NODES_PER_BUCKET, + rlpx::connection::RLPxConnBroadcastSender, + types::{Endpoint, Node}, + KademliaTable, +}; +use ethrex_core::H256; +use ethrex_storage::Store; +use helpers::{get_expiration, is_expired, time_now_unix, time_since_in_hs}; +use k256::ecdsa::SigningKey; +use lookup::Disv4LookupHandler; +use messages::{FindNodeMessage, Message, NeighborsMessage, Packet, PingMessage, PongMessage}; +use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; +use tokio::{net::UdpSocket, sync::Mutex}; +use tokio_util::task::TaskTracker; +use tracing::{debug, error}; + +pub const MAX_DISC_PACKET_SIZE: usize = 1280; +const PROOF_EXPIRATION_IN_HS: u64 = 12; +// These interval times are arbitrary numbers, maybe we should read them from a cfg or a cli param +const REVALIDATION_INTERVAL_IN_SECONDS: u64 = 30; +const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; + +#[derive(Debug)] +#[allow(dead_code)] +pub enum DiscoveryError { + BindSocket(std::io::Error), + MessageSendFailure(std::io::Error), + PartialMessageSent, + MessageExpired, + InvalidMessage(String), +} + +#[derive(Debug, Clone)] +pub struct Discv4 { + local_node: Node, + udp_socket: Arc, + signer: SigningKey, + storage: Store, + table: Arc>, + tracker: TaskTracker, + rlxp_conn_sender: RLPxConnBroadcastSender, + revalidation_interval_seconds: u64, + lookup_interval_minutes: u64, +} + +impl Discv4 { + pub async fn try_new( + local_node: Node, + signer: SigningKey, + storage: Store, + table: Arc>, + tx_broadcaster_send: RLPxConnBroadcastSender, + tracker: TaskTracker, + ) -> Result { + let udp_socket = UdpSocket::bind(SocketAddr::new(local_node.ip, local_node.udp_port)) + .await + .map_err(|e| DiscoveryError::BindSocket(e))?; + + Ok(Self { + local_node, + signer, + storage, + table, + rlxp_conn_sender: tx_broadcaster_send, + udp_socket: Arc::new(udp_socket), + revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, + lookup_interval_minutes: PEERS_RANDOM_LOOKUP_TIME_IN_MIN, + tracker, + }) + } + + #[allow(unused)] + pub fn with_revalidation_interval_of(self, seconds: u64) -> Self { + Self { + revalidation_interval_seconds: seconds, + ..self + } + } + + #[allow(unused)] + pub fn with_lookup_interval_of(self, minutes: u64) -> Self { + Self { + revalidation_interval_seconds: minutes, + ..self + } + } + + pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { + let lookup_handler = Disv4LookupHandler::new( + self.local_node, + self.signer.clone(), + self.udp_socket.clone(), + self.table.clone(), + self.lookup_interval_minutes, + self.tracker.clone(), + ); + + self.tracker.spawn({ + let self_clone = self.clone(); + async move { self_clone.receive().await } + }); + self.tracker.spawn({ + let self_clone = self.clone(); + async move { self_clone.start_revalidation().await } + }); + self.load_bootnodes(bootnodes).await; + self.tracker + .spawn(async move { lookup_handler.start(10).await }); + + Ok(()) + } + + async fn load_bootnodes(&self, bootnodes: Vec) { + for bootnode in 
bootnodes { + let node = Node { + ip: bootnode.socket_address.ip(), + udp_port: bootnode.socket_address.port(), + // TODO: udp port can differ from tcp port. + // see https://github.com/lambdaclass/ethrex/issues/905 + tcp_port: bootnode.socket_address.port(), + node_id: bootnode.node_id, + }; + if let Err(e) = self.try_add_peer_and_ping(node).await { + debug!("Error while adding bootnode to table: {:?}", e); + }; + } + } + + pub async fn receive(&self) { + let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; + + loop { + let (read, from) = match self.udp_socket.recv_from(&mut buf).await { + Ok(result) => result, + Err(e) => { + error!("Error receiving data from socket: {e}. Stopping discovery server"); + return; + } + }; + debug!("Received {read} bytes from {from}"); + + match Packet::decode(&buf[..read]) { + Err(e) => error!("Could not decode packet: {:?}", e), + Ok(packet) => { + let msg = packet.get_message(); + debug!("Message: {:?} from {}", msg, packet.get_node_id()); + if let Err(e) = self.handle_message(packet, from, read, &buf).await { + debug!("Error while processing message: {:?}", e); + }; + } + } + } + } + + async fn handle_message( + &self, + packet: Packet, + from: SocketAddr, + msg_len: usize, + msg_bytes: &[u8], + ) -> Result<(), DiscoveryError> { + match packet.get_message() { + Message::Ping(msg) => { + if is_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + }; + let node = Node { + ip: from.ip(), + udp_port: msg.from.udp_port, + tcp_port: msg.from.tcp_port, + node_id: packet.get_node_id(), + }; + self.pong(packet.get_hash(), node).await?; + let peer = { + let table = self.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + // if peer was already inserted, and last ping was 12 hs ago + // we need to re ping to re-validate the endpoint proof + if let Some(peer) = peer { + if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { + self.ping(node).await?; + } + } else { + // otherwise add to the table + let mut table = self.table.lock().await; + if let (Some(peer), true) = table.insert_node(node) { + // it was inserted, send ping to bond + self.ping(peer.node).await?; + } + } + + Ok(()) + } + Message::Pong(msg) => { + let table = self.table.clone(); + if is_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + } + let peer = { + let table = table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(peer) = peer { + if peer.last_ping_hash.is_none() { + return Err(DiscoveryError::InvalidMessage( + "node did not send a previous ping".into(), + )); + } + if peer + .last_ping_hash + .is_some_and(|hash| hash == msg.ping_hash) + { + table.lock().await.pong_answered(peer.node.node_id); + let mut msg_buf = vec![0; msg_len - 32]; + msg_bytes[32..msg_len].clone_into(&mut msg_buf); + let signer = self.signer.clone(); + let storage = self.storage.clone(); + let broadcaster = self.rlxp_conn_sender.clone(); + self.tracker.spawn(async move { + handle_peer_as_initiator( + signer, + &msg_buf, + &peer.node, + storage, + table, + broadcaster, + ) + .await + }); + Ok(()) + } else { + Err(DiscoveryError::InvalidMessage( + "pong as the hash did not match the last corresponding ping".into(), + )) + } + } else { + Err(DiscoveryError::InvalidMessage( + "pong from a not known node".into(), + )) + } + } + Message::FindNode(msg) => { + if is_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + }; + let node = { + let table = self.table.lock().await; + 
table.get_by_node_id(packet.get_node_id()).cloned() + }; + if let Some(node) = node { + if node.is_proven { + let nodes = { + let table = self.table.lock().await; + table.get_closest_nodes(msg.target) + }; + let nodes_chunks = nodes.chunks(4); + let expiration = get_expiration(20); + debug!("Sending neighbors!"); + // we are sending the neighbors in 4 different messages as not to exceed the + // maximum packet size + for nodes in nodes_chunks { + let neighbors = Message::Neighbors(NeighborsMessage::new( + nodes.to_vec(), + expiration, + )); + let mut buf = Vec::new(); + neighbors.encode_with_header(&mut buf, &self.signer); + let bytes_sent = self + .udp_socket + .send_to(&buf, from) + .await + .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); + } + } + Ok(()) + } else { + Err(DiscoveryError::InvalidMessage("Node isn't proven.".into())) + } + } else { + Err(DiscoveryError::InvalidMessage("Node is not known".into())) + } + } + Message::Neighbors(neighbors_msg) => { + if is_expired(neighbors_msg.expiration) { + return Err(DiscoveryError::MessageExpired); + }; + + let mut nodes_to_insert = None; + let mut table = self.table.lock().await; + if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { + if let Some(req) = &mut node.find_node_request { + if time_now_unix().saturating_sub(req.sent_at) >= 60 { + node.find_node_request = None; + return Err(DiscoveryError::InvalidMessage( + "find_node request expired after one minute".into(), + )); + } + let nodes = &neighbors_msg.nodes; + let nodes_sent = req.nodes_sent + nodes.len(); + + if nodes_sent <= MAX_NODES_PER_BUCKET { + req.nodes_sent = nodes_sent; + nodes_to_insert = Some(nodes.clone()); + if let Some(tx) = &req.tx { + let _ = tx.send(nodes.clone()); + } + } else { + debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); + } + + if nodes_sent == MAX_NODES_PER_BUCKET { + debug!("Neighbors request has been fulfilled"); + node.find_node_request = None; + } + } + } else { + return Err(DiscoveryError::InvalidMessage("Unknown node".into())); + } + + if let Some(nodes) = nodes_to_insert { + debug!("Storing neighbors in our table!"); + for node in nodes { + let _ = self.try_add_peer_and_ping(node); + } + } + + Ok(()) + } + _ => Ok(()), + } + } + + /// Starts a tokio scheduler that: + /// - performs periodic revalidation of the current nodes (sends a ping to the old nodes). Currently this is configured to happen every [`REVALIDATION_INTERVAL_IN_MINUTES`] + /// + /// **Peer revalidation** + /// + /// Peers revalidation works in the following manner: + /// 1. Every `REVALIDATION_INTERVAL_IN_SECONDS` we ping the 3 least recently pinged peers + /// 2. In the next iteration we check if they have answered + /// - if they have: we increment the liveness field by one + /// - otherwise we decrement it by the current value / 3. + /// 3. 
If the liveness field is 0, then we delete it and insert a new one from the replacements table + /// + /// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#kademlia-table + pub async fn start_revalidation(&self) { + let mut interval = + tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); + // peers we have pinged in the previous iteration + let mut previously_pinged_peers = HashSet::new(); + + // first tick starts immediately + interval.tick().await; + + loop { + interval.tick().await; + debug!("Running peer revalidation"); + + // first check that the peers we ping have responded + for node_id in previously_pinged_peers { + let mut table = self.table.lock().await; + let peer = table.get_by_node_id_mut(node_id).unwrap(); + + if let Some(has_answered) = peer.revalidation { + if has_answered { + peer.increment_liveness(); + } else { + peer.decrement_liveness(); + } + } + + peer.revalidation = None; + + if peer.liveness == 0 { + let new_peer = table.replace_peer(node_id); + if let Some(new_peer) = new_peer { + let _ = self.ping(new_peer.node); + } + } + } + + // now send a ping to the least recently pinged peers + // this might be too expensive to run if our table is filled + // maybe we could just pick them randomly + let peers = self.table.lock().await.get_least_recently_pinged_peers(3); + previously_pinged_peers = HashSet::default(); // reset pinged peers + for peer in peers { + debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); + let _ = self.ping(peer.node); + previously_pinged_peers.insert(peer.node.node_id); + let mut table = self.table.lock().await; + let peer = table.get_by_node_id_mut(peer.node.node_id); + if let Some(peer) = peer { + peer.revalidation = Some(false); + } + } + + debug!("Peer revalidation finished"); + } + } + + /// Attempts to add a node to the Kademlia table and send a ping if necessary. + /// + /// - If the node is **not found** in the table and there is enough space, it will be added, + /// and a ping message will be sent to verify connectivity. + /// - If the node is **already present**, no action is taken. 
+ async fn try_add_peer_and_ping(&self, node: Node) -> Result<(), DiscoveryError> { + if let (Some(peer), true) = self.table.lock().await.insert_node(node) { + self.ping(peer.node).await?; + }; + Ok(()) + } + + // Sends a ping to the addr + /// # Returns + /// an optional hash corresponding to the message header hash to account if the send was successful + async fn ping(&self, node: Node) -> Result<(), DiscoveryError> { + let mut buf = Vec::new(); + let expiration: u64 = get_expiration(20); + let from = Endpoint { + ip: self.local_node.ip, + udp_port: self.local_node.udp_port, + tcp_port: self.local_node.tcp_port, + }; + let to = Endpoint { + ip: node.ip, + udp_port: node.udp_port, + tcp_port: node.tcp_port, + }; + + let ping = Message::Ping(PingMessage::new(from, to, expiration)); + ping.encode_with_header(&mut buf, &self.signer); + let bytes_sent = self + .udp_socket + .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) + .await + .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); + } + + let hash = H256::from_slice(&buf[0..32]); + self.table + .lock() + .await + .update_peer_ping(node.node_id, Some(hash)); + + Ok(()) + } + + async fn pong(&self, ping_hash: H256, node: Node) -> Result<(), DiscoveryError> { + let mut buf = Vec::new(); + let expiration: u64 = get_expiration(20); + let to = Endpoint { + ip: node.ip, + udp_port: node.udp_port, + tcp_port: node.tcp_port, + }; + + let pong = Message::Pong(PongMessage::new(to, ping_hash, expiration)); + pong.encode_with_header(&mut buf, &self.signer); + + let bytes_sent = self + .udp_socket + .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) + .await + .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + + if bytes_sent != buf.len() { + Err(DiscoveryError::PartialMessageSent) + } else { + Ok(()) + } + } +} + +// #[cfg(test)] +// mod tests { +// use super::*; +// use ethrex_storage::EngineType; +// use kademlia::bucket_number; +// use rand::rngs::OsRng; +// use std::{ +// collections::HashSet, +// net::{IpAddr, Ipv4Addr}, +// }; +// use tokio::time::sleep; + +// async fn insert_random_node_on_custom_bucket( +// table: Arc>, +// bucket_idx: usize, +// ) { +// let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); +// let node = Node { +// ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), +// tcp_port: 0, +// udp_port: 0, +// node_id, +// }; +// table +// .lock() +// .await +// .insert_node_on_custom_bucket(node, bucket_idx); +// } + +// async fn fill_table_with_random_nodes(table: Arc>) { +// for i in 0..256 { +// for _ in 0..16 { +// insert_random_node_on_custom_bucket(table.clone(), i).await; +// } +// } +// } + +// struct MockServer { +// pub addr: SocketAddr, +// pub signer: SigningKey, +// pub table: Arc>, +// pub node_id: H512, +// pub udp_socket: Arc, +// } + +// async fn start_mock_discovery_server( +// udp_port: u16, +// should_start_server: bool, +// ) -> Result { +// let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); +// let signer = SigningKey::random(&mut OsRng); +// let udp_socket = Arc::new(UdpSocket::bind(addr).await?); +// let node_id = node_id_from_signing_key(&signer); +// let storage = +// Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); +// let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); +// let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( +// tokio::task::Id, +// Arc, +// )>(MAX_MESSAGES_TO_BROADCAST); +// let tracker 
= TaskTracker::new(); +// if should_start_server { +// tracker.spawn(discover_peers_server( +// tracker.clone(), +// addr, +// udp_socket.clone(), +// storage.clone(), +// table.clone(), +// signer.clone(), +// channel_broadcast_send_end, +// )); +// } + +// Ok(MockServer { +// addr, +// signer, +// table, +// node_id, +// udp_socket, +// }) +// } + +// /// connects two mock servers by pinging a to b +// async fn connect_servers(server_a: &mut MockServer, server_b: &mut MockServer) { +// let ping_hash = ping( +// &server_a.udp_socket, +// server_a.addr, +// server_b.addr, +// &server_a.signer, +// ) +// .await; +// { +// let mut table = server_a.table.lock().await; +// table.insert_node(Node { +// ip: server_b.addr.ip(), +// udp_port: server_b.addr.port(), +// tcp_port: 0, +// node_id: server_b.node_id, +// }); +// table.update_peer_ping(server_b.node_id, ping_hash); +// } +// // allow some time for the server to respond +// sleep(Duration::from_secs(1)).await; +// } + +// #[tokio::test] +// /** This is a end to end test on the discovery server, the idea is as follows: +// * - We'll start two discovery servers (`a` & `b`) to ping between each other +// * - We'll make `b` ping `a`, and validate that the connection is right +// * - Then we'll wait for a revalidation where we expect everything to be the same +// * - We'll do this five 5 more times +// * - Then we'll stop server `a` so that it doesn't respond to re-validations +// * - We expect server `b` to remove node `a` from its table after 3 re-validations +// * To make this run faster, we'll change the revalidation time to be every 2secs +// */ +// async fn discovery_server_revalidation() -> Result<(), io::Error> { +// let mut server_a = start_mock_discovery_server(7998, true).await?; +// let mut server_b = start_mock_discovery_server(7999, true).await?; + +// connect_servers(&mut server_a, &mut server_b).await; + +// // start revalidation server +// tokio::spawn(peers_revalidation( +// server_b.addr, +// server_b.udp_socket.clone(), +// server_b.table.clone(), +// server_b.signer.clone(), +// 2, +// )); + +// for _ in 0..5 { +// sleep(Duration::from_millis(2500)).await; +// // by now, b should've send a revalidation to a +// let table = server_b.table.lock().await; +// let node = table.get_by_node_id(server_a.node_id); +// assert!(node.is_some_and(|n| n.revalidation.is_some())); +// } + +// // make sure that `a` has responded too all the re-validations +// // we can do that by checking the liveness +// { +// let table = server_b.table.lock().await; +// let node = table.get_by_node_id(server_a.node_id); +// assert_eq!(node.map_or(0, |n| n.liveness), 6); +// } + +// // now, stopping server `a` is not trivial +// // so we'll instead change its port, so that no one responds +// { +// let mut table = server_b.table.lock().await; +// let node = table.get_by_node_id_mut(server_a.node_id); +// if let Some(node) = node { +// node.node.udp_port = 0 +// }; +// } + +// // now the liveness field should start decreasing until it gets to 0 +// // which should happen in 3 re-validations +// for _ in 0..2 { +// sleep(Duration::from_millis(2500)).await; +// let table = server_b.table.lock().await; +// let node = table.get_by_node_id(server_a.node_id); +// assert!(node.is_some_and(|n| n.revalidation.is_some())); +// } +// sleep(Duration::from_millis(2500)).await; + +// // finally, `a`` should not exist anymore +// let table = server_b.table.lock().await; +// assert!(table.get_by_node_id(server_a.node_id).is_none()); +// Ok(()) +// } + +// #[tokio::test] 
+// /** This test tests the lookup function, the idea is as follows: +// * - We'll start two discovery servers (`a` & `b`) that will connect between each other +// * - We'll insert random nodes to the server `a`` to fill its table +// * - We'll forcedly run `lookup` and validate that a `find_node` request was sent +// * by checking that new nodes have been inserted to the table +// * +// * This test for only one lookup, and not recursively. +// */ +// async fn discovery_server_lookup() -> Result<(), io::Error> { +// let mut server_a = start_mock_discovery_server(8000, true).await?; +// let mut server_b = start_mock_discovery_server(8001, true).await?; + +// fill_table_with_random_nodes(server_a.table.clone()).await; + +// // before making the connection, remove a node from the `b` bucket. Otherwise it won't be added +// let b_bucket = bucket_number(server_a.node_id, server_b.node_id); +// let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] +// .node +// .node_id; +// server_a +// .table +// .lock() +// .await +// .replace_peer_on_custom_bucket(node_id_to_remove, b_bucket); + +// connect_servers(&mut server_a, &mut server_b).await; + +// // now we are going to run a lookup with us as the target +// let closets_peers_to_b_from_a = server_a +// .table +// .lock() +// .await +// .get_closest_nodes(server_b.node_id); +// let nodes_to_ask = server_b +// .table +// .lock() +// .await +// .get_closest_nodes(server_b.node_id); + +// lookup( +// server_b.udp_socket.clone(), +// server_b.table.clone(), +// &server_b.signer, +// server_b.node_id, +// &mut HashSet::default(), +// &nodes_to_ask, +// ) +// .await; + +// // find_node sent, allow some time for `a` to respond +// sleep(Duration::from_secs(2)).await; + +// // now all peers should've been inserted +// for peer in closets_peers_to_b_from_a { +// let table = server_b.table.lock().await; +// assert!(table.get_by_node_id(peer.node_id).is_some()); +// } +// Ok(()) +// } + +// #[tokio::test] +// /** This test tests the lookup function, the idea is as follows: +// * - We'll start four discovery servers (`a`, `b`, `c` & `d`) +// * - `a` will be connected to `b`, `b` will be connected to `c` and `c` will be connected to `d`. 
+// * - The server `d` will have its table filled with mock nodes +// * - We'll run a recursive lookup on server `a` and we expect to end with `b`, `c`, `d` and its mock nodes +// */ +// async fn discovery_server_recursive_lookup() -> Result<(), io::Error> { +// let mut server_a = start_mock_discovery_server(8002, true).await?; +// let mut server_b = start_mock_discovery_server(8003, true).await?; +// let mut server_c = start_mock_discovery_server(8004, true).await?; +// let mut server_d = start_mock_discovery_server(8005, true).await?; + +// connect_servers(&mut server_a, &mut server_b).await; +// connect_servers(&mut server_b, &mut server_c).await; +// connect_servers(&mut server_c, &mut server_d).await; + +// // now we fill the server_d table with 3 random nodes +// // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) +// // and so we will have to wait for the timeout on each node, which will only slow down the test +// for _ in 0..3 { +// insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; +// } + +// let mut expected_peers = vec![]; +// expected_peers.extend( +// server_b +// .table +// .lock() +// .await +// .get_closest_nodes(server_a.node_id), +// ); +// expected_peers.extend( +// server_c +// .table +// .lock() +// .await +// .get_closest_nodes(server_a.node_id), +// ); +// expected_peers.extend( +// server_d +// .table +// .lock() +// .await +// .get_closest_nodes(server_a.node_id), +// ); + +// // we'll run a recursive lookup closest to the server itself +// recursive_lookup( +// server_a.udp_socket.clone(), +// server_a.table.clone(), +// server_a.signer.clone(), +// server_a.node_id, +// server_a.node_id, +// ) +// .await; + +// for peer in expected_peers { +// assert!(server_a +// .table +// .lock() +// .await +// .get_by_node_id(peer.node_id) +// .is_some()); +// } +// Ok(()) +// } +// } diff --git a/crates/networking/p2p/kademlia.rs b/crates/networking/p2p/kademlia.rs index 0b8bfa280e..6016348b80 100644 --- a/crates/networking/p2p/kademlia.rs +++ b/crates/networking/p2p/kademlia.rs @@ -1,7 +1,5 @@ use crate::{ - discv4::{time_now_unix, FindNodeRequest}, - peer_channels::PeerChannels, - rlpx::p2p::Capability, + discv4::messages::FindNodeRequest, peer_channels::PeerChannels, rlpx::p2p::Capability, types::Node, }; use ethrex_core::{H256, H512, U256}; @@ -13,6 +11,13 @@ pub const MAX_NODES_PER_BUCKET: usize = 16; const NUMBER_OF_BUCKETS: usize = 256; const MAX_NUMBER_OF_REPLACEMENTS: usize = 10; +pub fn time_now_unix() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs() +} + #[derive(Clone, Debug, Default)] pub struct Bucket { pub peers: Vec, diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index 06615df108..669201f65c 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -1,33 +1,24 @@ -use std::{ - collections::HashSet, - io, - net::SocketAddr, - sync::Arc, - time::{Duration, SystemTime, UNIX_EPOCH}, -}; - use bootnode::BootNode; -use discv4::{ - get_expiration, is_expired, time_now_unix, time_since_in_hs, FindNodeMessage, Message, - NeighborsMessage, Packet, PingMessage, PongMessage, -}; -use ethrex_core::{H256, H512}; +use discv4::Discv4; +use ethrex_core::H512; use ethrex_storage::Store; use k256::{ ecdsa::SigningKey, elliptic_curve::{sec1::ToEncodedPoint, PublicKey}, }; pub use kademlia::KademliaTable; -use kademlia::{bucket_number, MAX_NODES_PER_BUCKET}; -use rand::rngs::OsRng; -use 
rlpx::{connection::RLPxConnection, message::Message as RLPxMessage}; +use rlpx::{ + connection::{RLPxConnBroadcastSender, RLPxConnection}, + message::Message as RLPxMessage, +}; +use std::{io, net::SocketAddr, sync::Arc}; use tokio::{ - net::{TcpListener, TcpSocket, TcpStream, UdpSocket}, - sync::{broadcast, Mutex}, + net::{TcpListener, TcpSocket, TcpStream}, + sync::Mutex, }; use tokio_util::task::TaskTracker; -use tracing::{debug, error, info}; -use types::{Endpoint, Node}; +use tracing::{error, info}; +use types::Node; pub mod bootnode; pub(crate) mod discv4; @@ -38,8 +29,6 @@ pub(crate) mod snap; pub mod sync; pub mod types; -const MAX_DISC_PACKET_SIZE: usize = 1280; - // Totally arbitrary limit on how // many messages the connections can queue, // if we miss messages to broadcast, maybe @@ -66,15 +55,25 @@ pub async fn start_network( tokio::task::Id, Arc, )>(MAX_MESSAGES_TO_BROADCAST); - tracker.spawn(discover_peers( - tracker.clone(), - udp_addr, + + // TODO handle errors here + let discovery = Discv4::try_new( + Node { + ip: udp_addr.ip(), + udp_port: udp_addr.port(), + tcp_port: tcp_addr.port(), + node_id: H512::default(), + }, signer.clone(), storage.clone(), peer_table.clone(), - bootnodes, channel_broadcast_send_end.clone(), - )); + tracker.clone(), + ) + .await + .unwrap(); + discovery.start(bootnodes).await.unwrap(); + tracker.spawn(serve_p2p_requests( tracker.clone(), tcp_addr, @@ -85,688 +84,13 @@ pub async fn start_network( )); } -async fn discover_peers( - tracker: TaskTracker, - udp_addr: SocketAddr, - signer: SigningKey, - storage: Store, - table: Arc>, - bootnodes: Vec, - connection_broadcast: broadcast::Sender<(tokio::task::Id, Arc)>, -) { - let udp_socket = match UdpSocket::bind(udp_addr).await { - Ok(socket) => Arc::new(socket), - Err(e) => { - error!("Error binding udp socket {udp_addr}: {e}. Stopping discover peers task"); - return; - } - }; - - tracker.spawn(discover_peers_server( - tracker.clone(), - udp_addr, - udp_socket.clone(), - storage, - table.clone(), - signer.clone(), - connection_broadcast, - )); - tracker.spawn(peers_revalidation( - udp_addr, - udp_socket.clone(), - table.clone(), - signer.clone(), - REVALIDATION_INTERVAL_IN_SECONDS as u64, - )); - - discovery_startup( - udp_addr, - udp_socket.clone(), - table.clone(), - signer.clone(), - bootnodes, - ) - .await; - - // a first initial lookup runs without waiting for the interval - // so we need to allow some time to the pinged peers to ping us back and acknowledge us - tokio::time::sleep(Duration::from_secs(10)).await; - tracker.spawn(peers_lookup( - tracker.clone(), - udp_socket.clone(), - table.clone(), - signer.clone(), - node_id_from_signing_key(&signer), - PEERS_RANDOM_LOOKUP_TIME_IN_MIN as u64 * 60, - )); -} - -async fn discover_peers_server( - tracker: TaskTracker, - udp_addr: SocketAddr, - udp_socket: Arc, - storage: Store, - table: Arc>, - signer: SigningKey, - tx_broadcaster_send: broadcast::Sender<(tokio::task::Id, Arc)>, -) { - let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; - - loop { - let (read, from) = match udp_socket.recv_from(&mut buf).await { - Ok(result) => result, - Err(e) => { - error!( - "Error receiving data from socket {udp_addr}: {e}. 
Stopping discovery server" - ); - return; - } - }; - debug!("Received {read} bytes from {from}"); - - match Packet::decode(&buf[..read]) { - Err(e) => error!("Could not decode packet: {:?}", e), - Ok(packet) => { - let msg = packet.get_message(); - debug!("Message: {:?} from {}", msg, packet.get_node_id()); - - match msg { - Message::Ping(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring ping as it is expired."); - continue; - }; - let ping_hash = packet.get_hash(); - pong(&udp_socket, from, ping_hash, &signer).await; - let node = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(peer) = node { - // send a a ping to get an endpoint proof - if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { - let hash = ping(&udp_socket, udp_addr, from, &signer).await; - if let Some(hash) = hash { - table - .lock() - .await - .update_peer_ping(peer.node.node_id, Some(hash)); - } - } - } else { - let mut table = table.lock().await; - let node = Node { - ip: from.ip(), - udp_port: from.port(), - tcp_port: 0, - node_id: packet.get_node_id(), - }; - if let (Some(peer), true) = table.insert_node(node) { - // send a ping to get the endpoint proof from our end - let hash = ping(&udp_socket, udp_addr, from, &signer).await; - table.update_peer_ping(peer.node.node_id, hash); - } - } - } - Message::Pong(msg) => { - let table = table.clone(); - if is_expired(msg.expiration) { - debug!("Ignoring pong as it is expired."); - continue; - } - let peer = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(peer) = peer { - if peer.last_ping_hash.is_none() { - debug!("Discarding pong as the node did not send a previous ping"); - continue; - } - if peer - .last_ping_hash - .is_some_and(|hash| hash == msg.ping_hash) - { - table.lock().await.pong_answered(peer.node.node_id); - - let mut msg_buf = vec![0; read - 32]; - buf[32..read].clone_into(&mut msg_buf); - let signer = signer.clone(); - let storage = storage.clone(); - let broadcaster = tx_broadcaster_send.clone(); - tracker.spawn(async move { - handle_peer_as_initiator( - signer, - &msg_buf, - &peer.node, - storage, - table, - broadcaster, - ) - .await - }); - } else { - debug!( - "Discarding pong as the hash did not match the last corresponding ping" - ); - } - } else { - debug!("Discarding pong as it is not a known node"); - } - } - Message::FindNode(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring find node msg as it is expired."); - continue; - }; - let node = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(node) = node { - if node.is_proven { - let nodes = { - let table = table.lock().await; - table.get_closest_nodes(msg.target) - }; - let nodes_chunks = nodes.chunks(4); - let expiration = get_expiration(20); - debug!("Sending neighbors!"); - // we are sending the neighbors in 4 different messages as not to exceed the - // maximum packet size - for nodes in nodes_chunks { - let neighbors = discv4::Message::Neighbors( - NeighborsMessage::new(nodes.to_vec(), expiration), - ); - let mut buf = Vec::new(); - neighbors.encode_with_header(&mut buf, &signer); - if let Err(e) = udp_socket.send_to(&buf, from).await { - error!("Could not send Neighbors message {e}"); - } - } - } else { - debug!("Ignoring find node message as the node isn't proven!"); - } - } else { - debug!("Ignoring find node message as it is not a known node"); - } - } - 
Message::Neighbors(neighbors_msg) => { - if is_expired(neighbors_msg.expiration) { - debug!("Ignoring neighbor msg as it is expired."); - continue; - }; - - let mut nodes_to_insert = None; - let mut table = table.lock().await; - if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { - if let Some(req) = &mut node.find_node_request { - if time_now_unix().saturating_sub(req.sent_at) >= 60 { - debug!("Ignoring neighbors message as the find_node request expires after one minute"); - node.find_node_request = None; - continue; - } - let nodes = &neighbors_msg.nodes; - let nodes_sent = req.nodes_sent + nodes.len(); - - if nodes_sent <= MAX_NODES_PER_BUCKET { - debug!("Storing neighbors in our table!"); - req.nodes_sent = nodes_sent; - nodes_to_insert = Some(nodes.clone()); - if let Some(tx) = &req.tx { - let _ = tx.send(nodes.clone()); - } - } else { - debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); - } - - if nodes_sent == MAX_NODES_PER_BUCKET { - debug!("Neighbors request has been fulfilled"); - node.find_node_request = None; - } - } - } else { - debug!("Ignoring neighbor msg as it is not a known node"); - } - - if let Some(nodes) = nodes_to_insert { - for node in nodes { - if let (Some(peer), true) = table.insert_node(node) { - let node_addr = - SocketAddr::new(peer.node.ip, peer.node.udp_port); - let ping_hash = - ping(&udp_socket, udp_addr, node_addr, &signer).await; - table.update_peer_ping(peer.node.node_id, ping_hash); - } - } - } - } - _ => {} - } - } - } - } -} - -// this is just an arbitrary number, maybe we should get this from some kind of cfg -/// This is a really basic startup and should be improved when we have the nodes stored in the db -/// currently, since we are not storing nodes, the only way to have startup nodes is by providing -/// an array of bootnodes. -async fn discovery_startup( - udp_addr: SocketAddr, - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - bootnodes: Vec, -) { - for bootnode in bootnodes { - table.lock().await.insert_node(Node { - ip: bootnode.socket_address.ip(), - udp_port: bootnode.socket_address.port(), - // TODO: udp port can differ from tcp port. - // see https://github.com/lambdaclass/ethrex/issues/905 - tcp_port: bootnode.socket_address.port(), - node_id: bootnode.node_id, - }); - let ping_hash = ping(&udp_socket, udp_addr, bootnode.socket_address, &signer).await; - table - .lock() - .await - .update_peer_ping(bootnode.node_id, ping_hash); - } -} - -const REVALIDATION_INTERVAL_IN_SECONDS: usize = 30; // this is just an arbitrary number, maybe we should get this from some kind of cfg -const PROOF_EXPIRATION_IN_HS: usize = 12; - -/// Starts a tokio scheduler that: -/// - performs periodic revalidation of the current nodes (sends a ping to the old nodes). Currently this is configured to happen every [`REVALIDATION_INTERVAL_IN_MINUTES`] -/// -/// **Peer revalidation** -/// -/// Peers revalidation works in the following manner: -/// 1. Every `REVALIDATION_INTERVAL_IN_SECONDS` we ping the 3 least recently pinged peers -/// 2. In the next iteration we check if they have answered -/// - if they have: we increment the liveness field by one -/// - otherwise we decrement it by the current value / 3. -/// 3. 
If the liveness field is 0, then we delete it and insert a new one from the replacements table -/// -/// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#kademlia-table -async fn peers_revalidation( - udp_addr: SocketAddr, - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - interval_time_in_seconds: u64, -) { - let mut interval = tokio::time::interval(Duration::from_secs(interval_time_in_seconds)); - // peers we have pinged in the previous iteration - let mut previously_pinged_peers: HashSet = HashSet::default(); - - // first tick starts immediately - interval.tick().await; - - loop { - interval.tick().await; - debug!("Running peer revalidation"); - - // first check that the peers we ping have responded - for node_id in previously_pinged_peers { - let mut table = table.lock().await; - if let Some(peer) = table.get_by_node_id_mut(node_id) { - if let Some(has_answered) = peer.revalidation { - if has_answered { - peer.increment_liveness(); - } else { - peer.decrement_liveness(); - } - } - - peer.revalidation = None; - - if peer.liveness == 0 { - let new_peer = table.replace_peer(node_id); - if let Some(new_peer) = new_peer { - let ping_hash = ping( - &udp_socket, - udp_addr, - SocketAddr::new(new_peer.node.ip, new_peer.node.udp_port), - &signer, - ) - .await; - table.update_peer_ping(new_peer.node.node_id, ping_hash); - } - } - } - } - - // now send a ping to the least recently pinged peers - // this might be too expensive to run if our table is filled - // maybe we could just pick them randomly - let peers = table.lock().await.get_least_recently_pinged_peers(3); - previously_pinged_peers = HashSet::default(); - for peer in peers { - let ping_hash = ping( - &udp_socket, - udp_addr, - SocketAddr::new(peer.node.ip, peer.node.udp_port), - &signer, - ) - .await; - let mut table = table.lock().await; - table.update_peer_ping_with_revalidation(peer.node.node_id, ping_hash); - previously_pinged_peers.insert(peer.node.node_id); - - debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); - } - - debug!("Peer revalidation finished"); - } -} - -const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: usize = 30; - -/// Starts a tokio scheduler that: -/// - performs random lookups to discover new nodes. Currently this is configure to run every `PEERS_RANDOM_LOOKUP_TIME_IN_MIN` -/// -/// **Random lookups** -/// -/// Random lookups work in the following manner: -/// 1. Every 30min we spawn three concurrent lookups: one closest to our pubkey -/// and three other closest to random generated pubkeys. -/// 2. Every lookup starts with the closest nodes from our table. -/// Each lookup keeps track of: -/// - Peers that have already been asked for nodes -/// - Peers that have been already seen -/// - Potential peers to query for nodes: a vector of up to 16 entries holding the closest peers to the pubkey. -/// This vector is initially filled with nodes from our table. -/// 3. We send a `find_node` to the closest 3 nodes (that we have not yet asked) from the pubkey. -/// 4. We wait for the neighbors response and pushed or replace those that are closer to the potential peers. -/// 5. We select three other nodes from the potential peers vector and do the same until one lookup -/// doesn't have any node to ask. 
-/// -/// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#recursive-lookup -async fn peers_lookup( - tracker: TaskTracker, - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - local_node_id: H512, - interval_time_in_seconds: u64, -) { - let mut interval = tokio::time::interval(Duration::from_secs(interval_time_in_seconds)); - - loop { - // Notice that the first tick is immediate, - // so as soon as the server starts we'll do a lookup with the seeder nodes. - interval.tick().await; - - debug!("Starting lookup"); - - // lookup closest to our pub key - tracker.spawn(recursive_lookup( - udp_socket.clone(), - table.clone(), - signer.clone(), - local_node_id, - local_node_id, - )); - - // lookup closest to 3 random keys - for _ in 0..3 { - let random_pub_key = &SigningKey::random(&mut OsRng); - tracker.spawn(recursive_lookup( - udp_socket.clone(), - table.clone(), - signer.clone(), - node_id_from_signing_key(random_pub_key), - local_node_id, - )); - } - - debug!("Lookup finished"); - } -} - -async fn recursive_lookup( - udp_socket: Arc, - table: Arc>, - signer: SigningKey, - target: H512, - local_node_id: H512, -) { - let mut asked_peers = HashSet::default(); - // lookups start with the closest from our table - let closest_nodes = table.lock().await.get_closest_nodes(target); - let mut seen_peers: HashSet = HashSet::default(); - - seen_peers.insert(local_node_id); - for node in &closest_nodes { - seen_peers.insert(node.node_id); - } - - let mut peers_to_ask: Vec = closest_nodes; - - loop { - let (nodes_found, queries) = lookup( - udp_socket.clone(), - table.clone(), - &signer, - target, - &mut asked_peers, - &peers_to_ask, - ) - .await; - - // only push the peers that have not been seen - // that is those who have not been yet pushed, which also accounts for - // those peers that were in the array but have been replaced for closer peers - for node in nodes_found { - if !seen_peers.contains(&node.node_id) { - seen_peers.insert(node.node_id); - peers_to_ask_push(&mut peers_to_ask, target, node); - } - } - - // the lookup finishes when there are no more queries to do - // that happens when we have asked all the peers - if queries == 0 { - break; - } - } -} - -async fn lookup( - udp_socket: Arc, - table: Arc>, - signer: &SigningKey, - target: H512, - asked_peers: &mut HashSet, - nodes_to_ask: &Vec, -) -> (Vec, u32) { - let alpha = 3; - let mut queries = 0; - let mut nodes = vec![]; - - for node in nodes_to_ask { - if !asked_peers.contains(&node.node_id) { - let mut locked_table = table.lock().await; - if let Some(peer) = locked_table.get_by_node_id_mut(node.node_id) { - // if the peer has an ongoing find_node request, don't query - if peer.find_node_request.is_none() { - let (tx, mut receiver) = tokio::sync::mpsc::unbounded_channel::>(); - peer.new_find_node_request_with_sender(tx); - - // Release the lock - drop(locked_table); - - queries += 1; - asked_peers.insert(node.node_id); - let mut found_nodes = find_node_and_wait_for_response( - &udp_socket, - SocketAddr::new(node.ip, node.udp_port), - signer, - target, - &mut receiver, - ) - .await; - nodes.append(&mut found_nodes) - } - } - } - - if queries == alpha { - break; - } - } - - (nodes, queries) -} - -fn peers_to_ask_push(peers_to_ask: &mut Vec, target: H512, node: Node) { - let distance = bucket_number(target, node.node_id); - - if peers_to_ask.len() < MAX_NODES_PER_BUCKET { - peers_to_ask.push(node); - return; - } - - // replace this node for the one whose distance to the target is the highest - let (mut idx_to_replace, 
mut highest_distance) = (None, 0); - - for (i, peer) in peers_to_ask.iter().enumerate() { - let current_distance = bucket_number(peer.node_id, target); - - if distance < current_distance && current_distance >= highest_distance { - highest_distance = current_distance; - idx_to_replace = Some(i); - } - } - - if let Some(idx) = idx_to_replace { - peers_to_ask[idx] = node; - } -} - -/// Sends a ping to the addr -/// # Returns -/// an optional hash corresponding to the message header hash to account if the send was successful -async fn ping( - socket: &UdpSocket, - local_addr: SocketAddr, - to_addr: SocketAddr, - signer: &SigningKey, -) -> Option { - let mut buf = Vec::new(); - - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - - // TODO: this should send our advertised TCP port - let from = Endpoint { - ip: local_addr.ip(), - udp_port: local_addr.port(), - tcp_port: 0, - }; - let to = Endpoint { - ip: to_addr.ip(), - udp_port: to_addr.port(), - tcp_port: 0, - }; - - let ping: discv4::Message = discv4::Message::Ping(PingMessage::new(from, to, expiration)); - ping.encode_with_header(&mut buf, signer); - - // Send ping and log if error - match socket.send_to(&buf, to_addr).await { - Ok(bytes_sent) => { - // sanity check to make sure the ping was well sent - // though idk if this is actually needed or if it might break other stuff - if bytes_sent == buf.len() { - return Some(H256::from_slice(&buf[0..32])); - } - } - Err(e) => error!("Unable to send ping: {e}"), - } - - None -} - -async fn find_node_and_wait_for_response( - socket: &UdpSocket, - to_addr: SocketAddr, - signer: &SigningKey, - target_node_id: H512, - request_receiver: &mut tokio::sync::mpsc::UnboundedReceiver>, -) -> Vec { - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - - let msg: discv4::Message = - discv4::Message::FindNode(FindNodeMessage::new(target_node_id, expiration)); - - let mut buf = Vec::new(); - msg.encode_with_header(&mut buf, signer); - let mut nodes = vec![]; - - if socket.send_to(&buf, to_addr).await.is_err() { - return nodes; - } - - loop { - // wait as much as 5 seconds for the response - match tokio::time::timeout(Duration::from_secs(5), request_receiver.recv()).await { - Ok(Some(mut found_nodes)) => { - nodes.append(&mut found_nodes); - if nodes.len() == MAX_NODES_PER_BUCKET { - return nodes; - }; - } - Ok(None) => { - return nodes; - } - Err(_) => { - // timeout expired - return nodes; - } - } - } -} - -async fn pong(socket: &UdpSocket, to_addr: SocketAddr, ping_hash: H256, signer: &SigningKey) { - let mut buf = Vec::new(); - - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - - let to = Endpoint { - ip: to_addr.ip(), - udp_port: to_addr.port(), - tcp_port: 0, - }; - let pong: discv4::Message = discv4::Message::Pong(PongMessage::new(to, ping_hash, expiration)); - - pong.encode_with_header(&mut buf, signer); - - // Send pong and log if error - if let Err(e) = socket.send_to(&buf, to_addr).await { - error!("Unable to send pong: {e}") - } -} - async fn serve_p2p_requests( tracker: TaskTracker, tcp_addr: SocketAddr, signer: SigningKey, storage: Store, table: Arc>, - connection_broadcast: broadcast::Sender<(tokio::task::Id, Arc)>, + connection_broadcast: RLPxConnBroadcastSender, ) { let listener = match listener(tcp_addr) { Ok(result) => result, @@ -807,7 
+131,7 @@ async fn handle_peer_as_receiver( stream: TcpStream, storage: Store, table: Arc>, - connection_broadcast: broadcast::Sender<(tokio::task::Id, Arc)>, + connection_broadcast: RLPxConnBroadcastSender, ) { let mut conn = RLPxConnection::receiver(signer, stream, storage, connection_broadcast); conn.start_peer(peer_addr, table).await; @@ -819,7 +143,7 @@ async fn handle_peer_as_initiator( node: &Node, storage: Store, table: Arc>, - connection_broadcast: broadcast::Sender<(tokio::task::Id, Arc)>, + connection_broadcast: RLPxConnBroadcastSender, ) { let addr = SocketAddr::new(node.ip, node.tcp_port); let stream = match tcp_stream(addr).await { @@ -865,305 +189,3 @@ pub async fn periodically_show_peer_stats(peer_table: Arc>) interval.tick().await; } } - -#[cfg(test)] -mod tests { - use super::*; - use ethrex_storage::EngineType; - use kademlia::bucket_number; - use rand::rngs::OsRng; - use std::{ - collections::HashSet, - net::{IpAddr, Ipv4Addr}, - }; - use tokio::time::sleep; - - async fn insert_random_node_on_custom_bucket( - table: Arc>, - bucket_idx: usize, - ) { - let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); - let node = Node { - ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), - tcp_port: 0, - udp_port: 0, - node_id, - }; - table - .lock() - .await - .insert_node_on_custom_bucket(node, bucket_idx); - } - - async fn fill_table_with_random_nodes(table: Arc>) { - for i in 0..256 { - for _ in 0..16 { - insert_random_node_on_custom_bucket(table.clone(), i).await; - } - } - } - - struct MockServer { - pub addr: SocketAddr, - pub signer: SigningKey, - pub table: Arc>, - pub node_id: H512, - pub udp_socket: Arc, - } - - async fn start_mock_discovery_server( - udp_port: u16, - should_start_server: bool, - ) -> Result { - let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); - let signer = SigningKey::random(&mut OsRng); - let udp_socket = Arc::new(UdpSocket::bind(addr).await?); - let node_id = node_id_from_signing_key(&signer); - let storage = - Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); - let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); - let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( - tokio::task::Id, - Arc, - )>(MAX_MESSAGES_TO_BROADCAST); - let tracker = TaskTracker::new(); - if should_start_server { - tracker.spawn(discover_peers_server( - tracker.clone(), - addr, - udp_socket.clone(), - storage.clone(), - table.clone(), - signer.clone(), - channel_broadcast_send_end, - )); - } - - Ok(MockServer { - addr, - signer, - table, - node_id, - udp_socket, - }) - } - - /// connects two mock servers by pinging a to b - async fn connect_servers(server_a: &mut MockServer, server_b: &mut MockServer) { - let ping_hash = ping( - &server_a.udp_socket, - server_a.addr, - server_b.addr, - &server_a.signer, - ) - .await; - { - let mut table = server_a.table.lock().await; - table.insert_node(Node { - ip: server_b.addr.ip(), - udp_port: server_b.addr.port(), - tcp_port: 0, - node_id: server_b.node_id, - }); - table.update_peer_ping(server_b.node_id, ping_hash); - } - // allow some time for the server to respond - sleep(Duration::from_secs(1)).await; - } - - #[tokio::test] - /** This is a end to end test on the discovery server, the idea is as follows: - * - We'll start two discovery servers (`a` & `b`) to ping between each other - * - We'll make `b` ping `a`, and validate that the connection is right - * - Then we'll wait for a revalidation where we expect everything to be the 
same - * - We'll do this five 5 more times - * - Then we'll stop server `a` so that it doesn't respond to re-validations - * - We expect server `b` to remove node `a` from its table after 3 re-validations - * To make this run faster, we'll change the revalidation time to be every 2secs - */ - async fn discovery_server_revalidation() -> Result<(), io::Error> { - let mut server_a = start_mock_discovery_server(7998, true).await?; - let mut server_b = start_mock_discovery_server(7999, true).await?; - - connect_servers(&mut server_a, &mut server_b).await; - - // start revalidation server - tokio::spawn(peers_revalidation( - server_b.addr, - server_b.udp_socket.clone(), - server_b.table.clone(), - server_b.signer.clone(), - 2, - )); - - for _ in 0..5 { - sleep(Duration::from_millis(2500)).await; - // by now, b should've send a revalidation to a - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.node_id); - assert!(node.is_some_and(|n| n.revalidation.is_some())); - } - - // make sure that `a` has responded too all the re-validations - // we can do that by checking the liveness - { - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.node_id); - assert_eq!(node.map_or(0, |n| n.liveness), 6); - } - - // now, stopping server `a` is not trivial - // so we'll instead change its port, so that no one responds - { - let mut table = server_b.table.lock().await; - let node = table.get_by_node_id_mut(server_a.node_id); - if let Some(node) = node { - node.node.udp_port = 0 - }; - } - - // now the liveness field should start decreasing until it gets to 0 - // which should happen in 3 re-validations - for _ in 0..2 { - sleep(Duration::from_millis(2500)).await; - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.node_id); - assert!(node.is_some_and(|n| n.revalidation.is_some())); - } - sleep(Duration::from_millis(2500)).await; - - // finally, `a`` should not exist anymore - let table = server_b.table.lock().await; - assert!(table.get_by_node_id(server_a.node_id).is_none()); - Ok(()) - } - - #[tokio::test] - /** This test tests the lookup function, the idea is as follows: - * - We'll start two discovery servers (`a` & `b`) that will connect between each other - * - We'll insert random nodes to the server `a`` to fill its table - * - We'll forcedly run `lookup` and validate that a `find_node` request was sent - * by checking that new nodes have been inserted to the table - * - * This test for only one lookup, and not recursively. - */ - async fn discovery_server_lookup() -> Result<(), io::Error> { - let mut server_a = start_mock_discovery_server(8000, true).await?; - let mut server_b = start_mock_discovery_server(8001, true).await?; - - fill_table_with_random_nodes(server_a.table.clone()).await; - - // before making the connection, remove a node from the `b` bucket. 
Otherwise it won't be added - let b_bucket = bucket_number(server_a.node_id, server_b.node_id); - let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] - .node - .node_id; - server_a - .table - .lock() - .await - .replace_peer_on_custom_bucket(node_id_to_remove, b_bucket); - - connect_servers(&mut server_a, &mut server_b).await; - - // now we are going to run a lookup with us as the target - let closets_peers_to_b_from_a = server_a - .table - .lock() - .await - .get_closest_nodes(server_b.node_id); - let nodes_to_ask = server_b - .table - .lock() - .await - .get_closest_nodes(server_b.node_id); - - lookup( - server_b.udp_socket.clone(), - server_b.table.clone(), - &server_b.signer, - server_b.node_id, - &mut HashSet::default(), - &nodes_to_ask, - ) - .await; - - // find_node sent, allow some time for `a` to respond - sleep(Duration::from_secs(2)).await; - - // now all peers should've been inserted - for peer in closets_peers_to_b_from_a { - let table = server_b.table.lock().await; - assert!(table.get_by_node_id(peer.node_id).is_some()); - } - Ok(()) - } - - #[tokio::test] - /** This test tests the lookup function, the idea is as follows: - * - We'll start four discovery servers (`a`, `b`, `c` & `d`) - * - `a` will be connected to `b`, `b` will be connected to `c` and `c` will be connected to `d`. - * - The server `d` will have its table filled with mock nodes - * - We'll run a recursive lookup on server `a` and we expect to end with `b`, `c`, `d` and its mock nodes - */ - async fn discovery_server_recursive_lookup() -> Result<(), io::Error> { - let mut server_a = start_mock_discovery_server(8002, true).await?; - let mut server_b = start_mock_discovery_server(8003, true).await?; - let mut server_c = start_mock_discovery_server(8004, true).await?; - let mut server_d = start_mock_discovery_server(8005, true).await?; - - connect_servers(&mut server_a, &mut server_b).await; - connect_servers(&mut server_b, &mut server_c).await; - connect_servers(&mut server_c, &mut server_d).await; - - // now we fill the server_d table with 3 random nodes - // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) - // and so we will have to wait for the timeout on each node, which will only slow down the test - for _ in 0..3 { - insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; - } - - let mut expected_peers = vec![]; - expected_peers.extend( - server_b - .table - .lock() - .await - .get_closest_nodes(server_a.node_id), - ); - expected_peers.extend( - server_c - .table - .lock() - .await - .get_closest_nodes(server_a.node_id), - ); - expected_peers.extend( - server_d - .table - .lock() - .await - .get_closest_nodes(server_a.node_id), - ); - - // we'll run a recursive lookup closest to the server itself - recursive_lookup( - server_a.udp_socket.clone(), - server_a.table.clone(), - server_a.signer.clone(), - server_a.node_id, - server_a.node_id, - ) - .await; - - for peer in expected_peers { - assert!(server_a - .table - .lock() - .await - .get_by_node_id(peer.node_id) - .is_some()); - } - Ok(()) - } -} diff --git a/crates/networking/p2p/rlpx/connection.rs b/crates/networking/p2p/rlpx/connection.rs index 92460805e4..dea6eab7f0 100644 --- a/crates/networking/p2p/rlpx/connection.rs +++ b/crates/networking/p2p/rlpx/connection.rs @@ -1,6 +1,8 @@ use std::sync::Arc; use crate::{ + //TODO is this right? 
+ discv4::MAX_DISC_PACKET_SIZE, peer_channels::PeerChannels, rlpx::{ eth::{ @@ -18,7 +20,6 @@ use crate::{ process_account_range_request, process_byte_codes_request, process_storage_ranges_request, process_trie_nodes_request, }, - MAX_DISC_PACKET_SIZE, }; use super::{ @@ -60,6 +61,8 @@ const PERIODIC_TASKS_CHECK_INTERVAL: std::time::Duration = std::time::Duration:: pub(crate) type Aes256Ctr64BE = ctr::Ctr64BE; +pub(crate) type RLPxConnBroadcastSender = broadcast::Sender<(tokio::task::Id, Arc)>; + enum RLPxConnectionMode { Initiator, Receiver, @@ -94,7 +97,7 @@ pub(crate) struct RLPxConnection { /// messages from other connections (sent from other peers). /// The receive end is instantiated after the handshake is completed /// under `handle_peer`. - connection_broadcast_send: broadcast::Sender<(task::Id, Arc)>, + connection_broadcast_send: RLPxConnBroadcastSender, } impl RLPxConnection { @@ -104,7 +107,7 @@ impl RLPxConnection { stream: S, mode: RLPxConnectionMode, storage: Store, - connection_broadcast: broadcast::Sender<(task::Id, Arc)>, + connection_broadcast: RLPxConnBroadcastSender, ) -> Self { Self { signer, @@ -143,6 +146,7 @@ impl RLPxConnection { storage: Store, connection_broadcast_send: broadcast::Sender<(task::Id, Arc)>, ) -> Result { + //TODO remove this, it is already done on the discv4 packet decoding let digest = Keccak256::digest(msg.get(65..).ok_or(RLPxError::InvalidMessageLength())?); let signature = &Signature::from_bytes( msg.get(..64) From e3e68a444bc0fae4cc522eb2ed18a020e3fe3f0f Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 10:24:48 -0300 Subject: [PATCH 07/33] chore: remove old files from merge --- crates/networking/p2p/discv4/discv4.rs | 666 ----------------------- crates/networking/p2p/discv4/requests.rs | 129 ----- 2 files changed, 795 deletions(-) delete mode 100644 crates/networking/p2p/discv4/discv4.rs delete mode 100644 crates/networking/p2p/discv4/requests.rs diff --git a/crates/networking/p2p/discv4/discv4.rs b/crates/networking/p2p/discv4/discv4.rs deleted file mode 100644 index f6ecddf31e..0000000000 --- a/crates/networking/p2p/discv4/discv4.rs +++ /dev/null @@ -1,666 +0,0 @@ -use crate::{ - bootnode::BootNode, - discv4::{ - helpers::{get_expiration, is_expired, time_now_unix, time_since_in_hs}, - messages::{Message, NeighborsMessage, Packet}, - requests::{ping, pong}, - }, - handle_peer_as_initiator, - kademlia::MAX_NODES_PER_BUCKET, - rlpx::message::Message as RLPxMessage, - types::Node, - KademliaTable, MAX_DISC_PACKET_SIZE, -}; -use ethrex_core::H512; -use ethrex_storage::Store; -use futures::try_join; -use k256::ecdsa::SigningKey; -use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; -use tokio::{ - net::UdpSocket, - sync::{broadcast, Mutex}, -}; -use tracing::debug; - -use super::lookup::{DiscoveryLookupHandler, PEERS_RANDOM_LOOKUP_TIME_IN_MIN}; - -#[derive(Debug, Clone)] -pub struct Discv4 { - local_node: Node, - udp_socket: Arc, - signer: SigningKey, - storage: Store, - table: Arc>, - tx_broadcaster_send: broadcast::Sender<(tokio::task::Id, Arc)>, - revalidation_interval_seconds: u64, -} - -pub enum DiscoveryError { - UnexpectedError, -} - -const REVALIDATION_INTERVAL_IN_SECONDS: u64 = 30; // this is just an arbitrary number, maybe we should get this from a cfg or cli param -const PROOF_EXPIRATION_IN_HS: u16 = 12; - -impl Discv4 { - pub fn new( - local_node: Node, - signer: SigningKey, - storage: Store, - table: Arc>, - tx_broadcaster_send: broadcast::Sender<(tokio::task::Id, Arc)>, - udp_socket: Arc, 
- ) -> Self { - Self { - local_node, - signer, - storage, - table, - tx_broadcaster_send, - udp_socket, - revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, - } - } - - #[allow(unused)] - pub fn with_revalidation_interval_of(self, seconds: u64) -> Self { - Self { - revalidation_interval_seconds: seconds, - ..self - } - } - - #[allow(unused)] - pub fn with_lookup_interval_of(self, minutes: u64) -> Self { - Self { - revalidation_interval_seconds: minutes, - ..self - } - } - - pub async fn start_discovery_service( - &self, - bootnodes: Vec, - ) -> Result<(), DiscoveryError> { - let self_arc = Arc::new(self.clone()); - - let server_handle = tokio::spawn(self_arc.clone().receive()); - self.load_bootnodes(bootnodes).await; - let revalidation_handle = tokio::spawn(self_arc.clone().start_revalidation_task()); - - // a first initial lookup runs without waiting for the interval - // so we need to allow some time to the pinged peers to ping us back and acknowledge us - let lookup_handler = DiscoveryLookupHandler::new( - self.local_node, - self.signer.clone(), - self.udp_socket.clone(), - self.table.clone(), - PEERS_RANDOM_LOOKUP_TIME_IN_MIN, - ); - let lookup_handle = tokio::spawn(async move { lookup_handler.start_lookup_task().await }); - - let result = try_join!(server_handle, revalidation_handle, lookup_handle); - - if result.is_ok() { - Ok(()) - } else { - Err(DiscoveryError::UnexpectedError) - } - } - - async fn load_bootnodes(&self, bootnodes: Vec) { - for bootnode in bootnodes { - let node = Node { - ip: bootnode.socket_address.ip(), - udp_port: bootnode.socket_address.port(), - // TODO: udp port can differ from tcp port. - // see https://github.com/lambdaclass/ethrex/issues/905 - tcp_port: bootnode.socket_address.port(), - node_id: bootnode.node_id, - }; - self.try_add_peer_and_ping(node).await; - } - } - - async fn receive(self: Arc) { - let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; - - loop { - let (read, from) = self.udp_socket.recv_from(&mut buf).await.unwrap(); - debug!("Received {read} bytes from {from}"); - - let packet = Packet::decode(&buf[..read]); - if packet.is_err() { - debug!("Could not decode packet: {:?}", packet.err().unwrap()); - continue; - } - let packet = packet.unwrap(); - - self.handle_message(packet, from, read, &buf).await; - } - } - - async fn handle_message( - &self, - packet: Packet, - from: SocketAddr, - msg_len: usize, - msg_bytes: &[u8], - ) { - let msg = packet.get_message(); - debug!("Message: {:?} from {}", msg, packet.get_node_id()); - match msg { - Message::Ping(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring ping as it is expired."); - return; - }; - let node = Node { - ip: from.ip(), - udp_port: msg.from.udp_port, - tcp_port: msg.from.tcp_port, - node_id: packet.get_node_id(), - }; - let ping_hash = packet.get_hash(); - pong(&self.udp_socket, from, node, ping_hash, &self.signer).await; - let peer = { - let table = self.table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(peer) = peer { - // send a a ping to get an endpoint proof - if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { - let hash = - ping(&self.udp_socket, self.local_node, peer.node, &self.signer).await; - if let Some(hash) = hash { - self.table - .lock() - .await - .update_peer_ping(peer.node.node_id, Some(hash)); - } - } - } else { - // send a ping to get the endpoint proof from our end - self.try_add_peer_and_ping(node).await; - } - } - Message::Pong(msg) => { - let table = self.table.clone(); - if 
is_expired(msg.expiration) { - debug!("Ignoring pong as it is expired."); - return; - } - let peer = { - let table = table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(peer) = peer { - if peer.last_ping_hash.is_none() { - debug!("Discarding pong as the node did not send a previous ping"); - return; - } - if peer.last_ping_hash.unwrap() == msg.ping_hash { - table.lock().await.pong_answered(peer.node.node_id); - - let mut msg_buf = vec![0; msg_len - 32]; - msg_bytes[32..msg_len].clone_into(&mut msg_buf); - let signer = self.signer.clone(); - let storage = self.storage.clone(); - let broadcaster = self.tx_broadcaster_send.clone(); - tokio::spawn(async move { - handle_peer_as_initiator( - signer, - &msg_buf, - &peer.node, - storage, - table, - broadcaster, - ) - .await; - }); - } else { - debug!( - "Discarding pong as the hash did not match the last corresponding ping" - ); - } - } else { - debug!("Discarding pong as it is not a known node"); - } - } - Message::FindNode(msg) => { - if is_expired(msg.expiration) { - debug!("Ignoring find node msg as it is expired."); - return; - }; - let node = { - let table = self.table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - if let Some(node) = node { - if node.is_proven { - let nodes = { - let table = self.table.lock().await; - table.get_closest_nodes(msg.target) - }; - let nodes_chunks = nodes.chunks(4); - let expiration = get_expiration(20); - debug!("Sending neighbors!"); - // we are sending the neighbors in 4 different messages as not to exceed the - // maximum packet size - for nodes in nodes_chunks { - let neighbors = Message::Neighbors(NeighborsMessage::new( - nodes.to_vec(), - expiration, - )); - let mut buf = Vec::new(); - neighbors.encode_with_header(&mut buf, &self.signer); - let _ = self.udp_socket.send_to(&buf, from).await; - } - } else { - debug!("Ignoring find node message as the node isn't proven!"); - } - } else { - debug!("Ignoring find node message as it is not a known node"); - } - } - Message::Neighbors(neighbors_msg) => { - if is_expired(neighbors_msg.expiration) { - debug!("Ignoring neighbor msg as it is expired."); - return; - }; - - let mut nodes_to_insert = None; - let mut table = self.table.lock().await; - if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { - if let Some(req) = &mut node.find_node_request { - if time_now_unix().saturating_sub(req.sent_at) >= 60 { - debug!("Ignoring neighbors message as the find_node request expires after one minute"); - node.find_node_request = None; - return; - } - let nodes = &neighbors_msg.nodes; - let nodes_sent = req.nodes_sent + nodes.len(); - - if nodes_sent <= MAX_NODES_PER_BUCKET { - req.nodes_sent = nodes_sent; - nodes_to_insert = Some(nodes.clone()); - if let Some(tx) = &req.tx { - let _ = tx.send(nodes.clone()); - } - } else { - debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); - } - - if nodes_sent == MAX_NODES_PER_BUCKET { - debug!("Neighbors request has been fulfilled"); - node.find_node_request = None; - } - } - } else { - debug!("Ignoring neighbor msg as it is not a known node"); - } - - if let Some(nodes) = nodes_to_insert { - for node in nodes { - if node.node_id != self.local_node.node_id { - self.try_add_peer_and_ping(node).await; - } - } - } - } - _ => {} - } - } - - /// Attempts to add a node to the Kademlia table and send a ping if necessary. 
- /// - /// - If the node is **not found** in the table and there is enough space, it will be added, - /// and a ping message will be sent to verify connectivity. - /// - If the node is **already present**, no action is taken. - pub async fn try_add_peer_and_ping(&self, node: Node) { - let (Some(peer), inserted_to_table) = self.table.lock().await.insert_node(node) else { - return; - }; - if inserted_to_table { - debug!("Node {:?} was inserted to kademlia table", node); - let ping_hash = ping(&self.udp_socket, self.local_node, node, &self.signer).await; - self.table - .lock() - .await - .update_peer_ping(peer.node.node_id, ping_hash); - }; - } - - async fn start_revalidation_task(self: Arc) { - let mut interval = - tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); - // peers we have pinged in the previous iteration - let mut previously_pinged_peers: HashSet = HashSet::default(); - - // first tick starts immediately - interval.tick().await; - - loop { - interval.tick().await; - debug!("Running peer revalidation"); - - // first check that the peers we ping have responded - for node_id in previously_pinged_peers { - let mut table = self.table.lock().await; - let peer = table.get_by_node_id_mut(node_id).unwrap(); - - if let Some(has_answered) = peer.revalidation { - if has_answered { - peer.increment_liveness(); - } else { - peer.decrement_liveness(); - } - } - - peer.revalidation = None; - - if peer.liveness == 0 { - let new_peer = table.replace_peer(node_id); - if let Some(new_peer) = new_peer { - let ping_hash = ping( - &self.udp_socket, - self.local_node, - new_peer.node, - &self.signer, - ) - .await; - table.update_peer_ping(new_peer.node.node_id, ping_hash); - } - } - } - - // now send a ping to the least recently pinged peers - // this might be too expensive to run if our table is filled - // maybe we could just pick them randomly - let peers = self.table.lock().await.get_least_recently_pinged_peers(3); - previously_pinged_peers = HashSet::default(); - for peer in peers { - let ping_hash = - ping(&self.udp_socket, self.local_node, peer.node, &self.signer).await; - let mut table = self.table.lock().await; - table.update_peer_ping_with_revalidation(peer.node.node_id, ping_hash); - previously_pinged_peers.insert(peer.node.node_id); - - debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); - } - - debug!("Peer revalidation finished"); - } - } -} - -// #[cfg(test)] -// mod tests { -// use super::*; -// use crate::{kademlia::bucket_number, node_id_from_signing_key, MAX_MESSAGES_TO_BROADCAST}; -// use ethrex_storage::EngineType; -// use rand::rngs::OsRng; -// use std::{ -// collections::HashSet, -// net::{IpAddr, Ipv4Addr}, -// }; -// use tokio::time::sleep; - -// async fn insert_random_node_on_custom_bucket( -// table: Arc>, -// bucket_idx: usize, -// ) { -// let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); -// let node = Node { -// ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), -// tcp_port: 0, -// udp_port: 0, -// node_id, -// }; -// table -// .lock() -// .await -// .insert_node_on_custom_bucket(node, bucket_idx); -// } - -// async fn fill_table_with_random_nodes(table: Arc>) { -// for i in 0..256 { -// for _ in 0..16 { -// insert_random_node_on_custom_bucket(table.clone(), i).await; -// } -// } -// } - -// async fn start_mock_discovery_server(udp_port: u16, should_start_server: bool) -> Discv4 { -// let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); -// let signer = SigningKey::random(&mut OsRng); -// 
let udp_socket = Arc::new(UdpSocket::bind(addr).await.unwrap()); -// let node_id = node_id_from_signing_key(&signer); -// let storage = -// Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); -// let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); -// let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( -// tokio::task::Id, -// Arc, -// )>(MAX_MESSAGES_TO_BROADCAST); - -// let discv4 = Discv4::new(); -// if should_start_server { -// tokio::spawn(disv4.handle_messages()); -// } -// } - -// /// connects two mock servers by pinging a to b -// async fn connect_servers(server_a: &mut MockServer, server_b: &mut MockServer) { -// let ping_hash = ping( -// &server_a.udp_socket, -// server_a.addr, -// server_b.addr, -// &server_a.signer, -// ) -// .await; -// { -// let mut table = server_a.table.lock().await; -// table.insert_node(Node { -// ip: server_b.addr.ip(), -// udp_port: server_b.addr.port(), -// tcp_port: 0, -// node_id: server_b.node_id, -// }); -// table.update_peer_ping(server_b.node_id, ping_hash); -// } -// // allow some time for the server to respond -// sleep(Duration::from_secs(1)).await; -// } - -// #[tokio::test] -// /** This is a end to end test on the discovery server, the idea is as follows: -// * - We'll start two discovery servers (`a` & `b`) to ping between each other -// * - We'll make `b` ping `a`, and validate that the connection is right -// * - Then we'll wait for a revalidation where we expect everything to be the same -// * - We'll do this five 5 more times -// * - Then we'll stop server `a` so that it doesn't respond to re-validations -// * - We expect server `b` to remove node `a` from its table after 3 re-validations -// * To make this run faster, we'll change the revalidation time to be every 2secs -// */ -// async fn discovery_server_revalidation() { -// let mut server_a = start_mock_discovery_server(7998, true).await; -// let mut server_b = start_mock_discovery_server(7999, true).await; - -// connect_servers(&mut server_a, &mut server_b).await; - -// // start revalidation server -// tokio::spawn(peers_revalidation( -// server_b.addr, -// server_b.udp_socket.clone(), -// server_b.table.clone(), -// server_b.signer.clone(), -// 2, -// )); - -// for _ in 0..5 { -// sleep(Duration::from_millis(2500)).await; -// // by now, b should've send a revalidation to a -// let table = server_b.table.lock().await; -// let node = table.get_by_node_id(server_a.node_id).unwrap(); -// assert!(node.revalidation.is_some()); -// } - -// // make sure that `a` has responded too all the re-validations -// // we can do that by checking the liveness -// { -// let table = server_b.table.lock().await; -// let node = table.get_by_node_id(server_a.node_id).unwrap(); -// assert_eq!(node.liveness, 6); -// } - -// // now, stopping server `a` is not trivial -// // so we'll instead change its port, so that no one responds -// { -// let mut table = server_b.table.lock().await; -// let node = table.get_by_node_id_mut(server_a.node_id).unwrap(); -// node.node.udp_port = 0; -// } - -// // now the liveness field should start decreasing until it gets to 0 -// // which should happen in 3 re-validations -// for _ in 0..2 { -// sleep(Duration::from_millis(2500)).await; -// let table = server_b.table.lock().await; -// let node = table.get_by_node_id(server_a.node_id).unwrap(); -// assert!(node.revalidation.is_some()); -// } -// sleep(Duration::from_millis(2500)).await; - -// // finally, `a`` should not exist anymore -// let table = 
server_b.table.lock().await; -// assert!(table.get_by_node_id(server_a.node_id).is_none()); -// } - -// #[tokio::test] -// /** This test tests the lookup function, the idea is as follows: -// * - We'll start two discovery servers (`a` & `b`) that will connect between each other -// * - We'll insert random nodes to the server `a`` to fill its table -// * - We'll forcedly run `lookup` and validate that a `find_node` request was sent -// * by checking that new nodes have been inserted to the table -// * -// * This test for only one lookup, and not recursively. -// */ -// async fn discovery_server_lookup() { -// let mut server_a = start_mock_discovery_server(8000, true).await; -// let mut server_b = start_mock_discovery_server(8001, true).await; - -// fill_table_with_random_nodes(server_a.table.clone()).await; - -// // before making the connection, remove a node from the `b` bucket. Otherwise it won't be added -// let b_bucket = bucket_number(server_a.node_id, server_b.node_id); -// let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] -// .node -// .node_id; -// server_a -// .table -// .lock() -// .await -// .replace_peer_on_custom_bucket(node_id_to_remove, b_bucket); - -// connect_servers(&mut server_a, &mut server_b).await; - -// // now we are going to run a lookup with us as the target -// let closets_peers_to_b_from_a = server_a -// .table -// .lock() -// .await -// .get_closest_nodes(server_b.node_id); -// let nodes_to_ask = server_b -// .table -// .lock() -// .await -// .get_closest_nodes(server_b.node_id); - -// lookup( -// server_b.udp_socket.clone(), -// server_b.table.clone(), -// &server_b.signer, -// server_b.node_id, -// &mut HashSet::default(), -// &nodes_to_ask, -// ) -// .await; - -// // find_node sent, allow some time for `a` to respond -// sleep(Duration::from_secs(2)).await; - -// // now all peers should've been inserted -// for peer in closets_peers_to_b_from_a { -// let table = server_b.table.lock().await; -// assert!(table.get_by_node_id(peer.node_id).is_some()); -// } -// } - -// #[tokio::test] -// /** This test tests the lookup function, the idea is as follows: -// * - We'll start four discovery servers (`a`, `b`, `c` & `d`) -// * - `a` will be connected to `b`, `b` will be connected to `c` and `c` will be connected to `d`. 
-// * - The server `d` will have its table filled with mock nodes -// * - We'll run a recursive lookup on server `a` and we expect to end with `b`, `c`, `d` and its mock nodes -// */ -// async fn discovery_server_recursive_lookup() { -// let mut server_a = start_mock_discovery_server(8002, true).await; -// let mut server_b = start_mock_discovery_server(8003, true).await; -// let mut server_c = start_mock_discovery_server(8004, true).await; -// let mut server_d = start_mock_discovery_server(8005, true).await; - -// connect_servers(&mut server_a, &mut server_b).await; -// connect_servers(&mut server_b, &mut server_c).await; -// connect_servers(&mut server_c, &mut server_d).await; - -// // now we fill the server_d table with 3 random nodes -// // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) -// // and so we will have to wait for the timeout on each node, which will only slow down the test -// for _ in 0..3 { -// insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; -// } - -// let mut expected_peers = vec![]; -// expected_peers.extend( -// server_b -// .table -// .lock() -// .await -// .get_closest_nodes(server_a.node_id), -// ); -// expected_peers.extend( -// server_c -// .table -// .lock() -// .await -// .get_closest_nodes(server_a.node_id), -// ); -// expected_peers.extend( -// server_d -// .table -// .lock() -// .await -// .get_closest_nodes(server_a.node_id), -// ); - -// // we'll run a recursive lookup closest to the server itself -// recursive_lookup( -// server_a.udp_socket.clone(), -// server_a.table.clone(), -// server_a.signer.clone(), -// server_a.node_id, -// server_a.node_id, -// ) -// .await; - -// for peer in expected_peers { -// assert!(server_a -// .table -// .lock() -// .await -// .get_by_node_id(peer.node_id) -// .is_some()); -// } -// } -// } diff --git a/crates/networking/p2p/discv4/requests.rs b/crates/networking/p2p/discv4/requests.rs deleted file mode 100644 index 1427eb6290..0000000000 --- a/crates/networking/p2p/discv4/requests.rs +++ /dev/null @@ -1,129 +0,0 @@ -use std::{ - net::SocketAddr, - time::{Duration, SystemTime, UNIX_EPOCH}, -}; - -use ethrex_core::{H256, H512}; -use k256::ecdsa::SigningKey; -use tokio::net::UdpSocket; - -use crate::{ - kademlia::MAX_NODES_PER_BUCKET, - types::{Endpoint, Node}, -}; - -use super::messages::{FindNodeMessage, Message, PingMessage, PongMessage}; - -// Sends a ping to the addr -/// # Returns -/// an optional hash corresponding to the message header hash to account if the send was successful -pub async fn ping( - socket: &UdpSocket, - local_node: Node, - to_node: Node, - signer: &SigningKey, -) -> Option { - let mut buf = Vec::new(); - - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - - let from = Endpoint { - ip: local_node.ip, - udp_port: local_node.udp_port, - tcp_port: local_node.tcp_port, - }; - let to = Endpoint { - ip: to_node.ip, - udp_port: to_node.udp_port, - tcp_port: to_node.tcp_port, - }; - - let ping = Message::Ping(PingMessage::new(from, to, expiration)); - ping.encode_with_header(&mut buf, signer); - let res = socket - .send_to(&buf, SocketAddr::new(to_node.ip, to_node.udp_port)) - .await; - - if res.is_err() { - return None; - } - let bytes_sent = res.unwrap(); - - if bytes_sent == buf.len() { - return Some(H256::from_slice(&buf[0..32])); - } - - None -} - -pub async fn pong( - socket: &UdpSocket, - to_addr: SocketAddr, - node: Node, - ping_hash: H256, - signer: 
&SigningKey, -) { - let mut buf = Vec::new(); - - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - - let to = Endpoint { - ip: node.ip, - udp_port: node.udp_port, - tcp_port: node.tcp_port, - }; - let pong = Message::Pong(PongMessage::new(to, ping_hash, expiration)); - - pong.encode_with_header(&mut buf, signer); - let _ = socket.send_to(&buf, to_addr).await; -} - -pub async fn find_node_and_wait_for_response( - socket: &UdpSocket, - to_addr: SocketAddr, - signer: &SigningKey, - target_node_id: H512, - request_receiver: &mut tokio::sync::mpsc::UnboundedReceiver>, -) -> Vec { - let expiration: u64 = (SystemTime::now() + Duration::from_secs(20)) - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - - let msg = Message::FindNode(FindNodeMessage::new(target_node_id, expiration)); - - let mut buf = Vec::new(); - msg.encode_with_header(&mut buf, signer); - let res = socket.send_to(&buf, to_addr).await; - - let mut nodes = vec![]; - - if res.is_err() { - return nodes; - } - - loop { - // wait as much as 5 seconds for the response - match tokio::time::timeout(Duration::from_secs(5), request_receiver.recv()).await { - Ok(Some(mut found_nodes)) => { - nodes.append(&mut found_nodes); - if nodes.len() == MAX_NODES_PER_BUCKET { - return nodes; - }; - } - Ok(None) => { - return nodes; - } - Err(_) => { - // timeout expired - return nodes; - } - } - } -} From 4cc7a84180747e97f0dc9e1adb3469e0cefacb92 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 11:29:54 -0300 Subject: [PATCH 08/33] refactor: enr messages --- cmd/ethrex/ethrex.rs | 2 - crates/networking/p2p/discv4/lookup.rs | 11 +- crates/networking/p2p/discv4/messages.rs | 5 +- crates/networking/p2p/discv4/mod.rs | 258 ++++++++++++++++++++++- crates/networking/p2p/net.rs | 13 +- 5 files changed, 262 insertions(+), 27 deletions(-) diff --git a/cmd/ethrex/ethrex.rs b/cmd/ethrex/ethrex.rs index 9af8f5ef63..27899b0291 100644 --- a/cmd/ethrex/ethrex.rs +++ b/cmd/ethrex/ethrex.rs @@ -267,8 +267,6 @@ async fn main() { let networking = ethrex_net::start_network( local_p2p_node, tracker.clone(), - udp_socket_addr, - tcp_socket_addr, bootnodes, signer, peer_table.clone(), diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 264beb5e9f..7814336790 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -62,7 +62,16 @@ impl Disv4LookupHandler { } } - pub async fn start(&self, initial_interval_wait_seconds: u64) { + pub fn start(&self, initial_interval_wait_seconds: u64) { + self.tracker.spawn({ + let self_clone = self.clone(); + async move { + self_clone.start_task(initial_interval_wait_seconds).await; + } + }); + } + + async fn start_task(&self, initial_interval_wait_seconds: u64) { let mut interval = tokio::time::interval(Duration::from_secs(self.interval_minutes)); tokio::time::sleep(Duration::from_secs(initial_interval_wait_seconds)).await; diff --git a/crates/networking/p2p/discv4/messages.rs b/crates/networking/p2p/discv4/messages.rs index f3b39ecea1..74e40338fb 100644 --- a/crates/networking/p2p/discv4/messages.rs +++ b/crates/networking/p2p/discv4/messages.rs @@ -1,3 +1,4 @@ +use super::helpers::time_now_unix; use crate::types::{Endpoint, Node, NodeRecord}; use bytes::BufMut; use ethrex_core::{H256, H512, H520}; @@ -10,8 +11,6 @@ use ethrex_rlp::{ use k256::ecdsa::{RecoveryId, Signature, SigningKey, VerifyingKey}; use sha3::{Digest, Keccak256}; -use 
super::helpers::time_now_unix; - #[derive(Debug, PartialEq)] pub enum PacketDecodeErr { #[allow(unused)] @@ -66,7 +65,7 @@ impl Packet { let node_id = H512::from_slice(&encoded.as_bytes()[1..]); let signature = H520::from_slice(signature_bytes); let message = Message::decode_with_type(packet_type, &encoded_msg[1..]) - .map_err(|e| PacketDecodeErr::RLPDecodeError(e))?; + .map_err(PacketDecodeErr::RLPDecodeError)?; Ok(Self { hash, diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index de2998e91c..8793fa2feb 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -7,22 +7,31 @@ use crate::{ handle_peer_as_initiator, kademlia::MAX_NODES_PER_BUCKET, rlpx::connection::RLPxConnBroadcastSender, - types::{Endpoint, Node}, + types::{Endpoint, Node, NodeRecord}, KademliaTable, }; use ethrex_core::H256; use ethrex_storage::Store; use helpers::{get_expiration, is_expired, time_now_unix, time_since_in_hs}; -use k256::ecdsa::SigningKey; +use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, SigningKey, VerifyingKey}; use lookup::Disv4LookupHandler; -use messages::{FindNodeMessage, Message, NeighborsMessage, Packet, PingMessage, PongMessage}; -use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; +use messages::{ + ENRRequestMessage, ENRResponseMessage, FindNodeMessage, Message, NeighborsMessage, Packet, + PingMessage, PongMessage, +}; +use std::{ + collections::HashSet, + net::{IpAddr, Ipv4Addr, SocketAddr}, + sync::Arc, + time::Duration, +}; use tokio::{net::UdpSocket, sync::Mutex}; use tokio_util::task::TaskTracker; use tracing::{debug, error}; pub const MAX_DISC_PACKET_SIZE: usize = 1280; const PROOF_EXPIRATION_IN_HS: u64 = 12; + // These interval times are arbitrary numbers, maybe we should read them from a cfg or a cli param const REVALIDATION_INTERVAL_IN_SECONDS: u64 = 30; const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; @@ -56,7 +65,7 @@ impl Discv4 { signer: SigningKey, storage: Store, table: Arc>, - tx_broadcaster_send: RLPxConnBroadcastSender, + rlpx_conn_sender: RLPxConnBroadcastSender, tracker: TaskTracker, ) -> Result { let udp_socket = UdpSocket::bind(SocketAddr::new(local_node.ip, local_node.udp_port)) @@ -68,7 +77,7 @@ impl Discv4 { signer, storage, table, - rlxp_conn_sender: tx_broadcaster_send, + rlxp_conn_sender: rlpx_conn_sender, udp_socket: Arc::new(udp_socket), revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, lookup_interval_minutes: PEERS_RANDOM_LOOKUP_TIME_IN_MIN, @@ -92,6 +101,10 @@ impl Discv4 { } } + pub fn addr(&self) -> SocketAddr { + return SocketAddr::new(self.local_node.ip, self.local_node.udp_port); + } + pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { let lookup_handler = Disv4LookupHandler::new( self.local_node, @@ -111,8 +124,7 @@ impl Discv4 { async move { self_clone.start_revalidation().await } }); self.load_bootnodes(bootnodes).await; - self.tracker - .spawn(async move { lookup_handler.start(10).await }); + lookup_handler.start(10); Ok(()) } @@ -188,6 +200,12 @@ impl Discv4 { if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { self.ping(node).await?; } + if let Some(enr_seq) = msg.enr_seq { + if enr_seq > peer.record.seq { + debug!("Found outdated enr-seq, send an enr_request"); + self.send_enr_request(peer.node, enr_seq).await?; + } + } } else { // otherwise add to the table let mut table = self.table.lock().await; @@ -219,6 +237,12 @@ impl Discv4 { .is_some_and(|hash| hash == msg.ping_hash) { 
table.lock().await.pong_answered(peer.node.node_id); + if let Some(enr_seq) = msg.enr_seq { + if enr_seq > peer.record.seq { + debug!("Found outdated enr-seq, send an enr_request"); + self.send_enr_request(peer.node, enr_seq).await?; + } + } let mut msg_buf = vec![0; msg_len - 32]; msg_bytes[32..msg_len].clone_into(&mut msg_buf); let signer = self.signer.clone(); @@ -337,7 +361,115 @@ impl Discv4 { Ok(()) } - _ => Ok(()), + Message::ENRRequest(msg) => { + if is_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + } + // Note we are passing the current timestamp as the sequence number + // This is because we are not storing our local_node updates in the db + let Ok(node_record) = + NodeRecord::from_node(self.local_node, time_now_unix(), &self.signer) + else { + return Err(DiscoveryError::InvalidMessage( + "Could not build local node record".into(), + )); + }; + let msg = + Message::ENRResponse(ENRResponseMessage::new(packet.get_hash(), node_record)); + let mut buf = vec![]; + msg.encode_with_header(&mut buf, &self.signer); + match self.udp_socket.send_to(&buf, from).await { + Ok(bytes_sent) => { + if bytes_sent == buf.len() { + Ok(()) + } else { + Err(DiscoveryError::PartialMessageSent) + } + } + Err(e) => Err(DiscoveryError::MessageSendFailure(e)), + } + } + Message::ENRResponse(msg) => { + let mut table = self.table.lock().await; + let peer = table.get_by_node_id_mut(packet.get_node_id()); + let Some(peer) = peer else { + return Err(DiscoveryError::InvalidMessage("Peer not known".into())); + }; + + let Some(req_hash) = peer.enr_request_hash else { + return Err(DiscoveryError::InvalidMessage( + "Discarding enr-response as enr-request wasn't sent".into(), + )); + }; + if req_hash != msg.request_hash { + return Err(DiscoveryError::InvalidMessage( + "Discarding enr-response did not match enr-request hash".into(), + )); + } + peer.enr_request_hash = None; + + if msg.node_record.seq < peer.record.seq { + return Err(DiscoveryError::InvalidMessage( + "msg node record is lower than the one we have".into(), + )); + } + + let record = msg.node_record.decode_pairs(); + let Some(id) = record.id else { + return Err(DiscoveryError::InvalidMessage( + "msg node record does not have required `id` field".into(), + )); + }; + + // https://github.com/ethereum/devp2p/blob/master/enr.md#v4-identity-scheme + let signature_valid = match id.as_str() { + "v4" => { + let digest = msg.node_record.get_signature_digest(); + let Some(public_key) = record.secp256k1 else { + return Err(DiscoveryError::InvalidMessage( + "signature could not be verified because public key was not provided".into(), + )); + }; + let signature_bytes = msg.node_record.signature.as_bytes(); + let Ok(signature) = Signature::from_slice(&signature_bytes[0..64]) else { + return Err(DiscoveryError::InvalidMessage( + "signature could not be build from msg signature bytes".into(), + )); + }; + let Ok(verifying_key) = + VerifyingKey::from_sec1_bytes(public_key.as_bytes()) + else { + return Err(DiscoveryError::InvalidMessage( + "public key could no be built from msg pub key bytes".into(), + )); + }; + verifying_key.verify_prehash(&digest, &signature).is_ok() + } + _ => false, + }; + if !signature_valid { + return Err(DiscoveryError::InvalidMessage( + "Signature verification invalid".into(), + )); + } + + if let Some(ip) = record.ip { + peer.node.ip = IpAddr::from(Ipv4Addr::from_bits(ip)); + } + if let Some(tcp_port) = record.tcp_port { + peer.node.tcp_port = tcp_port; + } + if let Some(udp_port) = record.udp_port { + 
peer.node.udp_port = udp_port; + } + peer.record.seq = msg.node_record.seq; + peer.record = msg.node_record.clone(); + debug!( + "Node with id {:?} record has been successfully updated", + peer.node.node_id + ); + Ok(()) + } } } @@ -439,7 +571,8 @@ impl Discv4 { tcp_port: node.tcp_port, }; - let ping = Message::Ping(PingMessage::new(from, to, expiration)); + let ping = + Message::Ping(PingMessage::new(from, to, expiration).with_enr_seq(time_now_unix())); ping.encode_with_header(&mut buf, &self.signer); let bytes_sent = self .udp_socket @@ -469,7 +602,9 @@ impl Discv4 { tcp_port: node.tcp_port, }; - let pong = Message::Pong(PongMessage::new(to, ping_hash, expiration)); + let pong = Message::Pong( + PongMessage::new(to, ping_hash, expiration).with_enr_seq(time_now_unix()), + ); pong.encode_with_header(&mut buf, &self.signer); let bytes_sent = self @@ -484,6 +619,31 @@ impl Discv4 { Ok(()) } } + + async fn send_enr_request(&self, node: Node, enr_seq: u64) -> Result<(), DiscoveryError> { + let mut buf = Vec::new(); + + let expiration: u64 = get_expiration(20); + let enr_req = Message::ENRRequest(ENRRequestMessage::new(expiration)); + enr_req.encode_with_header(&mut buf, &self.signer); + + let bytes_sent = self + .udp_socket + .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) + .await + .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); + } + + let hash = H256::from_slice(&buf[0..32]); + self.table + .lock() + .await + .update_peer_enr_seq(node.node_id, enr_seq, Some(hash)); + + Ok(()) + } } // #[cfg(test)] @@ -786,4 +946,80 @@ impl Discv4 { // } // Ok(()) // } +// #[tokio::test] +// /** +// * This test verifies the exchange and update of ENR (Ethereum Node Record) messages. +// * The test follows these steps: +// * +// * 1. Start two nodes. +// * 2. Wait until they establish a connection. +// * 3. Assert that they exchange their records and store them +// * 4. Modify the ENR (node record) of one of the nodes. +// * 5. Send a new ping message and check that an ENR request was triggered. +// * 6. Verify that the updated node record has been correctly received and stored. +// */ +// async fn discovery_enr_message() -> Result<(), io::Error> { +// let mut server_a = start_mock_discovery_server(8006, true).await?; +// let mut server_b = start_mock_discovery_server(8007, true).await?; + +// connect_servers(&mut server_a, &mut server_b).await; + +// // wait some time for the enr request-response exchange to finish +// sleep(Duration::from_millis(2500)).await; + +// let expected_record = +// NodeRecord::from_node(server_b.local_node, time_now_unix(), &server_b.signer) +// .expect("Node record is created from node"); + +// let server_a_peer_b = server_a +// .table +// .lock() +// .await +// .get_by_node_id(server_b.node_id) +// .cloned() +// .unwrap(); + +// // we only match the pairs, as the signature and seq will change +// // because they are calculated with the current time +// assert!(server_a_peer_b.record.decode_pairs() == expected_record.decode_pairs()); + +// // Modify server_a's record of server_b with an incorrect TCP port. +// // This simulates an outdated or incorrect entry in the node table. +// server_a +// .table +// .lock() +// .await +// .get_by_node_id_mut(server_b.node_id) +// .unwrap() +// .node +// .tcp_port = 10; + +// // Send a ping from server_b to server_a. +// // server_a should notice the enr_seq is outdated +// // and trigger an enr-request to server_b to update the record. 
+// ping( +// &server_b.udp_socket, +// server_b.addr, +// server_a.addr, +// &server_b.signer, +// ) +// .await; + +// // Wait for the update to propagate. +// sleep(Duration::from_millis(2500)).await; + +// // Verify that server_a has updated its record of server_b with the correct TCP port. +// let tcp_port = server_a +// .table +// .lock() +// .await +// .get_by_node_id(server_b.node_id) +// .unwrap() +// .node +// .tcp_port; + +// assert!(tcp_port == server_b.addr.port()); + +// Ok(()) +// } // } diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index 29858f30c6..e6cbb001b0 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -40,18 +40,15 @@ pub fn peer_table(signer: SigningKey) -> Arc> { Arc::new(Mutex::new(KademliaTable::new(local_node_id))) } -#[allow(clippy::too_many_arguments)] pub async fn start_network( local_node: Node, tracker: TaskTracker, - udp_addr: SocketAddr, - tcp_addr: SocketAddr, bootnodes: Vec, signer: SigningKey, peer_table: Arc>, storage: Store, ) { - info!("Starting discovery service at {udp_addr}"); + let tcp_addr = SocketAddr::new(local_node.ip, local_node.tcp_port); info!("Listening for requests at {tcp_addr}"); let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( tokio::task::Id, @@ -60,12 +57,7 @@ pub async fn start_network( // TODO handle errors here let discovery = Discv4::try_new( - Node { - ip: udp_addr.ip(), - udp_port: udp_addr.port(), - tcp_port: tcp_addr.port(), - node_id: H512::default(), - }, + local_node, signer.clone(), storage.clone(), peer_table.clone(), @@ -74,6 +66,7 @@ pub async fn start_network( ) .await .unwrap(); + info!("Starting discovery service at {}", discovery.addr()); discovery.start(bootnodes).await.unwrap(); tracker.spawn(serve_p2p_requests( From 4f4bc6a87fafd15cfaf67ce87dbf91d2dd8599d4 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 11:52:01 -0300 Subject: [PATCH 09/33] refactor: lookups --- crates/networking/p2p/discv4/lookup.rs | 121 +++++++++++-------------- crates/networking/p2p/discv4/mod.rs | 18 ++-- 2 files changed, 60 insertions(+), 79 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 7814336790..781feb5e40 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -13,28 +13,8 @@ use tokio::{net::UdpSocket, sync::Mutex}; use tokio_util::task::TaskTracker; use tracing::debug; -/// Starts a tokio scheduler that: -/// - performs random lookups to discover new nodes. Currently this is configure to run every `PEERS_RANDOM_LOOKUP_TIME_IN_MIN` -/// -/// **Random lookups** -/// -/// Random lookups work in the following manner: -/// 1. Every 30min we spawn three concurrent lookups: one closest to our pubkey -/// and three other closest to random generated pubkeys. -/// 2. Every lookup starts with the closest nodes from our table. -/// Each lookup keeps track of: -/// - Peers that have already been asked for nodes -/// - Peers that have been already seen -/// - Potential peers to query for nodes: a vector of up to 16 entries holding the closest peers to the pubkey. -/// This vector is initially filled with nodes from our table. -/// 3. We send a `find_node` to the closest 3 nodes (that we have not yet asked) from the pubkey. -/// 4. We wait for the neighbors response and pushed or replace those that are closer to the potential peers. -/// 5. 
We select three other nodes from the potential peers vector and do the same until one lookup -/// doesn't have any node to ask. -/// -/// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#recursive-lookup #[derive(Clone, Debug)] -pub struct Disv4LookupHandler { +pub struct Discv4LookupHandler { local_node: Node, signer: SigningKey, udp_socket: Arc, @@ -43,7 +23,7 @@ pub struct Disv4LookupHandler { tracker: TaskTracker, } -impl Disv4LookupHandler { +impl Discv4LookupHandler { pub fn new( local_node: Node, signer: SigningKey, @@ -62,6 +42,26 @@ impl Disv4LookupHandler { } } + /// Starts a tokio scheduler that: + /// - performs random lookups to discover new nodes. + /// + /// **Random lookups** + /// + /// Random lookups work in the following manner: + /// 1. Every 30min we spawn three concurrent lookups: one closest to our pubkey + /// and three other closest to random generated pubkeys. + /// 2. Every lookup starts with the closest nodes from our table. + /// Each lookup keeps track of: + /// - Peers that have already been asked for nodes + /// - Peers that have been already seen + /// - Potential peers to query for nodes: a vector of up to 16 entries holding the closest peers to the pubkey. + /// This vector is initially filled with nodes from our table. + /// 3. We send a `find_node` to the closest 3 nodes (that we have not yet asked) from the pubkey. + /// 4. We wait for the neighbors response and pushed or replace those that are closer to the potential peers. + /// 5. We select three other nodes from the potential peers vector and do the same until one lookup + /// doesn't have any node to ask. + /// + /// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#recursive-lookup pub fn start(&self, initial_interval_wait_seconds: u64) { self.tracker.spawn({ let self_clone = self.clone(); @@ -76,12 +76,12 @@ impl Disv4LookupHandler { tokio::time::sleep(Duration::from_secs(initial_interval_wait_seconds)).await; loop { - // Notice that the first tick is immediate, - // so as soon as the server starts we'll do a lookup with the seeder nodes. 
+ // first tick is immediate, interval.tick().await; debug!("Starting lookup"); + // lookup closest to our node_id self.tracker.spawn({ let self_clone = self.clone(); async move { @@ -109,27 +109,20 @@ impl Disv4LookupHandler { } async fn recursive_lookup(&self, target: H512) { - let mut asked_peers = HashSet::default(); - // lookups start with the closest from our table - let closest_nodes = self.table.lock().await.get_closest_nodes(target); + // lookups start with the closest nodes to the target from our table + let mut peers_to_ask: Vec = self.table.lock().await.get_closest_nodes(target); + // stores the peers in peers_to_ask + the peers that were in peers_to_ask but were replaced by closer targets let mut seen_peers: HashSet = HashSet::default(); + let mut asked_peers = HashSet::default(); seen_peers.insert(self.local_node.node_id); - for node in &closest_nodes { + for node in &peers_to_ask { seen_peers.insert(node.node_id); } - let mut peers_to_ask: Vec = closest_nodes; - loop { - let (nodes_found, queries) = self - .clone() - .lookup(target, &mut asked_peers, &peers_to_ask) - .await; + let (nodes_found, queries) = self.lookup(target, &mut asked_peers, &peers_to_ask).await; - // only push the peers that have not been seen - // that is those who have not been yet pushed, which also accounts for - // those peers that were in the array but have been replaced for closer peers for node in nodes_found { if !seen_peers.contains(&node.node_id) { seen_peers.insert(node.node_id); @@ -151,41 +144,34 @@ impl Disv4LookupHandler { asked_peers: &mut HashSet, nodes_to_ask: &Vec, ) -> (Vec, u32) { - // ask FIND_NODE as much as three times + // send FIND_NODE as much as three times let alpha = 3; let mut queries = 0; let mut nodes = vec![]; for node in nodes_to_ask { - if !asked_peers.contains(&node.node_id) { - #[allow(unused_assignments)] - let mut rx = None; - { - let mut table = self.table.lock().await; - let peer = table.get_by_node_id_mut(node.node_id); - if let Some(peer) = peer { - // if the peer has an ongoing find_node request, don't query - if peer.find_node_request.is_some() { - continue; - } - let (tx, receiver) = tokio::sync::mpsc::unbounded_channel::>(); - peer.new_find_node_request_with_sender(tx); - rx = Some(receiver); - } else { - // if peer isn't inserted to table, don't query - continue; + if asked_peers.contains(&node.node_id) { + continue; + } + let mut locked_table = self.table.lock().await; + if let Some(peer) = locked_table.get_by_node_id_mut(node.node_id) { + // if the peer has an ongoing find_node request, don't query + if peer.find_node_request.is_none() { + let (tx, mut receiver) = tokio::sync::mpsc::unbounded_channel::>(); + peer.new_find_node_request_with_sender(tx); + + // Release the lock + drop(locked_table); + + queries += 1; + asked_peers.insert(node.node_id); + if let Ok(mut found_nodes) = self + .find_node_and_wait_for_response(*node, target, &mut receiver) + .await + { + nodes.append(&mut found_nodes); } } - - queries += 1; - asked_peers.insert(node.node_id); - - if let Ok(mut found_nodes) = self - .find_node_and_wait_for_response(*node, target, &mut rx.unwrap()) - .await - { - nodes.append(&mut found_nodes); - } } if queries == alpha { @@ -196,9 +182,8 @@ impl Disv4LookupHandler { (nodes, queries) } - /** - * TODO explain what this does - */ + /// Adds a node to `peers_to_ask` if there's space; otherwise, replaces the farthest node + /// from `target` if the new node is closer. 
fn peers_to_ask_push(&self, peers_to_ask: &mut Vec, target: H512, node: Node) { let distance = bucket_number(target, node.node_id); diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 8793fa2feb..16f0226a5e 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -14,7 +14,7 @@ use ethrex_core::H256; use ethrex_storage::Store; use helpers::{get_expiration, is_expired, time_now_unix, time_since_in_hs}; use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, SigningKey, VerifyingKey}; -use lookup::Disv4LookupHandler; +use lookup::Discv4LookupHandler; use messages::{ ENRRequestMessage, ENRResponseMessage, FindNodeMessage, Message, NeighborsMessage, Packet, PingMessage, PongMessage, @@ -106,7 +106,7 @@ impl Discv4 { } pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { - let lookup_handler = Disv4LookupHandler::new( + let lookup_handler = Discv4LookupHandler::new( self.local_node, self.signer.clone(), self.udp_socket.clone(), @@ -474,12 +474,12 @@ impl Discv4 { } /// Starts a tokio scheduler that: - /// - performs periodic revalidation of the current nodes (sends a ping to the old nodes). Currently this is configured to happen every [`REVALIDATION_INTERVAL_IN_MINUTES`] + /// - performs periodic revalidation of the current nodes (sends a ping to the old nodes). /// /// **Peer revalidation** /// /// Peers revalidation works in the following manner: - /// 1. Every `REVALIDATION_INTERVAL_IN_SECONDS` we ping the 3 least recently pinged peers + /// 1. Every `revalidation_interval_seconds` we ping the 3 least recently pinged peers /// 2. In the next iteration we check if they have answered /// - if they have: we increment the liveness field by one /// - otherwise we decrement it by the current value / 3. 
@@ -489,12 +489,11 @@ impl Discv4 { pub async fn start_revalidation(&self) { let mut interval = tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); - // peers we have pinged in the previous iteration - let mut previously_pinged_peers = HashSet::new(); // first tick starts immediately interval.tick().await; + let mut previously_pinged_peers = HashSet::new(); loop { interval.tick().await; debug!("Running peer revalidation"); @@ -526,10 +525,10 @@ impl Discv4 { // this might be too expensive to run if our table is filled // maybe we could just pick them randomly let peers = self.table.lock().await.get_least_recently_pinged_peers(3); - previously_pinged_peers = HashSet::default(); // reset pinged peers + previously_pinged_peers = HashSet::default(); for peer in peers { debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); - let _ = self.ping(peer.node); + let _ = self.ping(peer.node).await; previously_pinged_peers.insert(peer.node.node_id); let mut table = self.table.lock().await; let peer = table.get_by_node_id_mut(peer.node.node_id); @@ -554,9 +553,6 @@ impl Discv4 { Ok(()) } - // Sends a ping to the addr - /// # Returns - /// an optional hash corresponding to the message header hash to account if the send was successful async fn ping(&self, node: Node) -> Result<(), DiscoveryError> { let mut buf = Vec::new(); let expiration: u64 = get_expiration(20); From 7c59825328dfa2e8e32534d3b1ea61ee4e74b2c5 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 11:55:59 -0300 Subject: [PATCH 10/33] chore: address clippy warnings --- crates/networking/p2p/discv4/lookup.rs | 2 +- crates/networking/p2p/discv4/mod.rs | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 781feb5e40..7dcf264fba 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -225,7 +225,7 @@ impl Discv4LookupHandler { .udp_socket .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) .await - .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + .map_err(DiscoveryError::MessageSendFailure)?; if bytes_sent != buf.len() { return Err(DiscoveryError::PartialMessageSent); diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 16f0226a5e..d33a4a0a94 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -1,4 +1,4 @@ -pub(self) mod helpers; +pub(super) mod helpers; mod lookup; pub(super) mod messages; @@ -70,7 +70,7 @@ impl Discv4 { ) -> Result { let udp_socket = UdpSocket::bind(SocketAddr::new(local_node.ip, local_node.udp_port)) .await - .map_err(|e| DiscoveryError::BindSocket(e))?; + .map_err(DiscoveryError::BindSocket)?; Ok(Self { local_node, @@ -102,7 +102,7 @@ impl Discv4 { } pub fn addr(&self) -> SocketAddr { - return SocketAddr::new(self.local_node.ip, self.local_node.udp_port); + SocketAddr::new(self.local_node.ip, self.local_node.udp_port) } pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { @@ -197,7 +197,7 @@ impl Discv4 { // if peer was already inserted, and last ping was 12 hs ago // we need to re ping to re-validate the endpoint proof if let Some(peer) = peer { - if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS as u64 { + if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS { self.ping(node).await?; } if let Some(enr_seq) = msg.enr_seq { @@ -301,7 +301,7 @@ impl Discv4 { .udp_socket 
.send_to(&buf, from) .await - .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + .map_err(DiscoveryError::MessageSendFailure)?; if bytes_sent != buf.len() { return Err(DiscoveryError::PartialMessageSent); @@ -355,7 +355,7 @@ impl Discv4 { if let Some(nodes) = nodes_to_insert { debug!("Storing neighbors in our table!"); for node in nodes { - let _ = self.try_add_peer_and_ping(node); + let _ = self.try_add_peer_and_ping(node).await; } } @@ -516,7 +516,7 @@ impl Discv4 { if peer.liveness == 0 { let new_peer = table.replace_peer(node_id); if let Some(new_peer) = new_peer { - let _ = self.ping(new_peer.node); + let _ = self.ping(new_peer.node).await; } } } @@ -574,7 +574,7 @@ impl Discv4 { .udp_socket .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) .await - .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + .map_err(DiscoveryError::MessageSendFailure)?; if bytes_sent != buf.len() { return Err(DiscoveryError::PartialMessageSent); @@ -607,7 +607,7 @@ impl Discv4 { .udp_socket .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) .await - .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + .map_err(DiscoveryError::MessageSendFailure)?; if bytes_sent != buf.len() { Err(DiscoveryError::PartialMessageSent) @@ -627,7 +627,7 @@ impl Discv4 { .udp_socket .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) .await - .map_err(|e| DiscoveryError::MessageSendFailure(e))?; + .map_err(DiscoveryError::MessageSendFailure)?; if bytes_sent != buf.len() { return Err(DiscoveryError::PartialMessageSent); } From 381ba163b370e40e108f1e968c4d6fc98eaf9826 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 11:58:42 -0300 Subject: [PATCH 11/33] refactor: remove unwrap on node record decoding --- crates/networking/p2p/types.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/networking/p2p/types.rs b/crates/networking/p2p/types.rs index 19c2c27b61..4c30ff34ad 100644 --- a/crates/networking/p2p/types.rs +++ b/crates/networking/p2p/types.rs @@ -205,7 +205,7 @@ impl RLPDecode for NodeRecord { let decoder = Decoder::new(rlp)?; let (signature, decoder) = decoder.decode_field("signature")?; let (seq, decoder) = decoder.decode_field("seq")?; - let (pairs, decoder) = decode_node_record_optional_fields(vec![], decoder); + let (pairs, decoder) = decode_node_record_optional_fields(vec![], decoder)?; // all fields in pairs are optional except for id let id_pair = pairs.iter().find(|(k, _v)| k.eq("id".as_bytes())); @@ -232,14 +232,14 @@ impl RLPDecode for NodeRecord { fn decode_node_record_optional_fields( mut pairs: Vec<(Bytes, Bytes)>, decoder: Decoder, -) -> (Vec<(Bytes, Bytes)>, Decoder) { +) -> Result<(Vec<(Bytes, Bytes)>, Decoder), RLPDecodeError> { let (key, decoder): (Option, Decoder) = decoder.decode_optional_field(); if let Some(k) = key { - let (value, decoder): (Vec, Decoder) = decoder.get_encoded_item().unwrap(); + let (value, decoder): (Vec, Decoder) = decoder.get_encoded_item()?; pairs.push((k, Bytes::from(value))); decode_node_record_optional_fields(pairs, decoder) } else { - (pairs, decoder) + Ok((pairs, decoder)) } } From 4c233edc61526a24139adad358da8ea297bcc536 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 12:09:18 -0300 Subject: [PATCH 12/33] refactor: proper error handling in network start --- cmd/ethrex/ethrex.rs | 5 ++--- crates/networking/p2p/net.rs | 25 +++++++++++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/cmd/ethrex/ethrex.rs b/cmd/ethrex/ethrex.rs index 27899b0291..e2482ca3f3 
100644 --- a/cmd/ethrex/ethrex.rs +++ b/cmd/ethrex/ethrex.rs @@ -264,7 +264,7 @@ async fn main() { let block_producer_engine = ethrex_dev::block_producer::start_block_producer(url, authrpc_jwtsecret.into(), head_block_hash, max_tries, 1000, ethrex_core::Address::default()); tracker.spawn(block_producer_engine); } else { - let networking = ethrex_net::start_network( + ethrex_net::start_network( local_p2p_node, tracker.clone(), bootnodes, @@ -272,8 +272,7 @@ async fn main() { peer_table.clone(), store, ) - .into_future(); - tracker.spawn(networking); + .await.expect("Network starts"); tracker.spawn(ethrex_net::periodically_show_peer_stats(peer_table)); } } diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index e6cbb001b0..9520c4c954 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -1,5 +1,5 @@ use bootnode::BootNode; -use discv4::Discv4; +use discv4::{DiscoveryError, Discv4}; use ethrex_core::H512; use ethrex_storage::Store; use k256::{ @@ -40,6 +40,11 @@ pub fn peer_table(signer: SigningKey) -> Arc> { Arc::new(Mutex::new(KademliaTable::new(local_node_id))) } +#[derive(Debug)] +pub enum NetworkError { + DiscoveryStart(DiscoveryError), +} + pub async fn start_network( local_node: Node, tracker: TaskTracker, @@ -47,15 +52,11 @@ pub async fn start_network( signer: SigningKey, peer_table: Arc>, storage: Store, -) { - let tcp_addr = SocketAddr::new(local_node.ip, local_node.tcp_port); - info!("Listening for requests at {tcp_addr}"); +) -> Result<(), NetworkError> { let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( tokio::task::Id, Arc, )>(MAX_MESSAGES_TO_BROADCAST); - - // TODO handle errors here let discovery = Discv4::try_new( local_node, signer.clone(), @@ -65,10 +66,16 @@ pub async fn start_network( tracker.clone(), ) .await - .unwrap(); + .map_err(NetworkError::DiscoveryStart)?; + info!("Starting discovery service at {}", discovery.addr()); - discovery.start(bootnodes).await.unwrap(); + discovery + .start(bootnodes) + .await + .map_err(NetworkError::DiscoveryStart)?; + let tcp_addr = SocketAddr::new(local_node.ip, local_node.tcp_port); + info!("Listening for requests at {tcp_addr}"); tracker.spawn(serve_p2p_requests( tracker.clone(), tcp_addr, @@ -77,6 +84,8 @@ pub async fn start_network( peer_table.clone(), channel_broadcast_send_end, )); + + Ok(()) } async fn serve_p2p_requests( From bd1330c559bcea92601f5db3390c381f92e2d478 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 13:07:28 -0300 Subject: [PATCH 13/33] refactor: better debug on handle message errors --- crates/networking/p2p/discv4/messages.rs | 14 ++++++++++++++ crates/networking/p2p/discv4/mod.rs | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/crates/networking/p2p/discv4/messages.rs b/crates/networking/p2p/discv4/messages.rs index 74e40338fb..2a5c9951f7 100644 --- a/crates/networking/p2p/discv4/messages.rs +++ b/crates/networking/p2p/discv4/messages.rs @@ -103,6 +103,20 @@ pub(crate) enum Message { ENRResponse(ENRResponseMessage), } +impl std::fmt::Display for Message { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let variant = match self { + Message::Ping(_) => "Ping", + Message::Pong(_) => "Pong", + Message::FindNode(_) => "FindNode", + Message::Neighbors(_) => "Neighbors", + Message::ENRRequest(_) => "ENRRequest", + Message::ENRResponse(_) => "ENRResponse", + }; + write!(f, "{}", variant) + } +} + impl Message { pub fn encode_with_header(&self, buf: &mut dyn BufMut, node_signer: 
&SigningKey) { let signature_size = 65_usize; diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index d33a4a0a94..b03e6c8d60 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -162,9 +162,10 @@ impl Discv4 { Err(e) => error!("Could not decode packet: {:?}", e), Ok(packet) => { let msg = packet.get_message(); + let msg_name = msg.to_string(); debug!("Message: {:?} from {}", msg, packet.get_node_id()); if let Err(e) = self.handle_message(packet, from, read, &buf).await { - debug!("Error while processing message: {:?}", e); + debug!("Error while processing {} message: {:?}", msg_name, e); }; } } From 7f82351f401abb5a13802548118d72125ecbcdcd Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 13:58:01 -0300 Subject: [PATCH 14/33] refactor: pass node_id directly instead of msg --- crates/networking/p2p/discv4/mod.rs | 23 ++++------------------- crates/networking/p2p/net.rs | 5 ++--- crates/networking/p2p/rlpx/connection.rs | 14 ++------------ 3 files changed, 8 insertions(+), 34 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index b03e6c8d60..e6a3f962b8 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -164,7 +164,7 @@ impl Discv4 { let msg = packet.get_message(); let msg_name = msg.to_string(); debug!("Message: {:?} from {}", msg, packet.get_node_id()); - if let Err(e) = self.handle_message(packet, from, read, &buf).await { + if let Err(e) = self.handle_message(packet, from).await { debug!("Error while processing {} message: {:?}", msg_name, e); }; } @@ -172,13 +172,7 @@ impl Discv4 { } } - async fn handle_message( - &self, - packet: Packet, - from: SocketAddr, - msg_len: usize, - msg_bytes: &[u8], - ) -> Result<(), DiscoveryError> { + async fn handle_message(&self, packet: Packet, from: SocketAddr) -> Result<(), DiscoveryError> { match packet.get_message() { Message::Ping(msg) => { if is_expired(msg.expiration) { @@ -244,21 +238,12 @@ impl Discv4 { self.send_enr_request(peer.node, enr_seq).await?; } } - let mut msg_buf = vec![0; msg_len - 32]; - msg_bytes[32..msg_len].clone_into(&mut msg_buf); let signer = self.signer.clone(); let storage = self.storage.clone(); let broadcaster = self.rlxp_conn_sender.clone(); self.tracker.spawn(async move { - handle_peer_as_initiator( - signer, - &msg_buf, - &peer.node, - storage, - table, - broadcaster, - ) - .await + handle_peer_as_initiator(signer, peer.node, storage, table, broadcaster) + .await }); Ok(()) } else { diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index 9520c4c954..9173672a59 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -143,8 +143,7 @@ async fn handle_peer_as_receiver( async fn handle_peer_as_initiator( signer: SigningKey, - msg: &[u8], - node: &Node, + node: Node, storage: Store, table: Arc>, connection_broadcast: RLPxConnBroadcastSender, @@ -160,7 +159,7 @@ async fn handle_peer_as_initiator( return; } }; - match RLPxConnection::initiator(signer, msg, stream, storage, connection_broadcast) { + match RLPxConnection::initiator(signer, node.node_id, stream, storage, connection_broadcast) { Ok(mut conn) => { conn.start_peer(SocketAddr::new(node.ip, node.udp_port), table) .await diff --git a/crates/networking/p2p/rlpx/connection.rs b/crates/networking/p2p/rlpx/connection.rs index fbcc02874c..8b9e9e5d69 100644 --- a/crates/networking/p2p/rlpx/connection.rs +++ 
b/crates/networking/p2p/rlpx/connection.rs @@ -141,24 +141,14 @@ impl RLPxConnection { pub fn initiator( signer: SigningKey, - msg: &[u8], + remote_node_id: H512, stream: S, storage: Store, connection_broadcast_send: broadcast::Sender<(task::Id, Arc)>, ) -> Result { - //TODO remove this, it is already done on the discv4 packet decoding - let digest = Keccak256::digest(msg.get(65..).ok_or(RLPxError::InvalidMessageLength())?); - let signature = &Signature::from_bytes( - msg.get(..64) - .ok_or(RLPxError::InvalidMessageLength())? - .into(), - )?; - let rid = RecoveryId::from_byte(*msg.get(64).ok_or(RLPxError::InvalidMessageLength())?) - .ok_or(RLPxError::InvalidRecoveryId())?; - let peer_pk = VerifyingKey::recover_from_prehash(&digest, signature, rid)?; Ok(RLPxConnection::new( signer, - pubkey2id(&peer_pk.into()), + remote_node_id, stream, RLPxConnectionMode::Initiator, storage, From 9f871bbc0e08ebce96b45335081cf670537a3bb3 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 14:32:58 -0300 Subject: [PATCH 15/33] refactor: remove usage of discv max packet size in rlpx --- crates/networking/p2p/discv4/mod.rs | 2 +- crates/networking/p2p/rlpx/connection.rs | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index e6a3f962b8..c758c612d1 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -29,7 +29,7 @@ use tokio::{net::UdpSocket, sync::Mutex}; use tokio_util::task::TaskTracker; use tracing::{debug, error}; -pub const MAX_DISC_PACKET_SIZE: usize = 1280; +const MAX_DISC_PACKET_SIZE: usize = 1280; const PROOF_EXPIRATION_IN_HS: u64 = 12; // These interval times are arbitrary numbers, maybe we should read them from a cfg or a cli param diff --git a/crates/networking/p2p/rlpx/connection.rs b/crates/networking/p2p/rlpx/connection.rs index 8b9e9e5d69..5b1653da4a 100644 --- a/crates/networking/p2p/rlpx/connection.rs +++ b/crates/networking/p2p/rlpx/connection.rs @@ -1,8 +1,6 @@ use std::sync::Arc; use crate::{ - //TODO is this right? 
- discv4::MAX_DISC_PACKET_SIZE, peer_channels::PeerChannels, rlpx::{ eth::{ @@ -29,16 +27,12 @@ use super::{ handshake::{decode_ack_message, decode_auth_message, encode_auth_message}, message as rlpx, p2p::Capability, - utils::pubkey2id, }; use ethrex_blockchain::mempool::{self}; use ethrex_core::{H256, H512}; use ethrex_storage::Store; use futures::SinkExt; -use k256::{ - ecdsa::{RecoveryId, Signature, SigningKey, VerifyingKey}, - PublicKey, SecretKey, -}; +use k256::{ecdsa::SigningKey, PublicKey, SecretKey}; use rand::random; use sha3::{Digest, Keccak256}; use tokio::{ @@ -63,6 +57,9 @@ pub(crate) type Aes256Ctr64BE = ctr::Ctr64BE; pub(crate) type RLPxConnBroadcastSender = broadcast::Sender<(tokio::task::Id, Arc)>; +// https://github.com/ethereum/go-ethereum/blob/master/p2p/peer.go#L44 +pub const P2P_MAX_MESSAGE_SIZE: usize = 2048; + enum RLPxConnectionMode { Initiator, Receiver, @@ -600,12 +597,16 @@ impl RLPxConnection { } async fn receive_handshake_msg(&mut self) -> Result, RLPxError> { - let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; + let mut buf = vec![0; 2]; // Read the message's size self.framed.get_mut().read_exact(&mut buf[..2]).await?; let ack_data = [buf[0], buf[1]]; let msg_size = u16::from_be_bytes(ack_data) as usize; + if msg_size > P2P_MAX_MESSAGE_SIZE { + return Err(RLPxError::InvalidMessageLength()); + } + buf.resize(msg_size, 0); // Read the rest of the message self.framed From 6fc235cd33bc91b92f05dad6168c1e0bb2f674f4 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 16:41:35 -0300 Subject: [PATCH 16/33] test: re-enable discovery tests --- crates/networking/p2p/discv4/lookup.rs | 153 ++++++ crates/networking/p2p/discv4/mod.rs | 670 ++++++++++--------------- 2 files changed, 423 insertions(+), 400 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 7dcf264fba..2c7ef0d919 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -252,3 +252,156 @@ impl Discv4LookupHandler { } } } + +#[cfg(test)] +mod tests { + use tokio::time::sleep; + + use super::*; + use crate::discv4::{ + tests::{ + connect_servers, fill_table_with_random_nodes, insert_random_node_on_custom_bucket, + start_discovery_server, + }, + Discv4, + }; + + fn lookup_handler_from_server(server: Discv4) -> Discv4LookupHandler { + Discv4LookupHandler::new( + server.local_node, + server.signer.clone(), + server.udp_socket.clone(), + server.table.clone(), + server.lookup_interval_minutes, + server.tracker.clone(), + ) + } + + #[tokio::test] + /** This test tests the lookup function, the idea is as follows: + * - We'll start two discovery servers (`a` & `b`) that will connect between each other + * - We'll insert random nodes to the server `a`` to fill its table + * - We'll forcedly run `lookup` and validate that a `find_node` request was sent + * by checking that new nodes have been inserted to the table + * + * This test for only one lookup, and not recursively. + */ + async fn discovery_server_lookup() -> Result<(), DiscoveryError> { + let mut server_a = start_discovery_server(8000, true).await?; + let mut server_b = start_discovery_server(8001, true).await?; + + fill_table_with_random_nodes(server_a.table.clone()).await; + + // because the table is filled, before making the connection, remove a node from the `b` bucket + // otherwise it won't be added. 
+ let b_bucket = bucket_number(server_a.local_node.node_id, server_b.local_node.node_id); + let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] + .node + .node_id; + server_a + .table + .lock() + .await + .replace_peer_on_custom_bucket(node_id_to_remove, b_bucket); + + connect_servers(&mut server_a, &mut server_b).await?; + + // now we are going to run a lookup with us as the target + let closets_peers_to_b_from_a = server_a + .table + .lock() + .await + .get_closest_nodes(server_b.local_node.node_id); + let nodes_to_ask = server_b + .table + .lock() + .await + .get_closest_nodes(server_b.local_node.node_id); + + let lookup_handler = lookup_handler_from_server(server_b.clone()); + println!("NODES TO ASK {:?}", nodes_to_ask); + lookup_handler + .lookup( + server_b.local_node.node_id, + &mut HashSet::default(), + &nodes_to_ask, + ) + .await; + + // find_node sent, allow some time for `a` to respond + sleep(Duration::from_secs(2)).await; + + // now all peers should've been inserted + for peer in closets_peers_to_b_from_a { + let table = server_b.table.lock().await; + assert!(table.get_by_node_id(peer.node_id).is_some()); + } + Ok(()) + } + + #[tokio::test] + /** This test tests the lookup function, the idea is as follows: + * - We'll start four discovery servers (`a`, `b`, `c` & `d`) + * - `a` will be connected to `b`, `b` will be connected to `c` and `c` will be connected to `d`. + * - The server `d` will have its table filled with mock nodes + * - We'll run a recursive lookup on server `a` and we expect to end with `b`, `c`, `d` and its mock nodes + */ + async fn discovery_server_recursive_lookup() -> Result<(), DiscoveryError> { + let mut server_a = start_discovery_server(8002, true).await?; + let mut server_b = start_discovery_server(8003, true).await?; + let mut server_c = start_discovery_server(8004, true).await?; + let mut server_d = start_discovery_server(8005, true).await?; + + connect_servers(&mut server_a, &mut server_b).await?; + connect_servers(&mut server_b, &mut server_c).await?; + connect_servers(&mut server_c, &mut server_d).await?; + + // now we fill the server_d table with 3 random nodes + // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) + // and so we will have to wait for the timeout on each node, which will only slow down the test + for _ in 0..3 { + insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; + } + + let mut expected_peers = vec![]; + expected_peers.extend( + server_b + .table + .lock() + .await + .get_closest_nodes(server_a.local_node.node_id), + ); + expected_peers.extend( + server_c + .table + .lock() + .await + .get_closest_nodes(server_a.local_node.node_id), + ); + expected_peers.extend( + server_d + .table + .lock() + .await + .get_closest_nodes(server_a.local_node.node_id), + ); + + let lookup_handler = lookup_handler_from_server(server_a.clone()); + + // we'll run a recursive lookup closest to the server itself + lookup_handler + .recursive_lookup(server_a.local_node.node_id) + .await; + + for peer in expected_peers { + assert!(server_a + .table + .lock() + .await + .get_by_node_id(peer.node_id) + .is_some()); + } + + Ok(()) + } +} diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index c758c612d1..6e046306c9 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -25,7 +25,10 @@ use std::{ sync::Arc, time::Duration, }; -use tokio::{net::UdpSocket, sync::Mutex}; +use tokio::{ + 
net::UdpSocket, + sync::{Mutex, MutexGuard}, +}; use tokio_util::task::TaskTracker; use tracing::{debug, error}; @@ -139,7 +142,10 @@ impl Discv4 { tcp_port: bootnode.socket_address.port(), node_id: bootnode.node_id, }; - if let Err(e) = self.try_add_peer_and_ping(node).await { + if let Err(e) = self + .try_add_peer_and_ping(node, self.table.lock().await) + .await + { debug!("Error while adding bootnode to table: {:?}", e); }; } @@ -180,7 +186,7 @@ impl Discv4 { }; let node = Node { ip: from.ip(), - udp_port: msg.from.udp_port, + udp_port: from.port(), tcp_port: msg.from.tcp_port, node_id: packet.get_node_id(), }; @@ -193,11 +199,11 @@ impl Discv4 { // we need to re ping to re-validate the endpoint proof if let Some(peer) = peer { if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS { - self.ping(node).await?; + self.ping(node, self.table.lock().await).await?; } if let Some(enr_seq) = msg.enr_seq { if enr_seq > peer.record.seq { - debug!("Found outdated enr-seq, send an enr_request"); + debug!("Found outdated enr-seq, sending an enr_request"); self.send_enr_request(peer.node, enr_seq).await?; } } @@ -206,7 +212,7 @@ impl Discv4 { let mut table = self.table.lock().await; if let (Some(peer), true) = table.insert_node(node) { // it was inserted, send ping to bond - self.ping(peer.node).await?; + self.ping(peer.node, table).await?; } } @@ -307,8 +313,8 @@ impl Discv4 { }; let mut nodes_to_insert = None; - let mut table = self.table.lock().await; - if let Some(node) = table.get_by_node_id_mut(packet.get_node_id()) { + let mut table_lock = self.table.lock().await; + if let Some(node) = table_lock.get_by_node_id_mut(packet.get_node_id()) { if let Some(req) = &mut node.find_node_request { if time_now_unix().saturating_sub(req.sent_at) >= 60 { node.find_node_request = None; @@ -337,11 +343,14 @@ impl Discv4 { } else { return Err(DiscoveryError::InvalidMessage("Unknown node".into())); } + drop(table_lock); if let Some(nodes) = nodes_to_insert { debug!("Storing neighbors in our table!"); for node in nodes { - let _ = self.try_add_peer_and_ping(node).await; + let _ = self + .try_add_peer_and_ping(node, self.table.lock().await) + .await; } } @@ -472,7 +481,7 @@ impl Discv4 { /// 3. 
If the liveness field is 0, then we delete it and insert a new one from the replacements table /// /// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#kademlia-table - pub async fn start_revalidation(&self) { + async fn start_revalidation(&self) { let mut interval = tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); @@ -486,8 +495,8 @@ impl Discv4 { // first check that the peers we ping have responded for node_id in previously_pinged_peers { - let mut table = self.table.lock().await; - let peer = table.get_by_node_id_mut(node_id).unwrap(); + let mut table_lock = self.table.lock().await; + let peer = table_lock.get_by_node_id_mut(node_id).unwrap(); if let Some(has_answered) = peer.revalidation { if has_answered { @@ -500,9 +509,9 @@ impl Discv4 { peer.revalidation = None; if peer.liveness == 0 { - let new_peer = table.replace_peer(node_id); + let new_peer = table_lock.replace_peer(node_id); if let Some(new_peer) = new_peer { - let _ = self.ping(new_peer.node).await; + let _ = self.ping(new_peer.node, table_lock).await; } } } @@ -514,7 +523,7 @@ impl Discv4 { previously_pinged_peers = HashSet::default(); for peer in peers { debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); - let _ = self.ping(peer.node).await; + let _ = self.ping(peer.node, self.table.lock().await).await; previously_pinged_peers.insert(peer.node.node_id); let mut table = self.table.lock().await; let peer = table.get_by_node_id_mut(peer.node.node_id); @@ -532,14 +541,22 @@ impl Discv4 { /// - If the node is **not found** in the table and there is enough space, it will be added, /// and a ping message will be sent to verify connectivity. /// - If the node is **already present**, no action is taken. - async fn try_add_peer_and_ping(&self, node: Node) -> Result<(), DiscoveryError> { - if let (Some(peer), true) = self.table.lock().await.insert_node(node) { - self.ping(peer.node).await?; + async fn try_add_peer_and_ping<'a>( + &self, + node: Node, + mut table_lock: MutexGuard<'a, KademliaTable>, + ) -> Result<(), DiscoveryError> { + if let (Some(peer), true) = table_lock.insert_node(node) { + self.ping(peer.node, table_lock).await?; }; Ok(()) } - async fn ping(&self, node: Node) -> Result<(), DiscoveryError> { + async fn ping<'a>( + &self, + node: Node, + mut table_lock: MutexGuard<'a, KademliaTable>, + ) -> Result<(), DiscoveryError> { let mut buf = Vec::new(); let expiration: u64 = get_expiration(20); let from = Endpoint { @@ -567,10 +584,7 @@ impl Discv4 { } let hash = H256::from_slice(&buf[0..32]); - self.table - .lock() - .await - .update_peer_ping(node.node_id, Some(hash)); + table_lock.update_peer_ping(node.node_id, Some(hash)); Ok(()) } @@ -628,380 +642,236 @@ impl Discv4 { } } -// #[cfg(test)] -// mod tests { -// use super::*; -// use ethrex_storage::EngineType; -// use kademlia::bucket_number; -// use rand::rngs::OsRng; -// use std::{ -// collections::HashSet, -// net::{IpAddr, Ipv4Addr}, -// }; -// use tokio::time::sleep; - -// async fn insert_random_node_on_custom_bucket( -// table: Arc>, -// bucket_idx: usize, -// ) { -// let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); -// let node = Node { -// ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), -// tcp_port: 0, -// udp_port: 0, -// node_id, -// }; -// table -// .lock() -// .await -// .insert_node_on_custom_bucket(node, bucket_idx); -// } - -// async fn fill_table_with_random_nodes(table: Arc>) { -// for i in 0..256 { -// for _ in 0..16 { -// 
insert_random_node_on_custom_bucket(table.clone(), i).await; -// } -// } -// } - -// struct MockServer { -// pub addr: SocketAddr, -// pub signer: SigningKey, -// pub table: Arc>, -// pub node_id: H512, -// pub udp_socket: Arc, -// } - -// async fn start_mock_discovery_server( -// udp_port: u16, -// should_start_server: bool, -// ) -> Result { -// let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); -// let signer = SigningKey::random(&mut OsRng); -// let udp_socket = Arc::new(UdpSocket::bind(addr).await?); -// let node_id = node_id_from_signing_key(&signer); -// let storage = -// Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); -// let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); -// let (channel_broadcast_send_end, _) = tokio::sync::broadcast::channel::<( -// tokio::task::Id, -// Arc, -// )>(MAX_MESSAGES_TO_BROADCAST); -// let tracker = TaskTracker::new(); -// if should_start_server { -// tracker.spawn(discover_peers_server( -// tracker.clone(), -// addr, -// udp_socket.clone(), -// storage.clone(), -// table.clone(), -// signer.clone(), -// channel_broadcast_send_end, -// )); -// } - -// Ok(MockServer { -// addr, -// signer, -// table, -// node_id, -// udp_socket, -// }) -// } - -// /// connects two mock servers by pinging a to b -// async fn connect_servers(server_a: &mut MockServer, server_b: &mut MockServer) { -// let ping_hash = ping( -// &server_a.udp_socket, -// server_a.addr, -// server_b.addr, -// &server_a.signer, -// ) -// .await; -// { -// let mut table = server_a.table.lock().await; -// table.insert_node(Node { -// ip: server_b.addr.ip(), -// udp_port: server_b.addr.port(), -// tcp_port: 0, -// node_id: server_b.node_id, -// }); -// table.update_peer_ping(server_b.node_id, ping_hash); -// } -// // allow some time for the server to respond -// sleep(Duration::from_secs(1)).await; -// } - -// #[tokio::test] -// /** This is a end to end test on the discovery server, the idea is as follows: -// * - We'll start two discovery servers (`a` & `b`) to ping between each other -// * - We'll make `b` ping `a`, and validate that the connection is right -// * - Then we'll wait for a revalidation where we expect everything to be the same -// * - We'll do this five 5 more times -// * - Then we'll stop server `a` so that it doesn't respond to re-validations -// * - We expect server `b` to remove node `a` from its table after 3 re-validations -// * To make this run faster, we'll change the revalidation time to be every 2secs -// */ -// async fn discovery_server_revalidation() -> Result<(), io::Error> { -// let mut server_a = start_mock_discovery_server(7998, true).await?; -// let mut server_b = start_mock_discovery_server(7999, true).await?; - -// connect_servers(&mut server_a, &mut server_b).await; - -// // start revalidation server -// tokio::spawn(peers_revalidation( -// server_b.addr, -// server_b.udp_socket.clone(), -// server_b.table.clone(), -// server_b.signer.clone(), -// 2, -// )); - -// for _ in 0..5 { -// sleep(Duration::from_millis(2500)).await; -// // by now, b should've send a revalidation to a -// let table = server_b.table.lock().await; -// let node = table.get_by_node_id(server_a.node_id); -// assert!(node.is_some_and(|n| n.revalidation.is_some())); -// } - -// // make sure that `a` has responded too all the re-validations -// // we can do that by checking the liveness -// { -// let table = server_b.table.lock().await; -// let node = table.get_by_node_id(server_a.node_id); -// assert_eq!(node.map_or(0, 
|n| n.liveness), 6); -// } - -// // now, stopping server `a` is not trivial -// // so we'll instead change its port, so that no one responds -// { -// let mut table = server_b.table.lock().await; -// let node = table.get_by_node_id_mut(server_a.node_id); -// if let Some(node) = node { -// node.node.udp_port = 0 -// }; -// } - -// // now the liveness field should start decreasing until it gets to 0 -// // which should happen in 3 re-validations -// for _ in 0..2 { -// sleep(Duration::from_millis(2500)).await; -// let table = server_b.table.lock().await; -// let node = table.get_by_node_id(server_a.node_id); -// assert!(node.is_some_and(|n| n.revalidation.is_some())); -// } -// sleep(Duration::from_millis(2500)).await; - -// // finally, `a`` should not exist anymore -// let table = server_b.table.lock().await; -// assert!(table.get_by_node_id(server_a.node_id).is_none()); -// Ok(()) -// } - -// #[tokio::test] -// /** This test tests the lookup function, the idea is as follows: -// * - We'll start two discovery servers (`a` & `b`) that will connect between each other -// * - We'll insert random nodes to the server `a`` to fill its table -// * - We'll forcedly run `lookup` and validate that a `find_node` request was sent -// * by checking that new nodes have been inserted to the table -// * -// * This test for only one lookup, and not recursively. -// */ -// async fn discovery_server_lookup() -> Result<(), io::Error> { -// let mut server_a = start_mock_discovery_server(8000, true).await?; -// let mut server_b = start_mock_discovery_server(8001, true).await?; - -// fill_table_with_random_nodes(server_a.table.clone()).await; - -// // before making the connection, remove a node from the `b` bucket. Otherwise it won't be added -// let b_bucket = bucket_number(server_a.node_id, server_b.node_id); -// let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] -// .node -// .node_id; -// server_a -// .table -// .lock() -// .await -// .replace_peer_on_custom_bucket(node_id_to_remove, b_bucket); - -// connect_servers(&mut server_a, &mut server_b).await; - -// // now we are going to run a lookup with us as the target -// let closets_peers_to_b_from_a = server_a -// .table -// .lock() -// .await -// .get_closest_nodes(server_b.node_id); -// let nodes_to_ask = server_b -// .table -// .lock() -// .await -// .get_closest_nodes(server_b.node_id); - -// lookup( -// server_b.udp_socket.clone(), -// server_b.table.clone(), -// &server_b.signer, -// server_b.node_id, -// &mut HashSet::default(), -// &nodes_to_ask, -// ) -// .await; - -// // find_node sent, allow some time for `a` to respond -// sleep(Duration::from_secs(2)).await; - -// // now all peers should've been inserted -// for peer in closets_peers_to_b_from_a { -// let table = server_b.table.lock().await; -// assert!(table.get_by_node_id(peer.node_id).is_some()); -// } -// Ok(()) -// } - -// #[tokio::test] -// /** This test tests the lookup function, the idea is as follows: -// * - We'll start four discovery servers (`a`, `b`, `c` & `d`) -// * - `a` will be connected to `b`, `b` will be connected to `c` and `c` will be connected to `d`. 
-// * - The server `d` will have its table filled with mock nodes -// * - We'll run a recursive lookup on server `a` and we expect to end with `b`, `c`, `d` and its mock nodes -// */ -// async fn discovery_server_recursive_lookup() -> Result<(), io::Error> { -// let mut server_a = start_mock_discovery_server(8002, true).await?; -// let mut server_b = start_mock_discovery_server(8003, true).await?; -// let mut server_c = start_mock_discovery_server(8004, true).await?; -// let mut server_d = start_mock_discovery_server(8005, true).await?; - -// connect_servers(&mut server_a, &mut server_b).await; -// connect_servers(&mut server_b, &mut server_c).await; -// connect_servers(&mut server_c, &mut server_d).await; - -// // now we fill the server_d table with 3 random nodes -// // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) -// // and so we will have to wait for the timeout on each node, which will only slow down the test -// for _ in 0..3 { -// insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; -// } - -// let mut expected_peers = vec![]; -// expected_peers.extend( -// server_b -// .table -// .lock() -// .await -// .get_closest_nodes(server_a.node_id), -// ); -// expected_peers.extend( -// server_c -// .table -// .lock() -// .await -// .get_closest_nodes(server_a.node_id), -// ); -// expected_peers.extend( -// server_d -// .table -// .lock() -// .await -// .get_closest_nodes(server_a.node_id), -// ); - -// // we'll run a recursive lookup closest to the server itself -// recursive_lookup( -// server_a.udp_socket.clone(), -// server_a.table.clone(), -// server_a.signer.clone(), -// server_a.node_id, -// server_a.node_id, -// ) -// .await; - -// for peer in expected_peers { -// assert!(server_a -// .table -// .lock() -// .await -// .get_by_node_id(peer.node_id) -// .is_some()); -// } -// Ok(()) -// } -// #[tokio::test] -// /** -// * This test verifies the exchange and update of ENR (Ethereum Node Record) messages. -// * The test follows these steps: -// * -// * 1. Start two nodes. -// * 2. Wait until they establish a connection. -// * 3. Assert that they exchange their records and store them -// * 3. Modify the ENR (node record) of one of the nodes. -// * 4. Send a new ping message and check that an ENR request was triggered. -// * 5. Verify that the updated node record has been correctly received and stored. -// */ -// async fn discovery_enr_message() -> Result<(), io::Error> { -// let mut server_a = start_mock_discovery_server(8006, true).await?; -// let mut server_b = start_mock_discovery_server(8007, true).await?; - -// connect_servers(&mut server_a, &mut server_b).await; - -// // wait some time for the enr request-response finishes -// sleep(Duration::from_millis(2500)).await; - -// let expected_record = -// NodeRecord::from_node(server_b.local_node, time_now_unix(), &server_b.signer) -// .expect("Node record is created from node"); - -// let server_a_peer_b = server_a -// .table -// .lock() -// .await -// .get_by_node_id(server_b.node_id) -// .cloned() -// .unwrap(); - -// // we only match the pairs, as the signature and seq will change -// // because they are calculated with the current time -// assert!(server_a_peer_b.record.decode_pairs() == expected_record.decode_pairs()); - -// // Modify server_a's record of server_b with an incorrect TCP port. -// // This simulates an outdated or incorrect entry in the node table. 
-// server_a -// .table -// .lock() -// .await -// .get_by_node_id_mut(server_b.node_id) -// .unwrap() -// .node -// .tcp_port = 10; - -// // Send a ping from server_b to server_a. -// // server_a should notice the enr_seq is outdated -// // and trigger a enr-request to server_b to update the record. -// ping( -// &server_b.udp_socket, -// server_b.addr, -// server_a.addr, -// &server_b.signer, -// ) -// .await; - -// // Wait for the update to propagate. -// sleep(Duration::from_millis(2500)).await; - -// // Verify that server_a has updated its record of server_b with the correct TCP port. -// let tcp_port = server_a -// .table -// .lock() -// .await -// .get_by_node_id(server_b.node_id) -// .unwrap() -// .node -// .tcp_port; - -// assert!(tcp_port == server_b.addr.port()); - -// Ok(()) -// } -// } +#[cfg(test)] +pub(super) mod tests { + use super::*; + use crate::{ + node_id_from_signing_key, rlpx::message::Message as RLPxMessage, MAX_MESSAGES_TO_BROADCAST, + }; + use ethrex_storage::EngineType; + use rand::rngs::OsRng; + use std::net::{IpAddr, Ipv4Addr}; + use tokio::time::sleep; + + pub async fn insert_random_node_on_custom_bucket( + table: Arc>, + bucket_idx: usize, + ) { + let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); + let node = Node { + ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), + tcp_port: 0, + udp_port: 0, + node_id, + }; + table + .lock() + .await + .insert_node_on_custom_bucket(node, bucket_idx); + } + + pub async fn fill_table_with_random_nodes(table: Arc>) { + for i in 0..256 { + for _ in 0..16 { + insert_random_node_on_custom_bucket(table.clone(), i).await; + } + } + } + + pub async fn start_discovery_server( + udp_port: u16, + should_start_server: bool, + ) -> Result { + let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); + let signer = SigningKey::random(&mut OsRng); + let node_id = node_id_from_signing_key(&signer); + let local_node = Node { + ip: addr.ip(), + node_id, + udp_port, + tcp_port: udp_port, + }; + + let storage = + Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); + let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); + let (rlpx_conn_sender, _) = tokio::sync::broadcast::channel::<( + tokio::task::Id, + Arc, + )>(MAX_MESSAGES_TO_BROADCAST); + let tracker = TaskTracker::new(); + + let discv4 = Discv4::try_new( + local_node, + signer.clone(), + storage, + table.clone(), + rlpx_conn_sender, + tracker.clone(), + ) + .await?; + + if should_start_server { + tracker.spawn({ + let discv4 = discv4.clone(); + async move { + discv4.receive().await; + } + }); + } + + Ok(discv4) + } + + /// connects two mock servers by pinging a to b + pub async fn connect_servers( + server_a: &mut Discv4, + server_b: &mut Discv4, + ) -> Result<(), DiscoveryError> { + server_a + .ping(server_b.local_node, server_a.table.lock().await) + .await?; + // allow some time for the server to respond + sleep(Duration::from_secs(1)).await; + Ok(()) + } + + #[tokio::test] + /** This is a end to end test on the discovery server, the idea is as follows: + * - We'll start two discovery servers (`a` & `b`) to ping between each other + * - We'll make `b` ping `a`, and validate that the connection is right + * - Then we'll wait for a revalidation where we expect everything to be the same + * - We'll do this five 5 more times + * - Then we'll stop server `a` so that it doesn't respond to re-validations + * - We expect server `b` to remove node `a` from its table after 3 re-validations + * To make this run faster, 
we'll change the revalidation time to be every 2secs + */ + async fn discovery_server_revalidation() -> Result<(), DiscoveryError> { + let mut server_a = start_discovery_server(7998, true).await?; + let mut server_b = start_discovery_server(7999, true).await?; + + connect_servers(&mut server_a, &mut server_b).await?; + + server_b = server_b.with_revalidation_interval_of(2); + + // start revalidation server + server_b.tracker.spawn({ + let server_b = server_b.clone(); + async move { server_b.start_revalidation().await } + }); + + for _ in 0..5 { + sleep(Duration::from_millis(2500)).await; + // by now, b should've send a revalidation to a + let table = server_b.table.lock().await; + let node = table.get_by_node_id(server_a.local_node.node_id); + assert!(node.is_some_and(|n| n.revalidation.is_some())); + } + + // make sure that `a` has responded too all the re-validations + // we can do that by checking the liveness + { + let table = server_b.table.lock().await; + let node = table.get_by_node_id(server_a.local_node.node_id); + assert_eq!(node.map_or(0, |n| n.liveness), 6); + } + + // now, stopping server `a` is not trivial + // so we'll instead change its port, so that no one responds + { + let mut table = server_b.table.lock().await; + let node = table.get_by_node_id_mut(server_a.local_node.node_id); + if let Some(node) = node { + node.node.udp_port = 0 + }; + } + + // now the liveness field should start decreasing until it gets to 0 + // which should happen in 3 re-validations + for _ in 0..2 { + sleep(Duration::from_millis(2500)).await; + let table = server_b.table.lock().await; + let node = table.get_by_node_id(server_a.local_node.node_id); + assert!(node.is_some_and(|n| n.revalidation.is_some())); + } + sleep(Duration::from_millis(2500)).await; + + // finally, `a`` should not exist anymore + let table = server_b.table.lock().await; + assert!(table.get_by_node_id(server_a.local_node.node_id).is_none()); + Ok(()) + } + + #[tokio::test] + /** + * This test verifies the exchange and update of ENR (Ethereum Node Record) messages. + * The test follows these steps: + * + * 1. Start two nodes. + * 2. Wait until they establish a connection. + * 3. Assert that they exchange their records and store them + * 3. Modify the ENR (node record) of one of the nodes. + * 4. Send a new ping message and check that an ENR request was triggered. + * 5. Verify that the updated node record has been correctly received and stored. + */ + async fn discovery_enr_message() -> Result<(), DiscoveryError> { + let mut server_a = start_discovery_server(8006, true).await?; + let mut server_b = start_discovery_server(8007, true).await?; + + connect_servers(&mut server_a, &mut server_b).await?; + + // wait some time for the enr request-response finishes + sleep(Duration::from_millis(2500)).await; + + let expected_record = + NodeRecord::from_node(server_b.local_node, time_now_unix(), &server_b.signer) + .expect("Node record is created from node"); + + let server_a_peer_b = server_a + .table + .lock() + .await + .get_by_node_id(server_b.local_node.node_id) + .cloned() + .unwrap(); + + // we only match the pairs, as the signature and seq will change + // because they are calculated with the current time + assert!(server_a_peer_b.record.decode_pairs() == expected_record.decode_pairs()); + + // Modify server_a's record of server_b with an incorrect TCP port. + // This simulates an outdated or incorrect entry in the node table. 
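+        // Mutating the table directly like this bypasses the normal discovery flow (the
+        // test can do it only because it shares the module and can lock the table); the
+        // ping/ENR round-trip below is what is expected to repair the entry.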
+ server_a + .table + .lock() + .await + .get_by_node_id_mut(server_b.local_node.node_id) + .unwrap() + .node + .tcp_port = 10; + + // Send a ping from server_b to server_a. + // server_a should notice the enr_seq is outdated + // and trigger a enr-request to server_b to update the record. + server_b + .ping(server_a.local_node, server_a.table.lock().await) + .await?; + + // Wait for the update to propagate. + sleep(Duration::from_millis(2500)).await; + + // Verify that server_a has updated its record of server_b with the correct TCP port. + let tcp_port = server_a + .table + .lock() + .await + .get_by_node_id(server_b.local_node.node_id) + .unwrap() + .node + .tcp_port; + + assert!(tcp_port == server_b.local_node.tcp_port); + + Ok(()) + } +} From f1ba350ccf63282364d78a3d7069bbe2e7761330 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 17:21:22 -0300 Subject: [PATCH 17/33] fix: sanity checks --- crates/networking/p2p/discv4/lookup.rs | 1 - crates/networking/p2p/discv4/mod.rs | 6 ++++++ crates/networking/p2p/rlpx/connection.rs | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 2c7ef0d919..af94642215 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -319,7 +319,6 @@ mod tests { .get_closest_nodes(server_b.local_node.node_id); let lookup_handler = lookup_handler_from_server(server_b.clone()); - println!("NODES TO ASK {:?}", nodes_to_ask); lookup_handler .lookup( server_b.local_node.node_id, diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 6e046306c9..4a0e67292d 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -546,6 +546,12 @@ impl Discv4 { node: Node, mut table_lock: MutexGuard<'a, KademliaTable>, ) -> Result<(), DiscoveryError> { + // sanity check to make sure we are not storing ourselves + // a case that may happen in a neighbor message for example + if node.node_id == self.local_node.node_id { + return Ok(()); + } + if let (Some(peer), true) = table_lock.insert_node(node) { self.ping(peer.node, table_lock).await?; }; diff --git a/crates/networking/p2p/rlpx/connection.rs b/crates/networking/p2p/rlpx/connection.rs index 5b1653da4a..96e0d4404f 100644 --- a/crates/networking/p2p/rlpx/connection.rs +++ b/crates/networking/p2p/rlpx/connection.rs @@ -606,7 +606,7 @@ impl RLPxConnection { if msg_size > P2P_MAX_MESSAGE_SIZE { return Err(RLPxError::InvalidMessageLength()); } - buf.resize(msg_size, 0); + buf.resize(msg_size + 2, 0); // Read the rest of the message self.framed From 399ef4c573d442c4d529c30f6f82df8f3557f565 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 17:44:32 -0300 Subject: [PATCH 18/33] refactor: use try_add_peer_and_ping in ping msg --- crates/networking/p2p/discv4/mod.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 4a0e67292d..a43dc2ae8d 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -209,11 +209,8 @@ impl Discv4 { } } else { // otherwise add to the table - let mut table = self.table.lock().await; - if let (Some(peer), true) = table.insert_node(node) { - // it was inserted, send ping to bond - self.ping(peer.node, table).await?; - } + self.try_add_peer_and_ping(node, self.table.lock().await) + .await?; } Ok(()) From 
522a2f73fd55cb86f3175b460e7cdb61d7a77156 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Thu, 23 Jan 2025 19:14:19 -0300 Subject: [PATCH 19/33] refactor: handle messages more rustacean code Aguante rust --- crates/networking/p2p/discv4/mod.rs | 233 +++++++++++++++------------- 1 file changed, 122 insertions(+), 111 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index a43dc2ae8d..e43db91b52 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -184,6 +184,7 @@ impl Discv4 { if is_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); }; + let node = Node { ip: from.ip(), udp_port: from.port(), @@ -191,10 +192,12 @@ impl Discv4 { node_id: packet.get_node_id(), }; self.pong(packet.get_hash(), node).await?; + let peer = { let table = self.table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; + // if peer was already inserted, and last ping was 12 hs ago // we need to re ping to re-validate the endpoint proof if let Some(peer) = peer { @@ -220,45 +223,41 @@ impl Discv4 { if is_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); } + let peer = { let table = table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; - if let Some(peer) = peer { - if peer.last_ping_hash.is_none() { - return Err(DiscoveryError::InvalidMessage( - "node did not send a previous ping".into(), - )); - } - if peer - .last_ping_hash - .is_some_and(|hash| hash == msg.ping_hash) - { - table.lock().await.pong_answered(peer.node.node_id); - if let Some(enr_seq) = msg.enr_seq { - if enr_seq > peer.record.seq { - debug!("Found outdated enr-seq, send an enr_request"); - self.send_enr_request(peer.node, enr_seq).await?; - } - } - let signer = self.signer.clone(); - let storage = self.storage.clone(); - let broadcaster = self.rlxp_conn_sender.clone(); - self.tracker.spawn(async move { - handle_peer_as_initiator(signer, peer.node, storage, table, broadcaster) - .await - }); - Ok(()) - } else { - Err(DiscoveryError::InvalidMessage( - "pong as the hash did not match the last corresponding ping".into(), - )) + let Some(peer) = peer else { + return Err(DiscoveryError::InvalidMessage("not known node".into())); + }; + + let Some(ping_hash) = peer.last_ping_hash else { + return Err(DiscoveryError::InvalidMessage( + "node did not send a previous ping".into(), + )); + }; + if ping_hash != msg.ping_hash { + return Err(DiscoveryError::InvalidMessage( + "hash did not match the last corresponding ping".into(), + )); + } + + // all validations went well, mark as answered and start a rlpx connection + table.lock().await.pong_answered(peer.node.node_id); + if let Some(enr_seq) = msg.enr_seq { + if enr_seq > peer.record.seq { + debug!("Found outdated enr-seq, send an enr_request"); + self.send_enr_request(peer.node, enr_seq).await?; } - } else { - Err(DiscoveryError::InvalidMessage( - "pong from a not known node".into(), - )) } + let signer = self.signer.clone(); + let storage = self.storage.clone(); + let broadcaster = self.rlxp_conn_sender.clone(); + self.tracker.spawn(async move { + handle_peer_as_initiator(signer, peer.node, storage, table, broadcaster).await + }); + Ok(()) } Message::FindNode(msg) => { if is_expired(msg.expiration) { @@ -268,87 +267,97 @@ impl Discv4 { let table = self.table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; - if let Some(node) = node { - if node.is_proven { - let nodes = { - let table = self.table.lock().await; - 
table.get_closest_nodes(msg.target) - }; - let nodes_chunks = nodes.chunks(4); - let expiration = get_expiration(20); - debug!("Sending neighbors!"); - // we are sending the neighbors in 4 different messages as not to exceed the - // maximum packet size - for nodes in nodes_chunks { - let neighbors = Message::Neighbors(NeighborsMessage::new( - nodes.to_vec(), - expiration, - )); - let mut buf = Vec::new(); - neighbors.encode_with_header(&mut buf, &self.signer); - let bytes_sent = self - .udp_socket - .send_to(&buf, from) - .await - .map_err(DiscoveryError::MessageSendFailure)?; - - if bytes_sent != buf.len() { - return Err(DiscoveryError::PartialMessageSent); - } - } - Ok(()) - } else { - Err(DiscoveryError::InvalidMessage("Node isn't proven.".into())) + + let Some(node) = node else { + return Err(DiscoveryError::InvalidMessage("not a known node".into())); + }; + if !node.is_proven { + return Err(DiscoveryError::InvalidMessage("node isn't proven".into())); + } + + let nodes = { + let table = self.table.lock().await; + table.get_closest_nodes(msg.target) + }; + let nodes_chunks = nodes.chunks(4); + let expiration = get_expiration(20); + + debug!("Sending neighbors!"); + // we are sending the neighbors in 4 different messages as not to exceed the + // maximum packet size + for nodes in nodes_chunks { + let neighbors = + Message::Neighbors(NeighborsMessage::new(nodes.to_vec(), expiration)); + let mut buf = Vec::new(); + neighbors.encode_with_header(&mut buf, &self.signer); + + let bytes_sent = self + .udp_socket + .send_to(&buf, from) + .await + .map_err(DiscoveryError::MessageSendFailure)?; + + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); } - } else { - Err(DiscoveryError::InvalidMessage("Node is not known".into())) } + + Ok(()) } Message::Neighbors(neighbors_msg) => { if is_expired(neighbors_msg.expiration) { return Err(DiscoveryError::MessageExpired); }; - let mut nodes_to_insert = None; let mut table_lock = self.table.lock().await; - if let Some(node) = table_lock.get_by_node_id_mut(packet.get_node_id()) { - if let Some(req) = &mut node.find_node_request { - if time_now_unix().saturating_sub(req.sent_at) >= 60 { - node.find_node_request = None; - return Err(DiscoveryError::InvalidMessage( - "find_node request expired after one minute".into(), - )); - } - let nodes = &neighbors_msg.nodes; - let nodes_sent = req.nodes_sent + nodes.len(); - - if nodes_sent <= MAX_NODES_PER_BUCKET { - req.nodes_sent = nodes_sent; - nodes_to_insert = Some(nodes.clone()); - if let Some(tx) = &req.tx { - let _ = tx.send(nodes.clone()); - } - } else { - debug!("Ignoring neighbors message as the client sent more than the allowed nodes"); - } - if nodes_sent == MAX_NODES_PER_BUCKET { - debug!("Neighbors request has been fulfilled"); - node.find_node_request = None; - } - } - } else { - return Err(DiscoveryError::InvalidMessage("Unknown node".into())); + let Some(node) = table_lock.get_by_node_id_mut(packet.get_node_id()) else { + return Err(DiscoveryError::InvalidMessage("not a known node".into())); + }; + + let Some(req) = &mut node.find_node_request else { + return Err(DiscoveryError::InvalidMessage( + "find node request not sent".into(), + )); + }; + if time_now_unix().saturating_sub(req.sent_at) >= 60 { + node.find_node_request = None; + return Err(DiscoveryError::InvalidMessage( + "find_node request expired after one minute".into(), + )); + } + + let nodes = &neighbors_msg.nodes; + let total_nodes_sent = req.nodes_sent + nodes.len(); + + if total_nodes_sent > 
MAX_NODES_PER_BUCKET { + node.find_node_request = None; + return Err(DiscoveryError::InvalidMessage( + "sent more than allowed nodes".into(), + )); + } + + // update the number of node_sent + // and forward the nodes sent if a channel is attached + req.nodes_sent = total_nodes_sent; + if let Some(tx) = &req.tx { + let _ = tx.send(nodes.clone()); + } + + if total_nodes_sent == MAX_NODES_PER_BUCKET { + debug!("Neighbors request has been fulfilled"); + node.find_node_request = None; } + + // release the lock early + // as we might be a long time pinging all the new nodes drop(table_lock); - if let Some(nodes) = nodes_to_insert { - debug!("Storing neighbors in our table!"); - for node in nodes { - let _ = self - .try_add_peer_and_ping(node, self.table.lock().await) - .await; - } + debug!("Storing neighbors in our table!"); + for node in nodes { + let _ = self + .try_add_peer_and_ping(*node, self.table.lock().await) + .await; } Ok(()) @@ -363,23 +372,25 @@ impl Discv4 { NodeRecord::from_node(self.local_node, time_now_unix(), &self.signer) else { return Err(DiscoveryError::InvalidMessage( - "Could not build local node record".into(), + "could not build local node record".into(), )); }; let msg = Message::ENRResponse(ENRResponseMessage::new(packet.get_hash(), node_record)); let mut buf = vec![]; msg.encode_with_header(&mut buf, &self.signer); - match self.udp_socket.send_to(&buf, from).await { - Ok(bytes_sent) => { - if bytes_sent == buf.len() { - Ok(()) - } else { - Err(DiscoveryError::PartialMessageSent) - } - } - Err(e) => Err(DiscoveryError::MessageSendFailure(e)), + + let bytes_sent = self + .udp_socket + .send_to(&buf, from) + .await + .map_err(DiscoveryError::MessageSendFailure)?; + + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); } + + Ok(()) } Message::ENRResponse(msg) => { let mut table = self.table.lock().await; From 2f5a38aba85660249f25929bbe2a5a120adaa218 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Fri, 24 Jan 2025 10:39:10 -0300 Subject: [PATCH 20/33] test: fix lookups assertion --- crates/networking/p2p/discv4/lookup.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index af94642215..6cae4da60a 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -333,7 +333,14 @@ mod tests { // now all peers should've been inserted for peer in closets_peers_to_b_from_a { let table = server_b.table.lock().await; - assert!(table.get_by_node_id(peer.node_id).is_some()); + let node = table.get_by_node_id(peer.node_id); + // sometimes nodes can send ourselves as a neighbor + // make sure we don't add it + if peer.node_id == server_b.local_node.node_id { + assert!(node.is_none()); + } else { + assert!(node.is_some()); + } } Ok(()) } @@ -392,13 +399,17 @@ mod tests { .recursive_lookup(server_a.local_node.node_id) .await; + // sometimes nodes can send ourselves as a neighbor + // make sure we don't add it for peer in expected_peers { - assert!(server_a - .table - .lock() - .await - .get_by_node_id(peer.node_id) - .is_some()); + let table = server_a.table.lock().await; + let node = table.get_by_node_id(peer.node_id); + + if peer.node_id == server_a.local_node.node_id { + assert!(node.is_none()); + } else { + assert!(node.is_some()); + } } Ok(()) From 7d69b813baeec6a0524841c8150f7eb5015d0f69 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Fri, 24 Jan 2025 10:55:00 -0300 Subject: [PATCH 21/33] 
refactor: handle messages --- crates/networking/p2p/discv4/mod.rs | 56 +++++++++++++++-------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index e43db91b52..69e948328f 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -99,7 +99,7 @@ impl Discv4 { #[allow(unused)] pub fn with_lookup_interval_of(self, minutes: u64) -> Self { Self { - revalidation_interval_seconds: minutes, + lookup_interval_minutes: minutes, ..self } } @@ -198,34 +198,34 @@ impl Discv4 { table.get_by_node_id(packet.get_node_id()).cloned() }; - // if peer was already inserted, and last ping was 12 hs ago - // we need to re ping to re-validate the endpoint proof - if let Some(peer) = peer { - if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS { - self.ping(node, self.table.lock().await).await?; - } - if let Some(enr_seq) = msg.enr_seq { - if enr_seq > peer.record.seq { - debug!("Found outdated enr-seq, sending an enr_request"); - self.send_enr_request(peer.node, enr_seq).await?; - } - } - } else { - // otherwise add to the table + let Some(peer) = peer else { self.try_add_peer_and_ping(node, self.table.lock().await) .await?; + return Ok(()); + }; + + // if peer was in the table and last ping was 12 hs ago + // we need to re ping to re-validate the endpoint proof + if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS { + self.ping(node, self.table.lock().await).await?; + } + if let Some(enr_seq) = msg.enr_seq { + if enr_seq > peer.record.seq { + debug!("Found outdated enr-seq, sending an enr_request"); + self.send_enr_request(peer.node, enr_seq, self.table.lock().await) + .await?; + } } Ok(()) } Message::Pong(msg) => { - let table = self.table.clone(); if is_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); } let peer = { - let table = table.lock().await; + let table = self.table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; let Some(peer) = peer else { @@ -244,16 +244,18 @@ impl Discv4 { } // all validations went well, mark as answered and start a rlpx connection - table.lock().await.pong_answered(peer.node.node_id); + self.table.lock().await.pong_answered(peer.node.node_id); if let Some(enr_seq) = msg.enr_seq { if enr_seq > peer.record.seq { debug!("Found outdated enr-seq, send an enr_request"); - self.send_enr_request(peer.node, enr_seq).await?; + self.send_enr_request(peer.node, enr_seq, self.table.lock().await) + .await?; } } let signer = self.signer.clone(); let storage = self.storage.clone(); let broadcaster = self.rlxp_conn_sender.clone(); + let table = self.table.clone(); self.tracker.spawn(async move { handle_peer_as_initiator(signer, peer.node, storage, table, broadcaster).await }); @@ -393,8 +395,8 @@ impl Discv4 { Ok(()) } Message::ENRResponse(msg) => { - let mut table = self.table.lock().await; - let peer = table.get_by_node_id_mut(packet.get_node_id()); + let mut table_lock = self.table.lock().await; + let peer = table_lock.get_by_node_id_mut(packet.get_node_id()); let Some(peer) = peer else { return Err(DiscoveryError::InvalidMessage("Peer not known".into())); }; @@ -630,7 +632,12 @@ impl Discv4 { } } - async fn send_enr_request(&self, node: Node, enr_seq: u64) -> Result<(), DiscoveryError> { + async fn send_enr_request<'a>( + &self, + node: Node, + enr_seq: u64, + mut table_lock: MutexGuard<'a, KademliaTable>, + ) -> Result<(), DiscoveryError> { let mut buf = Vec::new(); let expiration: u64 = 
get_expiration(20); @@ -647,10 +654,7 @@ impl Discv4 { } let hash = H256::from_slice(&buf[0..32]); - self.table - .lock() - .await - .update_peer_enr_seq(node.node_id, enr_seq, Some(hash)); + table_lock.update_peer_enr_seq(node.node_id, enr_seq, Some(hash)); Ok(()) } From 4da7c89f9f147979cf533f17b701f79528a58784 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Fri, 24 Jan 2025 10:59:07 -0300 Subject: [PATCH 22/33] refactor: enr_seq calculate it once on startup --- crates/networking/p2p/discv4/mod.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 69e948328f..d0676dc04e 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -52,6 +52,7 @@ pub enum DiscoveryError { #[derive(Debug, Clone)] pub struct Discv4 { local_node: Node, + enr_seq: u64, udp_socket: Arc, signer: SigningKey, storage: Store, @@ -77,6 +78,10 @@ impl Discv4 { Ok(Self { local_node, + // Note we are passing the current timestamp as the sequence number + // This is because we are not storing our local_node updates in the db + // see #1756 + enr_seq: time_now_unix(), signer, storage, table, @@ -368,10 +373,8 @@ impl Discv4 { if is_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); } - // Note we are passing the current timestamp as the sequence number - // This is because we are not storing our local_node updates in the db let Ok(node_record) = - NodeRecord::from_node(self.local_node, time_now_unix(), &self.signer) + NodeRecord::from_node(self.local_node, self.enr_seq, &self.signer) else { return Err(DiscoveryError::InvalidMessage( "could not build local node record".into(), @@ -586,8 +589,7 @@ impl Discv4 { tcp_port: node.tcp_port, }; - let ping = - Message::Ping(PingMessage::new(from, to, expiration).with_enr_seq(time_now_unix())); + let ping = Message::Ping(PingMessage::new(from, to, expiration).with_enr_seq(self.enr_seq)); ping.encode_with_header(&mut buf, &self.signer); let bytes_sent = self .udp_socket @@ -614,9 +616,8 @@ impl Discv4 { tcp_port: node.tcp_port, }; - let pong = Message::Pong( - PongMessage::new(to, ping_hash, expiration).with_enr_seq(time_now_unix()), - ); + let pong = + Message::Pong(PongMessage::new(to, ping_hash, expiration).with_enr_seq(self.enr_seq)); pong.encode_with_header(&mut buf, &self.signer); let bytes_sent = self From c01d2fcaf2a8c453d677fb1b87894019734d018e Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Fri, 24 Jan 2025 11:45:55 -0300 Subject: [PATCH 23/33] fix: enr messages with new calculated seq --- crates/networking/p2p/discv4/mod.rs | 26 ++++++++++++++------------ crates/networking/p2p/kademlia.rs | 9 --------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index d0676dc04e..30d1f9e376 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -217,7 +217,7 @@ impl Discv4 { if let Some(enr_seq) = msg.enr_seq { if enr_seq > peer.record.seq { debug!("Found outdated enr-seq, sending an enr_request"); - self.send_enr_request(peer.node, enr_seq, self.table.lock().await) + self.send_enr_request(peer.node, self.table.lock().await) .await?; } } @@ -253,7 +253,7 @@ impl Discv4 { if let Some(enr_seq) = msg.enr_seq { if enr_seq > peer.record.seq { debug!("Found outdated enr-seq, send an enr_request"); - self.send_enr_request(peer.node, enr_seq, self.table.lock().await) + 
self.send_enr_request(peer.node, self.table.lock().await) .await?; } } @@ -636,7 +636,6 @@ impl Discv4 { async fn send_enr_request<'a>( &self, node: Node, - enr_seq: u64, mut table_lock: MutexGuard<'a, KademliaTable>, ) -> Result<(), DiscoveryError> { let mut buf = Vec::new(); @@ -655,7 +654,9 @@ impl Discv4 { } let hash = H256::from_slice(&buf[0..32]); - table_lock.update_peer_enr_seq(node.node_id, enr_seq, Some(hash)); + if let Some(peer) = table_lock.get_by_node_id_mut(node.node_id) { + peer.enr_request_hash = Some(hash); + }; Ok(()) } @@ -869,6 +870,10 @@ pub(super) mod tests { .node .tcp_port = 10; + // update the enr_seq of server_b so that server_a notices it is outdated + // and sends a request to update it + server_b.enr_seq = time_now_unix(); + // Send a ping from server_b to server_a. // server_a should notice the enr_seq is outdated // and trigger a enr-request to server_b to update the record. @@ -880,16 +885,13 @@ pub(super) mod tests { sleep(Duration::from_millis(2500)).await; // Verify that server_a has updated its record of server_b with the correct TCP port. - let tcp_port = server_a - .table - .lock() - .await + let table_lock = server_a.table.lock().await; + let server_a_node_b_record = table_lock .get_by_node_id(server_b.local_node.node_id) - .unwrap() - .node - .tcp_port; + .unwrap(); - assert!(tcp_port == server_b.local_node.tcp_port); + assert!(server_a_node_b_record.node.tcp_port == server_b.local_node.tcp_port); + assert!(server_a_node_b_record.record.seq == server_b.enr_seq); Ok(()) } diff --git a/crates/networking/p2p/kademlia.rs b/crates/networking/p2p/kademlia.rs index f7001b9dfe..d087568b27 100644 --- a/crates/networking/p2p/kademlia.rs +++ b/crates/networking/p2p/kademlia.rs @@ -172,15 +172,6 @@ impl KademliaTable { peer.last_ping = time_now_unix(); } - pub fn update_peer_enr_seq(&mut self, node_id: H512, enr_seq: u64, enr_req_hash: Option) { - let peer = self.get_by_node_id_mut(node_id); - let Some(peer) = peer else { - return; - }; - peer.record.seq = enr_seq; - peer.enr_request_hash = enr_req_hash; - } - pub fn update_peer_ping_with_revalidation(&mut self, node_id: H512, ping_hash: Option) { let Some(peer) = self.get_by_node_id_mut(node_id) else { return; From b6a4ed508d3e8b7b4c3249d0e924e3778c7b66ea Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Fri, 24 Jan 2025 12:42:11 -0300 Subject: [PATCH 24/33] test: enr with new calculated seq --- crates/networking/p2p/discv4/mod.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 30d1f9e376..ae033c63bc 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -470,7 +470,6 @@ impl Discv4 { if let Some(udp_port) = record.udp_port { peer.node.udp_port = udp_port; } - peer.record.seq = msg.node_record.seq; peer.record = msg.node_record.clone(); debug!( "Node with id {:?} record has been successfully updated", @@ -638,8 +637,15 @@ impl Discv4 { node: Node, mut table_lock: MutexGuard<'a, KademliaTable>, ) -> Result<(), DiscoveryError> { - let mut buf = Vec::new(); + // verify there isn't an ongoing request + if table_lock + .get_by_node_id(node.node_id) + .is_some_and(|p| p.enr_request_hash.is_some()) + { + return Ok(()); + }; + let mut buf = Vec::new(); let expiration: u64 = get_expiration(20); let enr_req = Message::ENRRequest(ENRRequestMessage::new(expiration)); enr_req.encode_with_header(&mut buf, &self.signer); @@ -749,7 +755,7 @@ pub(super) mod tests { 
server_b: &mut Discv4, ) -> Result<(), DiscoveryError> { server_a - .ping(server_b.local_node, server_a.table.lock().await) + .try_add_peer_and_ping(server_b.local_node, server_a.table.lock().await) .await?; // allow some time for the server to respond sleep(Duration::from_secs(1)).await; @@ -878,7 +884,7 @@ pub(super) mod tests { // server_a should notice the enr_seq is outdated // and trigger a enr-request to server_b to update the record. server_b - .ping(server_a.local_node, server_a.table.lock().await) + .ping(server_a.local_node, server_b.table.lock().await) .await?; // Wait for the update to propagate. @@ -891,7 +897,6 @@ pub(super) mod tests { .unwrap(); assert!(server_a_node_b_record.node.tcp_port == server_b.local_node.tcp_port); - assert!(server_a_node_b_record.record.seq == server_b.enr_seq); Ok(()) } From 4ec15baa27ecc28e41ce9272ffa2a6b61a5abad2 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Fri, 24 Jan 2025 16:00:24 -0300 Subject: [PATCH 25/33] refactor: usage of context in discv4 module --- crates/networking/p2p/discv4/lookup.rs | 84 +++++----- crates/networking/p2p/discv4/mod.rs | 217 ++++++++++++------------- 2 files changed, 138 insertions(+), 163 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 6cae4da60a..ac7dff3de9 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -3,42 +3,28 @@ use crate::{ kademlia::{bucket_number, MAX_NODES_PER_BUCKET}, node_id_from_signing_key, types::Node, - KademliaTable, + P2PContext, }; use ethrex_core::H512; use k256::ecdsa::SigningKey; use rand::rngs::OsRng; use std::{collections::HashSet, net::SocketAddr, sync::Arc, time::Duration}; -use tokio::{net::UdpSocket, sync::Mutex}; -use tokio_util::task::TaskTracker; +use tokio::net::UdpSocket; use tracing::debug; #[derive(Clone, Debug)] pub struct Discv4LookupHandler { - local_node: Node, - signer: SigningKey, + ctx: P2PContext, udp_socket: Arc, - table: Arc>, interval_minutes: u64, - tracker: TaskTracker, } impl Discv4LookupHandler { - pub fn new( - local_node: Node, - signer: SigningKey, - udp_socket: Arc, - table: Arc>, - interval_minutes: u64, - tracker: TaskTracker, - ) -> Self { + pub fn new(ctx: P2PContext, udp_socket: Arc, interval_minutes: u64) -> Self { Self { - local_node, - signer, + ctx, udp_socket, - table, interval_minutes, - tracker, } } @@ -63,7 +49,7 @@ impl Discv4LookupHandler { /// /// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#recursive-lookup pub fn start(&self, initial_interval_wait_seconds: u64) { - self.tracker.spawn({ + self.ctx.tracker.spawn({ let self_clone = self.clone(); async move { self_clone.start_task(initial_interval_wait_seconds).await; @@ -82,11 +68,11 @@ impl Discv4LookupHandler { debug!("Starting lookup"); // lookup closest to our node_id - self.tracker.spawn({ + self.ctx.tracker.spawn({ let self_clone = self.clone(); async move { self_clone - .recursive_lookup(self_clone.local_node.node_id) + .recursive_lookup(self_clone.ctx.local_node.node_id) .await } }); @@ -94,7 +80,7 @@ impl Discv4LookupHandler { // lookup closest to 3 random keys for _ in 0..3 { let random_pub_key = SigningKey::random(&mut OsRng); - self.tracker.spawn({ + self.ctx.tracker.spawn({ let self_clone = self.clone(); async move { self_clone @@ -110,12 +96,12 @@ impl Discv4LookupHandler { async fn recursive_lookup(&self, target: H512) { // lookups start with the closest nodes to the target from our table - let mut peers_to_ask: Vec = 
self.table.lock().await.get_closest_nodes(target); + let mut peers_to_ask: Vec = self.ctx.table.lock().await.get_closest_nodes(target); // stores the peers in peers_to_ask + the peers that were in peers_to_ask but were replaced by closer targets let mut seen_peers: HashSet = HashSet::default(); let mut asked_peers = HashSet::default(); - seen_peers.insert(self.local_node.node_id); + seen_peers.insert(self.ctx.local_node.node_id); for node in &peers_to_ask { seen_peers.insert(node.node_id); } @@ -153,7 +139,7 @@ impl Discv4LookupHandler { if asked_peers.contains(&node.node_id) { continue; } - let mut locked_table = self.table.lock().await; + let mut locked_table = self.ctx.table.lock().await; if let Some(peer) = locked_table.get_by_node_id_mut(node.node_id) { // if the peer has an ongoing find_node request, don't query if peer.find_node_request.is_none() { @@ -220,7 +206,7 @@ impl Discv4LookupHandler { let msg = Message::FindNode(super::FindNodeMessage::new(target_id, expiration)); let mut buf = Vec::new(); - msg.encode_with_header(&mut buf, &self.signer); + msg.encode_with_header(&mut buf, &self.ctx.signer); let bytes_sent = self .udp_socket .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) @@ -268,12 +254,9 @@ mod tests { fn lookup_handler_from_server(server: Discv4) -> Discv4LookupHandler { Discv4LookupHandler::new( - server.local_node, - server.signer.clone(), + server.ctx.clone(), server.udp_socket.clone(), - server.table.clone(), server.lookup_interval_minutes, - server.tracker.clone(), ) } @@ -290,15 +273,19 @@ mod tests { let mut server_a = start_discovery_server(8000, true).await?; let mut server_b = start_discovery_server(8001, true).await?; - fill_table_with_random_nodes(server_a.table.clone()).await; + fill_table_with_random_nodes(server_a.ctx.table.clone()).await; // because the table is filled, before making the connection, remove a node from the `b` bucket // otherwise it won't be added. 
- let b_bucket = bucket_number(server_a.local_node.node_id, server_b.local_node.node_id); - let node_id_to_remove = server_a.table.lock().await.buckets()[b_bucket].peers[0] + let b_bucket = bucket_number( + server_a.ctx.local_node.node_id, + server_b.ctx.local_node.node_id, + ); + let node_id_to_remove = server_a.ctx.table.lock().await.buckets()[b_bucket].peers[0] .node .node_id; server_a + .ctx .table .lock() .await @@ -308,20 +295,22 @@ mod tests { // now we are going to run a lookup with us as the target let closets_peers_to_b_from_a = server_a + .ctx .table .lock() .await - .get_closest_nodes(server_b.local_node.node_id); + .get_closest_nodes(server_b.ctx.local_node.node_id); let nodes_to_ask = server_b + .ctx .table .lock() .await - .get_closest_nodes(server_b.local_node.node_id); + .get_closest_nodes(server_b.ctx.local_node.node_id); let lookup_handler = lookup_handler_from_server(server_b.clone()); lookup_handler .lookup( - server_b.local_node.node_id, + server_b.ctx.local_node.node_id, &mut HashSet::default(), &nodes_to_ask, ) @@ -332,11 +321,11 @@ mod tests { // now all peers should've been inserted for peer in closets_peers_to_b_from_a { - let table = server_b.table.lock().await; + let table = server_b.ctx.table.lock().await; let node = table.get_by_node_id(peer.node_id); // sometimes nodes can send ourselves as a neighbor // make sure we don't add it - if peer.node_id == server_b.local_node.node_id { + if peer.node_id == server_b.ctx.local_node.node_id { assert!(node.is_none()); } else { assert!(node.is_some()); @@ -366,46 +355,49 @@ mod tests { // the reason we don't put more is because this nodes won't respond (as they don't are not real servers) // and so we will have to wait for the timeout on each node, which will only slow down the test for _ in 0..3 { - insert_random_node_on_custom_bucket(server_d.table.clone(), 0).await; + insert_random_node_on_custom_bucket(server_d.ctx.table.clone(), 0).await; } let mut expected_peers = vec![]; expected_peers.extend( server_b + .ctx .table .lock() .await - .get_closest_nodes(server_a.local_node.node_id), + .get_closest_nodes(server_a.ctx.local_node.node_id), ); expected_peers.extend( server_c + .ctx .table .lock() .await - .get_closest_nodes(server_a.local_node.node_id), + .get_closest_nodes(server_a.ctx.local_node.node_id), ); expected_peers.extend( server_d + .ctx .table .lock() .await - .get_closest_nodes(server_a.local_node.node_id), + .get_closest_nodes(server_a.ctx.local_node.node_id), ); let lookup_handler = lookup_handler_from_server(server_a.clone()); // we'll run a recursive lookup closest to the server itself lookup_handler - .recursive_lookup(server_a.local_node.node_id) + .recursive_lookup(server_a.ctx.local_node.node_id) .await; // sometimes nodes can send ourselves as a neighbor // make sure we don't add it for peer in expected_peers { - let table = server_a.table.lock().await; + let table = server_a.ctx.table.lock().await; let node = table.get_by_node_id(peer.node_id); - if peer.node_id == server_a.local_node.node_id { + if peer.node_id == server_a.ctx.local_node.node_id { assert!(node.is_none()); } else { assert!(node.is_some()); diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index ae033c63bc..8fe943dab6 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -6,14 +6,12 @@ use crate::{ bootnode::BootNode, handle_peer_as_initiator, kademlia::MAX_NODES_PER_BUCKET, - rlpx::connection::RLPxConnBroadcastSender, types::{Endpoint, Node, 
NodeRecord}, - KademliaTable, + KademliaTable, P2PContext, }; use ethrex_core::H256; -use ethrex_storage::Store; use helpers::{get_expiration, is_expired, time_now_unix, time_since_in_hs}; -use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, SigningKey, VerifyingKey}; +use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, VerifyingKey}; use lookup::Discv4LookupHandler; use messages::{ ENRRequestMessage, ENRResponseMessage, FindNodeMessage, Message, NeighborsMessage, Packet, @@ -25,11 +23,7 @@ use std::{ sync::Arc, time::Duration, }; -use tokio::{ - net::UdpSocket, - sync::{Mutex, MutexGuard}, -}; -use tokio_util::task::TaskTracker; +use tokio::{net::UdpSocket, sync::MutexGuard}; use tracing::{debug, error}; const MAX_DISC_PACKET_SIZE: usize = 1280; @@ -51,45 +45,24 @@ pub enum DiscoveryError { #[derive(Debug, Clone)] pub struct Discv4 { - local_node: Node, - enr_seq: u64, + ctx: P2PContext, udp_socket: Arc, - signer: SigningKey, - storage: Store, - table: Arc>, - tracker: TaskTracker, - rlxp_conn_sender: RLPxConnBroadcastSender, revalidation_interval_seconds: u64, lookup_interval_minutes: u64, } impl Discv4 { - pub async fn try_new( - local_node: Node, - signer: SigningKey, - storage: Store, - table: Arc>, - rlpx_conn_sender: RLPxConnBroadcastSender, - tracker: TaskTracker, - ) -> Result { - let udp_socket = UdpSocket::bind(SocketAddr::new(local_node.ip, local_node.udp_port)) - .await - .map_err(DiscoveryError::BindSocket)?; + pub async fn try_new(ctx: P2PContext) -> Result { + let udp_socket = + UdpSocket::bind(SocketAddr::new(ctx.local_node.ip, ctx.local_node.udp_port)) + .await + .map_err(DiscoveryError::BindSocket)?; Ok(Self { - local_node, - // Note we are passing the current timestamp as the sequence number - // This is because we are not storing our local_node updates in the db - // see #1756 - enr_seq: time_now_unix(), - signer, - storage, - table, - rlxp_conn_sender: rlpx_conn_sender, + ctx, udp_socket: Arc::new(udp_socket), revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, lookup_interval_minutes: PEERS_RANDOM_LOOKUP_TIME_IN_MIN, - tracker, }) } @@ -110,24 +83,21 @@ impl Discv4 { } pub fn addr(&self) -> SocketAddr { - SocketAddr::new(self.local_node.ip, self.local_node.udp_port) + SocketAddr::new(self.ctx.local_node.ip, self.ctx.local_node.udp_port) } pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { let lookup_handler = Discv4LookupHandler::new( - self.local_node, - self.signer.clone(), + self.ctx.clone(), self.udp_socket.clone(), - self.table.clone(), self.lookup_interval_minutes, - self.tracker.clone(), ); - self.tracker.spawn({ + self.ctx.tracker.spawn({ let self_clone = self.clone(); async move { self_clone.receive().await } }); - self.tracker.spawn({ + self.ctx.tracker.spawn({ let self_clone = self.clone(); async move { self_clone.start_revalidation().await } }); @@ -148,7 +118,7 @@ impl Discv4 { node_id: bootnode.node_id, }; if let Err(e) = self - .try_add_peer_and_ping(node, self.table.lock().await) + .try_add_peer_and_ping(node, self.ctx.table.lock().await) .await { debug!("Error while adding bootnode to table: {:?}", e); @@ -199,12 +169,12 @@ impl Discv4 { self.pong(packet.get_hash(), node).await?; let peer = { - let table = self.table.lock().await; + let table = self.ctx.table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; let Some(peer) = peer else { - self.try_add_peer_and_ping(node, self.table.lock().await) + self.try_add_peer_and_ping(node, self.ctx.table.lock().await) .await?; 
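                // this ping came from a peer we do not know yet: bonding starts here and
                // the endpoint proof completes when its pong arrives, so there is nothing
                // left to do for this message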
return Ok(()); }; @@ -212,12 +182,12 @@ impl Discv4 { // if peer was in the table and last ping was 12 hs ago // we need to re ping to re-validate the endpoint proof if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS { - self.ping(node, self.table.lock().await).await?; + self.ping(node, self.ctx.table.lock().await).await?; } if let Some(enr_seq) = msg.enr_seq { if enr_seq > peer.record.seq { debug!("Found outdated enr-seq, sending an enr_request"); - self.send_enr_request(peer.node, self.table.lock().await) + self.send_enr_request(peer.node, self.ctx.table.lock().await) .await?; } } @@ -230,7 +200,7 @@ impl Discv4 { } let peer = { - let table = self.table.lock().await; + let table = self.ctx.table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; let Some(peer) = peer else { @@ -249,21 +219,18 @@ impl Discv4 { } // all validations went well, mark as answered and start a rlpx connection - self.table.lock().await.pong_answered(peer.node.node_id); + self.ctx.table.lock().await.pong_answered(peer.node.node_id); if let Some(enr_seq) = msg.enr_seq { if enr_seq > peer.record.seq { debug!("Found outdated enr-seq, send an enr_request"); - self.send_enr_request(peer.node, self.table.lock().await) + self.send_enr_request(peer.node, self.ctx.table.lock().await) .await?; } } - let signer = self.signer.clone(); - let storage = self.storage.clone(); - let broadcaster = self.rlxp_conn_sender.clone(); - let table = self.table.clone(); - self.tracker.spawn(async move { - handle_peer_as_initiator(signer, peer.node, storage, table, broadcaster).await - }); + let ctx = self.ctx.clone(); + self.ctx + .tracker + .spawn(async move { handle_peer_as_initiator(ctx, peer.node).await }); Ok(()) } Message::FindNode(msg) => { @@ -271,7 +238,7 @@ impl Discv4 { return Err(DiscoveryError::MessageExpired); }; let node = { - let table = self.table.lock().await; + let table = self.ctx.table.lock().await; table.get_by_node_id(packet.get_node_id()).cloned() }; @@ -283,7 +250,7 @@ impl Discv4 { } let nodes = { - let table = self.table.lock().await; + let table = self.ctx.table.lock().await; table.get_closest_nodes(msg.target) }; let nodes_chunks = nodes.chunks(4); @@ -296,7 +263,7 @@ impl Discv4 { let neighbors = Message::Neighbors(NeighborsMessage::new(nodes.to_vec(), expiration)); let mut buf = Vec::new(); - neighbors.encode_with_header(&mut buf, &self.signer); + neighbors.encode_with_header(&mut buf, &self.ctx.signer); let bytes_sent = self .udp_socket @@ -316,7 +283,7 @@ impl Discv4 { return Err(DiscoveryError::MessageExpired); }; - let mut table_lock = self.table.lock().await; + let mut table_lock = self.ctx.table.lock().await; let Some(node) = table_lock.get_by_node_id_mut(packet.get_node_id()) else { return Err(DiscoveryError::InvalidMessage("not a known node".into())); @@ -363,7 +330,7 @@ impl Discv4 { debug!("Storing neighbors in our table!"); for node in nodes { let _ = self - .try_add_peer_and_ping(*node, self.table.lock().await) + .try_add_peer_and_ping(*node, self.ctx.table.lock().await) .await; } @@ -374,7 +341,7 @@ impl Discv4 { return Err(DiscoveryError::MessageExpired); } let Ok(node_record) = - NodeRecord::from_node(self.local_node, self.enr_seq, &self.signer) + NodeRecord::from_node(self.ctx.local_node, self.ctx.enr_seq, &self.ctx.signer) else { return Err(DiscoveryError::InvalidMessage( "could not build local node record".into(), @@ -383,7 +350,7 @@ impl Discv4 { let msg = Message::ENRResponse(ENRResponseMessage::new(packet.get_hash(), node_record)); let mut buf = vec![]; - 
msg.encode_with_header(&mut buf, &self.signer); + msg.encode_with_header(&mut buf, &self.ctx.signer); let bytes_sent = self .udp_socket @@ -398,7 +365,7 @@ impl Discv4 { Ok(()) } Message::ENRResponse(msg) => { - let mut table_lock = self.table.lock().await; + let mut table_lock = self.ctx.table.lock().await; let peer = table_lock.get_by_node_id_mut(packet.get_node_id()); let Some(peer) = peer else { return Err(DiscoveryError::InvalidMessage("Peer not known".into())); @@ -507,7 +474,7 @@ impl Discv4 { // first check that the peers we ping have responded for node_id in previously_pinged_peers { - let mut table_lock = self.table.lock().await; + let mut table_lock = self.ctx.table.lock().await; let peer = table_lock.get_by_node_id_mut(node_id).unwrap(); if let Some(has_answered) = peer.revalidation { @@ -531,13 +498,18 @@ impl Discv4 { // now send a ping to the least recently pinged peers // this might be too expensive to run if our table is filled // maybe we could just pick them randomly - let peers = self.table.lock().await.get_least_recently_pinged_peers(3); + let peers = self + .ctx + .table + .lock() + .await + .get_least_recently_pinged_peers(3); previously_pinged_peers = HashSet::default(); for peer in peers { debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); - let _ = self.ping(peer.node, self.table.lock().await).await; + let _ = self.ping(peer.node, self.ctx.table.lock().await).await; previously_pinged_peers.insert(peer.node.node_id); - let mut table = self.table.lock().await; + let mut table = self.ctx.table.lock().await; let peer = table.get_by_node_id_mut(peer.node.node_id); if let Some(peer) = peer { peer.revalidation = Some(false); @@ -560,7 +532,7 @@ impl Discv4 { ) -> Result<(), DiscoveryError> { // sanity check to make sure we are not storing ourselves // a case that may happen in a neighbor message for example - if node.node_id == self.local_node.node_id { + if node.node_id == self.ctx.local_node.node_id { return Ok(()); } @@ -578,9 +550,9 @@ impl Discv4 { let mut buf = Vec::new(); let expiration: u64 = get_expiration(20); let from = Endpoint { - ip: self.local_node.ip, - udp_port: self.local_node.udp_port, - tcp_port: self.local_node.tcp_port, + ip: self.ctx.local_node.ip, + udp_port: self.ctx.local_node.udp_port, + tcp_port: self.ctx.local_node.tcp_port, }; let to = Endpoint { ip: node.ip, @@ -588,8 +560,9 @@ impl Discv4 { tcp_port: node.tcp_port, }; - let ping = Message::Ping(PingMessage::new(from, to, expiration).with_enr_seq(self.enr_seq)); - ping.encode_with_header(&mut buf, &self.signer); + let ping = + Message::Ping(PingMessage::new(from, to, expiration).with_enr_seq(self.ctx.enr_seq)); + ping.encode_with_header(&mut buf, &self.ctx.signer); let bytes_sent = self .udp_socket .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) @@ -615,9 +588,10 @@ impl Discv4 { tcp_port: node.tcp_port, }; - let pong = - Message::Pong(PongMessage::new(to, ping_hash, expiration).with_enr_seq(self.enr_seq)); - pong.encode_with_header(&mut buf, &self.signer); + let pong = Message::Pong( + PongMessage::new(to, ping_hash, expiration).with_enr_seq(self.ctx.enr_seq), + ); + pong.encode_with_header(&mut buf, &self.ctx.signer); let bytes_sent = self .udp_socket @@ -648,7 +622,7 @@ impl Discv4 { let mut buf = Vec::new(); let expiration: u64 = get_expiration(20); let enr_req = Message::ENRRequest(ENRRequestMessage::new(expiration)); - enr_req.encode_with_header(&mut buf, &self.signer); + enr_req.encode_with_header(&mut buf, &self.ctx.signer); let bytes_sent = self .udp_socket @@ 
-674,10 +648,11 @@ pub(super) mod tests { use crate::{ node_id_from_signing_key, rlpx::message::Message as RLPxMessage, MAX_MESSAGES_TO_BROADCAST, }; - use ethrex_storage::EngineType; + use ethrex_storage::{EngineType, Store}; + use k256::ecdsa::SigningKey; use rand::rngs::OsRng; use std::net::{IpAddr, Ipv4Addr}; - use tokio::time::sleep; + use tokio::{sync::Mutex, time::sleep}; pub async fn insert_random_node_on_custom_bucket( table: Arc>, @@ -721,21 +696,22 @@ pub(super) mod tests { let storage = Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); - let (rlpx_conn_sender, _) = tokio::sync::broadcast::channel::<( - tokio::task::Id, - Arc, - )>(MAX_MESSAGES_TO_BROADCAST); - let tracker = TaskTracker::new(); + let (broadcast, _) = tokio::sync::broadcast::channel::<(tokio::task::Id, Arc)>( + MAX_MESSAGES_TO_BROADCAST, + ); + let tracker = tokio_util::task::TaskTracker::new(); - let discv4 = Discv4::try_new( + let ctx = P2PContext { local_node, - signer.clone(), + enr_seq: time_now_unix(), + tracker: tracker.clone(), + signer, + table, storage, - table.clone(), - rlpx_conn_sender, - tracker.clone(), - ) - .await?; + broadcast, + }; + + let discv4 = Discv4::try_new(ctx).await?; if should_start_server { tracker.spawn({ @@ -755,7 +731,7 @@ pub(super) mod tests { server_b: &mut Discv4, ) -> Result<(), DiscoveryError> { server_a - .try_add_peer_and_ping(server_b.local_node, server_a.table.lock().await) + .try_add_peer_and_ping(server_b.ctx.local_node, server_a.ctx.table.lock().await) .await?; // allow some time for the server to respond sleep(Duration::from_secs(1)).await; @@ -781,7 +757,7 @@ pub(super) mod tests { server_b = server_b.with_revalidation_interval_of(2); // start revalidation server - server_b.tracker.spawn({ + server_b.ctx.tracker.spawn({ let server_b = server_b.clone(); async move { server_b.start_revalidation().await } }); @@ -789,24 +765,24 @@ pub(super) mod tests { for _ in 0..5 { sleep(Duration::from_millis(2500)).await; // by now, b should've send a revalidation to a - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.local_node.node_id); + let table = server_b.ctx.table.lock().await; + let node = table.get_by_node_id(server_a.ctx.local_node.node_id); assert!(node.is_some_and(|n| n.revalidation.is_some())); } // make sure that `a` has responded too all the re-validations // we can do that by checking the liveness { - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.local_node.node_id); + let table = server_b.ctx.table.lock().await; + let node = table.get_by_node_id(server_a.ctx.local_node.node_id); assert_eq!(node.map_or(0, |n| n.liveness), 6); } // now, stopping server `a` is not trivial // so we'll instead change its port, so that no one responds { - let mut table = server_b.table.lock().await; - let node = table.get_by_node_id_mut(server_a.local_node.node_id); + let mut table = server_b.ctx.table.lock().await; + let node = table.get_by_node_id_mut(server_a.ctx.local_node.node_id); if let Some(node) = node { node.node.udp_port = 0 }; @@ -816,15 +792,17 @@ pub(super) mod tests { // which should happen in 3 re-validations for _ in 0..2 { sleep(Duration::from_millis(2500)).await; - let table = server_b.table.lock().await; - let node = table.get_by_node_id(server_a.local_node.node_id); + let table = server_b.ctx.table.lock().await; + let node = table.get_by_node_id(server_a.ctx.local_node.node_id); 
assert!(node.is_some_and(|n| n.revalidation.is_some())); } sleep(Duration::from_millis(2500)).await; // finally, `a`` should not exist anymore - let table = server_b.table.lock().await; - assert!(table.get_by_node_id(server_a.local_node.node_id).is_none()); + let table = server_b.ctx.table.lock().await; + assert!(table + .get_by_node_id(server_a.ctx.local_node.node_id) + .is_none()); Ok(()) } @@ -849,15 +827,19 @@ pub(super) mod tests { // wait some time for the enr request-response finishes sleep(Duration::from_millis(2500)).await; - let expected_record = - NodeRecord::from_node(server_b.local_node, time_now_unix(), &server_b.signer) - .expect("Node record is created from node"); + let expected_record = NodeRecord::from_node( + server_b.ctx.local_node, + time_now_unix(), + &server_b.ctx.signer, + ) + .expect("Node record is created from node"); let server_a_peer_b = server_a + .ctx .table .lock() .await - .get_by_node_id(server_b.local_node.node_id) + .get_by_node_id(server_b.ctx.local_node.node_id) .cloned() .unwrap(); @@ -868,35 +850,36 @@ pub(super) mod tests { // Modify server_a's record of server_b with an incorrect TCP port. // This simulates an outdated or incorrect entry in the node table. server_a + .ctx .table .lock() .await - .get_by_node_id_mut(server_b.local_node.node_id) + .get_by_node_id_mut(server_b.ctx.local_node.node_id) .unwrap() .node .tcp_port = 10; // update the enr_seq of server_b so that server_a notices it is outdated // and sends a request to update it - server_b.enr_seq = time_now_unix(); + server_b.ctx.enr_seq = time_now_unix(); // Send a ping from server_b to server_a. // server_a should notice the enr_seq is outdated // and trigger a enr-request to server_b to update the record. server_b - .ping(server_a.local_node, server_b.table.lock().await) + .ping(server_a.ctx.local_node, server_b.ctx.table.lock().await) .await?; // Wait for the update to propagate. sleep(Duration::from_millis(2500)).await; // Verify that server_a has updated its record of server_b with the correct TCP port. 
- let table_lock = server_a.table.lock().await; + let table_lock = server_a.ctx.table.lock().await; let server_a_node_b_record = table_lock - .get_by_node_id(server_b.local_node.node_id) + .get_by_node_id(server_b.ctx.local_node.node_id) .unwrap(); - assert!(server_a_node_b_record.node.tcp_port == server_b.local_node.tcp_port); + assert!(server_a_node_b_record.node.tcp_port == server_b.ctx.local_node.tcp_port); Ok(()) } From 2e350027c4532d0271131de2e5346778c86c2a25 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Fri, 24 Jan 2025 16:09:26 -0300 Subject: [PATCH 26/33] refactor: use node.addr() instead of creating socket addr --- crates/networking/p2p/discv4/mod.rs | 17 ++++++----------- crates/networking/p2p/net.rs | 16 +++++++++------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 8fe943dab6..5055d4374d 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -53,10 +53,9 @@ pub struct Discv4 { impl Discv4 { pub async fn try_new(ctx: P2PContext) -> Result { - let udp_socket = - UdpSocket::bind(SocketAddr::new(ctx.local_node.ip, ctx.local_node.udp_port)) - .await - .map_err(DiscoveryError::BindSocket)?; + let udp_socket = UdpSocket::bind(ctx.local_node.udp_addr()) + .await + .map_err(DiscoveryError::BindSocket)?; Ok(Self { ctx, @@ -82,10 +81,6 @@ impl Discv4 { } } - pub fn addr(&self) -> SocketAddr { - SocketAddr::new(self.ctx.local_node.ip, self.ctx.local_node.udp_port) - } - pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { let lookup_handler = Discv4LookupHandler::new( self.ctx.clone(), @@ -565,7 +560,7 @@ impl Discv4 { ping.encode_with_header(&mut buf, &self.ctx.signer); let bytes_sent = self .udp_socket - .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) + .send_to(&buf, node.udp_addr()) .await .map_err(DiscoveryError::MessageSendFailure)?; @@ -595,7 +590,7 @@ impl Discv4 { let bytes_sent = self .udp_socket - .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) + .send_to(&buf, node.udp_addr()) .await .map_err(DiscoveryError::MessageSendFailure)?; @@ -626,7 +621,7 @@ impl Discv4 { let bytes_sent = self .udp_socket - .send_to(&buf, SocketAddr::new(node.ip, node.udp_port)) + .send_to(&buf, node.udp_addr()) .await .map_err(DiscoveryError::MessageSendFailure)?; if bytes_sent != buf.len() { diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index 8e3c72037c..ac080bd82f 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -85,14 +85,19 @@ pub async fn start_network( .await .map_err(NetworkError::DiscoveryStart)?; - info!("Starting discovery service at {}", discovery.addr()); + info!( + "Starting discovery service at {}", + context.local_node.udp_addr() + ); discovery .start(bootnodes) .await .map_err(NetworkError::DiscoveryStart)?; - let tcp_addr = context.local_node.tcp_addr(); - info!("Listening for requests at {tcp_addr}"); + info!( + "Listening for requests at {}", + context.local_node.tcp_addr() + ); context.tracker.spawn(serve_p2p_requests(context.clone())); Ok(()) @@ -153,10 +158,7 @@ async fn handle_peer_as_initiator(context: P2PContext, node: Node) { context.storage, context.broadcast, ) { - Ok(mut conn) => { - conn.start_peer(SocketAddr::new(node.ip, node.udp_port), context.table) - .await - } + Ok(mut conn) => conn.start_peer(node.udp_addr(), context.table).await, Err(e) => { // TODO We should remove the peer from the table if connection failed // but currently it 
will make the tests fail From 4ecebfc89fd022f0c46eb8505287ffec3e6d8fdc Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Mon, 27 Jan 2025 14:36:40 -0300 Subject: [PATCH 27/33] fix: add anr remove request msgs checks --- crates/networking/p2p/discv4/lookup.rs | 5 +++++ crates/networking/p2p/discv4/mod.rs | 10 +--------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index ac7dff3de9..b0f100d120 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -157,6 +157,11 @@ impl Discv4LookupHandler { { nodes.append(&mut found_nodes); } + + if let Some(peer) = self.ctx.table.lock().await.get_by_node_id_mut(node.node_id) + { + peer.find_node_request = None; + }; } } diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 5055d4374d..3f7956a36d 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -180,7 +180,7 @@ impl Discv4 { self.ping(node, self.ctx.table.lock().await).await?; } if let Some(enr_seq) = msg.enr_seq { - if enr_seq > peer.record.seq { + if enr_seq > peer.record.seq && peer.is_proven { debug!("Found outdated enr-seq, sending an enr_request"); self.send_enr_request(peer.node, self.ctx.table.lock().await) .await?; @@ -606,14 +606,6 @@ impl Discv4 { node: Node, mut table_lock: MutexGuard<'a, KademliaTable>, ) -> Result<(), DiscoveryError> { - // verify there isn't an ongoing request - if table_lock - .get_by_node_id(node.node_id) - .is_some_and(|p| p.enr_request_hash.is_some()) - { - return Ok(()); - }; - let mut buf = Vec::new(); let expiration: u64 = get_expiration(20); let enr_req = Message::ENRRequest(ENRRequestMessage::new(expiration)); From d6532fc292750e287118736e973a68b78d31c0aa Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Mon, 27 Jan 2025 15:07:26 -0300 Subject: [PATCH 28/33] refactor: apply some review suggestions --- crates/networking/p2p/discv4/lookup.rs | 12 +++++---- crates/networking/p2p/discv4/mod.rs | 34 +++++++----------------- crates/networking/p2p/net.rs | 4 +-- crates/networking/p2p/rlpx/connection.rs | 2 +- 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index b0f100d120..a7631a2e5e 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -43,7 +43,7 @@ impl Discv4LookupHandler { /// - Potential peers to query for nodes: a vector of up to 16 entries holding the closest peers to the pubkey. /// This vector is initially filled with nodes from our table. /// 3. We send a `find_node` to the closest 3 nodes (that we have not yet asked) from the pubkey. - /// 4. We wait for the neighbors response and pushed or replace those that are closer to the potential peers. + /// 4. We wait for the neighbors response and push or replace those that are closer to the potential peers array. /// 5. We select three other nodes from the potential peers vector and do the same until one lookup /// doesn't have any node to ask. 
/// @@ -52,12 +52,14 @@ impl Discv4LookupHandler { self.ctx.tracker.spawn({ let self_clone = self.clone(); async move { - self_clone.start_task(initial_interval_wait_seconds).await; + self_clone + .start_lookup_loop(initial_interval_wait_seconds) + .await; } }); } - async fn start_task(&self, initial_interval_wait_seconds: u64) { + async fn start_lookup_loop(&self, initial_interval_wait_seconds: u64) { let mut interval = tokio::time::interval(Duration::from_secs(self.interval_minutes)); tokio::time::sleep(Duration::from_secs(initial_interval_wait_seconds)).await; @@ -254,10 +256,10 @@ mod tests { connect_servers, fill_table_with_random_nodes, insert_random_node_on_custom_bucket, start_discovery_server, }, - Discv4, + Discv4Server, }; - fn lookup_handler_from_server(server: Discv4) -> Discv4LookupHandler { + fn lookup_handler_from_server(server: Discv4Server) -> Discv4LookupHandler { Discv4LookupHandler::new( server.ctx.clone(), server.udp_socket.clone(), diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 3f7956a36d..8115df7f14 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -44,14 +44,14 @@ pub enum DiscoveryError { } #[derive(Debug, Clone)] -pub struct Discv4 { +pub struct Discv4Server { ctx: P2PContext, udp_socket: Arc, revalidation_interval_seconds: u64, lookup_interval_minutes: u64, } -impl Discv4 { +impl Discv4Server { pub async fn try_new(ctx: P2PContext) -> Result { let udp_socket = UdpSocket::bind(ctx.local_node.udp_addr()) .await @@ -65,22 +65,6 @@ impl Discv4 { }) } - #[allow(unused)] - pub fn with_revalidation_interval_of(self, seconds: u64) -> Self { - Self { - revalidation_interval_seconds: seconds, - ..self - } - } - - #[allow(unused)] - pub fn with_lookup_interval_of(self, minutes: u64) -> Self { - Self { - lookup_interval_minutes: minutes, - ..self - } - } - pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { let lookup_handler = Discv4LookupHandler::new( self.ctx.clone(), @@ -470,7 +454,9 @@ impl Discv4 { // first check that the peers we ping have responded for node_id in previously_pinged_peers { let mut table_lock = self.ctx.table.lock().await; - let peer = table_lock.get_by_node_id_mut(node_id).unwrap(); + let Some(peer) = table_lock.get_by_node_id_mut(node_id) else { + continue; + }; if let Some(has_answered) = peer.revalidation { if has_answered { @@ -669,7 +655,7 @@ pub(super) mod tests { pub async fn start_discovery_server( udp_port: u16, should_start_server: bool, - ) -> Result { + ) -> Result { let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); let signer = SigningKey::random(&mut OsRng); let node_id = node_id_from_signing_key(&signer); @@ -698,7 +684,7 @@ pub(super) mod tests { broadcast, }; - let discv4 = Discv4::try_new(ctx).await?; + let discv4 = Discv4Server::try_new(ctx).await?; if should_start_server { tracker.spawn({ @@ -714,8 +700,8 @@ pub(super) mod tests { /// connects two mock servers by pinging a to b pub async fn connect_servers( - server_a: &mut Discv4, - server_b: &mut Discv4, + server_a: &mut Discv4Server, + server_b: &mut Discv4Server, ) -> Result<(), DiscoveryError> { server_a .try_add_peer_and_ping(server_b.ctx.local_node, server_a.ctx.table.lock().await) @@ -741,7 +727,7 @@ pub(super) mod tests { connect_servers(&mut server_a, &mut server_b).await?; - server_b = server_b.with_revalidation_interval_of(2); + server_b.revalidation_interval_seconds = 2; // start revalidation server server_b.ctx.tracker.spawn({ 
diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index ac080bd82f..5209d98671 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -1,5 +1,5 @@ use bootnode::BootNode; -use discv4::{helpers::time_now_unix, DiscoveryError, Discv4}; +use discv4::{helpers::time_now_unix, DiscoveryError, Discv4Server}; use ethrex_core::H512; use ethrex_storage::Store; use k256::{ @@ -81,7 +81,7 @@ pub async fn start_network( storage, broadcast: channel_broadcast_send_end, }; - let discovery = Discv4::try_new(context.clone()) + let discovery = Discv4Server::try_new(context.clone()) .await .map_err(NetworkError::DiscoveryStart)?; diff --git a/crates/networking/p2p/rlpx/connection.rs b/crates/networking/p2p/rlpx/connection.rs index 96e0d4404f..6158d337bd 100644 --- a/crates/networking/p2p/rlpx/connection.rs +++ b/crates/networking/p2p/rlpx/connection.rs @@ -600,7 +600,7 @@ impl RLPxConnection { let mut buf = vec![0; 2]; // Read the message's size - self.framed.get_mut().read_exact(&mut buf[..2]).await?; + self.framed.get_mut().read_exact(&mut buf).await?; let ack_data = [buf[0], buf[1]]; let msg_size = u16::from_be_bytes(ack_data) as usize; if msg_size > P2P_MAX_MESSAGE_SIZE { From 6487eff43c8ee5e5da1bdb6694f7398c95d9e925 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Mon, 27 Jan 2025 15:19:28 -0300 Subject: [PATCH 29/33] docs: discv4 server --- crates/networking/p2p/discv4/mod.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 8115df7f14..1014a2156e 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -43,6 +43,7 @@ pub enum DiscoveryError { InvalidMessage(String), } +/// Implements the discv4 protocol see: https://github.com/ethereum/devp2p/blob/master/discv4.md #[derive(Debug, Clone)] pub struct Discv4Server { ctx: P2PContext, @@ -52,6 +53,8 @@ pub struct Discv4Server { } impl Discv4Server { + /// Initializes a Discv4 UDP socket and creates a new `Discv4Server` instance. + /// Returns an error if the socket binding fails. pub async fn try_new(ctx: P2PContext) -> Result { let udp_socket = UdpSocket::bind(ctx.local_node.udp_addr()) .await @@ -65,6 +68,10 @@ impl Discv4Server { }) } + /// Initializes the discovery server. It: + /// - Spawns tasks to handle incoming messages and revalidate known nodes. + /// - Loads bootnodes to establish initial peer connections. + /// - Starts the lookup handler via [`Discv4LookupHandler`] to periodically search for new peers. 
pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { let lookup_handler = Discv4LookupHandler::new( self.ctx.clone(), From 5e76ccece3eaf17e5abd4be7f8450b05d335f36c Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Mon, 27 Jan 2025 15:26:01 -0300 Subject: [PATCH 30/33] refactor: remove time_now_unix from kademlia table --- crates/networking/p2p/discv4/helpers.rs | 2 +- crates/networking/p2p/discv4/messages.rs | 4 +-- crates/networking/p2p/discv4/mod.rs | 18 ++++++----- crates/networking/p2p/kademlia.rs | 39 +++++------------------- crates/networking/p2p/net.rs | 4 +-- 5 files changed, 24 insertions(+), 43 deletions(-) diff --git a/crates/networking/p2p/discv4/helpers.rs b/crates/networking/p2p/discv4/helpers.rs index 63b0edc9fa..f348963f7f 100644 --- a/crates/networking/p2p/discv4/helpers.rs +++ b/crates/networking/p2p/discv4/helpers.rs @@ -26,7 +26,7 @@ pub fn time_since_in_hs(time: u64) -> u64 { / 3600 } -pub fn time_now_unix() -> u64 { +pub fn current_unix_time() -> u64 { SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap_or_default() diff --git a/crates/networking/p2p/discv4/messages.rs b/crates/networking/p2p/discv4/messages.rs index 2a5c9951f7..e950881348 100644 --- a/crates/networking/p2p/discv4/messages.rs +++ b/crates/networking/p2p/discv4/messages.rs @@ -1,4 +1,4 @@ -use super::helpers::time_now_unix; +use super::helpers::current_unix_time; use crate::types::{Endpoint, Node, NodeRecord}; use bytes::BufMut; use ethrex_core::{H256, H512, H520}; @@ -296,7 +296,7 @@ impl Default for FindNodeRequest { fn default() -> Self { Self { nodes_sent: 0, - sent_at: time_now_unix(), + sent_at: current_unix_time(), tx: None, } } diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 1014a2156e..10051355b6 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -10,7 +10,7 @@ use crate::{ KademliaTable, P2PContext, }; use ethrex_core::H256; -use helpers::{get_expiration, is_expired, time_now_unix, time_since_in_hs}; +use helpers::{current_unix_time, get_expiration, is_expired, time_since_in_hs}; use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, VerifyingKey}; use lookup::Discv4LookupHandler; use messages::{ @@ -205,7 +205,11 @@ impl Discv4Server { } // all validations went well, mark as answered and start a rlpx connection - self.ctx.table.lock().await.pong_answered(peer.node.node_id); + self.ctx + .table + .lock() + .await + .pong_answered(peer.node.node_id, current_unix_time()); if let Some(enr_seq) = msg.enr_seq { if enr_seq > peer.record.seq { debug!("Found outdated enr-seq, send an enr_request"); @@ -280,7 +284,7 @@ impl Discv4Server { "find node request not sent".into(), )); }; - if time_now_unix().saturating_sub(req.sent_at) >= 60 { + if current_unix_time().saturating_sub(req.sent_at) >= 60 { node.find_node_request = None; return Err(DiscoveryError::InvalidMessage( "find_node request expired after one minute".into(), @@ -562,7 +566,7 @@ impl Discv4Server { } let hash = H256::from_slice(&buf[0..32]); - table_lock.update_peer_ping(node.node_id, Some(hash)); + table_lock.update_peer_ping(node.node_id, Some(hash), current_unix_time()); Ok(()) } @@ -683,7 +687,7 @@ pub(super) mod tests { let ctx = P2PContext { local_node, - enr_seq: time_now_unix(), + enr_seq: current_unix_time(), tracker: tracker.clone(), signer, table, @@ -809,7 +813,7 @@ pub(super) mod tests { let expected_record = NodeRecord::from_node( server_b.ctx.local_node, - time_now_unix(), + current_unix_time(), 
&server_b.ctx.signer, ) .expect("Node record is created from node"); @@ -841,7 +845,7 @@ pub(super) mod tests { // update the enr_seq of server_b so that server_a notices it is outdated // and sends a request to update it - server_b.ctx.enr_seq = time_now_unix(); + server_b.ctx.enr_seq = current_unix_time(); // Send a ping from server_b to server_a. // server_a should notice the enr_seq is outdated diff --git a/crates/networking/p2p/kademlia.rs b/crates/networking/p2p/kademlia.rs index d087568b27..7cf060ec76 100644 --- a/crates/networking/p2p/kademlia.rs +++ b/crates/networking/p2p/kademlia.rs @@ -94,7 +94,7 @@ impl KademliaTable { return (None, false); } - let peer = PeerData::new(node, NodeRecord::default(), time_now_unix(), 0, false); + let peer = PeerData::new(node, NodeRecord::default(), false); if self.buckets[bucket_idx].peers.len() == MAX_NODES_PER_BUCKET { self.insert_as_replacement(&peer, bucket_idx); @@ -148,7 +148,7 @@ impl KademliaTable { nodes.iter().map(|a| a.0).collect() } - pub fn pong_answered(&mut self, node_id: H512) { + pub fn pong_answered(&mut self, node_id: H512, pong_at: u64) { let peer = self.get_by_node_id_mut(node_id); if peer.is_none() { return; @@ -156,12 +156,12 @@ impl KademliaTable { let peer = peer.unwrap(); peer.is_proven = true; - peer.last_pong = time_now_unix(); + peer.last_pong = pong_at; peer.last_ping_hash = None; peer.revalidation = peer.revalidation.and(Some(true)); } - pub fn update_peer_ping(&mut self, node_id: H512, ping_hash: Option) { + pub fn update_peer_ping(&mut self, node_id: H512, ping_hash: Option, ping_at: u64) { let peer = self.get_by_node_id_mut(node_id); if peer.is_none() { return; @@ -169,17 +169,7 @@ impl KademliaTable { let peer = peer.unwrap(); peer.last_ping_hash = ping_hash; - peer.last_ping = time_now_unix(); - } - - pub fn update_peer_ping_with_revalidation(&mut self, node_id: H512, ping_hash: Option) { - let Some(peer) = self.get_by_node_id_mut(node_id) else { - return; - }; - - peer.last_ping_hash = ping_hash; - peer.last_ping = time_now_unix(); - peer.revalidation = Some(false); + peer.last_ping = ping_at; } /// ## Returns @@ -345,13 +335,6 @@ pub fn bucket_number(node_id_1: H512, node_id_2: H512) -> usize { distance.bits().saturating_sub(1) } -fn time_now_unix() -> u64 { - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs() -} - #[derive(Debug, Clone)] pub struct PeerData { pub node: Node, @@ -372,18 +355,12 @@ pub struct PeerData { } impl PeerData { - pub fn new( - node: Node, - record: NodeRecord, - last_ping: u64, - last_pong: u64, - is_proven: bool, - ) -> Self { + pub fn new(node: Node, record: NodeRecord, is_proven: bool) -> Self { Self { node, record, - last_ping, - last_pong, + last_ping: 0, + last_pong: 0, is_proven, liveness: 1, last_ping_hash: None, diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index 5209d98671..9648f16b5a 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -1,5 +1,5 @@ use bootnode::BootNode; -use discv4::{helpers::time_now_unix, DiscoveryError, Discv4Server}; +use discv4::{helpers::current_unix_time, DiscoveryError, Discv4Server}; use ethrex_core::H512; use ethrex_storage::Store; use k256::{ @@ -74,7 +74,7 @@ pub async fn start_network( // Note we are passing the current timestamp as the sequence number // This is because we are not storing our local_node updates in the db // see #1756 - enr_seq: time_now_unix(), + enr_seq: current_unix_time(), tracker, signer, table: peer_table, From 
fb16d7778946b33f67d7fb901195cc39214b960d Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Mon, 27 Jan 2025 15:57:35 -0300 Subject: [PATCH 31/33] refactor: discv4 helpers --- crates/networking/p2p/discv4/helpers.rs | 17 ++++++----------- crates/networking/p2p/discv4/lookup.rs | 4 ++-- crates/networking/p2p/discv4/mod.rs | 24 +++++++++++++----------- crates/networking/p2p/discv4/server.rs | 0 4 files changed, 21 insertions(+), 24 deletions(-) create mode 100644 crates/networking/p2p/discv4/server.rs diff --git a/crates/networking/p2p/discv4/helpers.rs b/crates/networking/p2p/discv4/helpers.rs index f348963f7f..b0f21e42f0 100644 --- a/crates/networking/p2p/discv4/helpers.rs +++ b/crates/networking/p2p/discv4/helpers.rs @@ -1,29 +1,24 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; -pub fn get_expiration(seconds: u64) -> u64 { +pub fn get_msg_expiration_from_seconds(seconds: u64) -> u64 { (SystemTime::now() + Duration::from_secs(seconds)) .duration_since(UNIX_EPOCH) .unwrap_or_default() .as_secs() } -pub fn is_expired(expiration: u64) -> bool { +pub fn is_msg_expired(expiration: u64) -> bool { // this cast to a signed integer is needed as the rlp decoder doesn't take into account the sign - // otherwise a potential negative expiration would pass since it would take 2^64. - (expiration as i64) - < SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs() as i64 + // otherwise if a msg contains a negative expiration, it would pass since as it would wrap around the u64. + (expiration as i64) < (current_unix_time() as i64) } -pub fn time_since_in_hs(time: u64) -> u64 { - let time = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(time); +pub fn elapsed_time_since(unix_timestamp: u64) -> u64 { + let time = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(unix_timestamp); SystemTime::now() .duration_since(time) .unwrap_or_default() .as_secs() - / 3600 } pub fn current_unix_time() -> u64 { diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index a7631a2e5e..486fc88608 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ b/crates/networking/p2p/discv4/lookup.rs @@ -1,4 +1,4 @@ -use super::{helpers::get_expiration, DiscoveryError, Message}; +use super::{helpers::get_msg_expiration_from_seconds, DiscoveryError, Message}; use crate::{ kademlia::{bucket_number, MAX_NODES_PER_BUCKET}, node_id_from_signing_key, @@ -208,7 +208,7 @@ impl Discv4LookupHandler { target_id: H512, request_receiver: &mut tokio::sync::mpsc::UnboundedReceiver>, ) -> Result, DiscoveryError> { - let expiration: u64 = get_expiration(20); + let expiration: u64 = get_msg_expiration_from_seconds(20); let msg = Message::FindNode(super::FindNodeMessage::new(target_id, expiration)); diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 10051355b6..9690a3d501 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -10,7 +10,9 @@ use crate::{ KademliaTable, P2PContext, }; use ethrex_core::H256; -use helpers::{current_unix_time, get_expiration, is_expired, time_since_in_hs}; +use helpers::{ + current_unix_time, elapsed_time_since, get_msg_expiration_from_seconds, is_msg_expired, +}; use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, VerifyingKey}; use lookup::Discv4LookupHandler; use messages::{ @@ -142,7 +144,7 @@ impl Discv4Server { async fn handle_message(&self, packet: Packet, from: SocketAddr) -> Result<(), DiscoveryError> { match packet.get_message() { 
Message::Ping(msg) => { - if is_expired(msg.expiration) { + if is_msg_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); }; @@ -167,7 +169,7 @@ impl Discv4Server { // if peer was in the table and last ping was 12 hs ago // we need to re ping to re-validate the endpoint proof - if time_since_in_hs(peer.last_ping) >= PROOF_EXPIRATION_IN_HS { + if elapsed_time_since(peer.last_ping) / 3600 >= PROOF_EXPIRATION_IN_HS { self.ping(node, self.ctx.table.lock().await).await?; } if let Some(enr_seq) = msg.enr_seq { @@ -181,7 +183,7 @@ impl Discv4Server { Ok(()) } Message::Pong(msg) => { - if is_expired(msg.expiration) { + if is_msg_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); } @@ -224,7 +226,7 @@ impl Discv4Server { Ok(()) } Message::FindNode(msg) => { - if is_expired(msg.expiration) { + if is_msg_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); }; let node = { @@ -244,7 +246,7 @@ impl Discv4Server { table.get_closest_nodes(msg.target) }; let nodes_chunks = nodes.chunks(4); - let expiration = get_expiration(20); + let expiration = get_msg_expiration_from_seconds(20); debug!("Sending neighbors!"); // we are sending the neighbors in 4 different messages as not to exceed the @@ -269,7 +271,7 @@ impl Discv4Server { Ok(()) } Message::Neighbors(neighbors_msg) => { - if is_expired(neighbors_msg.expiration) { + if is_msg_expired(neighbors_msg.expiration) { return Err(DiscoveryError::MessageExpired); }; @@ -327,7 +329,7 @@ impl Discv4Server { Ok(()) } Message::ENRRequest(msg) => { - if is_expired(msg.expiration) { + if is_msg_expired(msg.expiration) { return Err(DiscoveryError::MessageExpired); } let Ok(node_record) = @@ -540,7 +542,7 @@ impl Discv4Server { mut table_lock: MutexGuard<'a, KademliaTable>, ) -> Result<(), DiscoveryError> { let mut buf = Vec::new(); - let expiration: u64 = get_expiration(20); + let expiration: u64 = get_msg_expiration_from_seconds(20); let from = Endpoint { ip: self.ctx.local_node.ip, udp_port: self.ctx.local_node.udp_port, @@ -573,7 +575,7 @@ impl Discv4Server { async fn pong(&self, ping_hash: H256, node: Node) -> Result<(), DiscoveryError> { let mut buf = Vec::new(); - let expiration: u64 = get_expiration(20); + let expiration: u64 = get_msg_expiration_from_seconds(20); let to = Endpoint { ip: node.ip, udp_port: node.udp_port, @@ -604,7 +606,7 @@ impl Discv4Server { mut table_lock: MutexGuard<'a, KademliaTable>, ) -> Result<(), DiscoveryError> { let mut buf = Vec::new(); - let expiration: u64 = get_expiration(20); + let expiration: u64 = get_msg_expiration_from_seconds(20); let enr_req = Message::ENRRequest(ENRRequestMessage::new(expiration)); enr_req.encode_with_header(&mut buf, &self.ctx.signer); diff --git a/crates/networking/p2p/discv4/server.rs b/crates/networking/p2p/discv4/server.rs new file mode 100644 index 0000000000..e69de29bb2 From 28f97de58bd4c19b412b320aa10638a9a10f9efe Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Mon, 27 Jan 2025 16:12:58 -0300 Subject: [PATCH 32/33] refactor: move discv4 server from mod to new file --- crates/networking/p2p/discv4/lookup.rs | 10 +- crates/networking/p2p/discv4/mod.rs | 870 +------------------------ crates/networking/p2p/discv4/server.rs | 870 +++++++++++++++++++++++++ crates/networking/p2p/net.rs | 5 +- 4 files changed, 882 insertions(+), 873 deletions(-) diff --git a/crates/networking/p2p/discv4/lookup.rs b/crates/networking/p2p/discv4/lookup.rs index 486fc88608..c1a62f4418 100644 --- a/crates/networking/p2p/discv4/lookup.rs +++ 
b/crates/networking/p2p/discv4/lookup.rs @@ -1,4 +1,8 @@ -use super::{helpers::get_msg_expiration_from_seconds, DiscoveryError, Message}; +use super::{ + helpers::get_msg_expiration_from_seconds, + messages::{FindNodeMessage, Message}, + server::DiscoveryError, +}; use crate::{ kademlia::{bucket_number, MAX_NODES_PER_BUCKET}, node_id_from_signing_key, @@ -210,7 +214,7 @@ impl Discv4LookupHandler { ) -> Result, DiscoveryError> { let expiration: u64 = get_msg_expiration_from_seconds(20); - let msg = Message::FindNode(super::FindNodeMessage::new(target_id, expiration)); + let msg = Message::FindNode(FindNodeMessage::new(target_id, expiration)); let mut buf = Vec::new(); msg.encode_with_header(&mut buf, &self.ctx.signer); @@ -251,7 +255,7 @@ mod tests { use tokio::time::sleep; use super::*; - use crate::discv4::{ + use crate::discv4::server::{ tests::{ connect_servers, fill_table_with_random_nodes, insert_random_node_on_custom_bucket, start_discovery_server, diff --git a/crates/networking/p2p/discv4/mod.rs b/crates/networking/p2p/discv4/mod.rs index 9690a3d501..51104b57d5 100644 --- a/crates/networking/p2p/discv4/mod.rs +++ b/crates/networking/p2p/discv4/mod.rs @@ -1,872 +1,4 @@ pub(super) mod helpers; mod lookup; pub(super) mod messages; - -use crate::{ - bootnode::BootNode, - handle_peer_as_initiator, - kademlia::MAX_NODES_PER_BUCKET, - types::{Endpoint, Node, NodeRecord}, - KademliaTable, P2PContext, -}; -use ethrex_core::H256; -use helpers::{ - current_unix_time, elapsed_time_since, get_msg_expiration_from_seconds, is_msg_expired, -}; -use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, VerifyingKey}; -use lookup::Discv4LookupHandler; -use messages::{ - ENRRequestMessage, ENRResponseMessage, FindNodeMessage, Message, NeighborsMessage, Packet, - PingMessage, PongMessage, -}; -use std::{ - collections::HashSet, - net::{IpAddr, Ipv4Addr, SocketAddr}, - sync::Arc, - time::Duration, -}; -use tokio::{net::UdpSocket, sync::MutexGuard}; -use tracing::{debug, error}; - -const MAX_DISC_PACKET_SIZE: usize = 1280; -const PROOF_EXPIRATION_IN_HS: u64 = 12; - -// These interval times are arbitrary numbers, maybe we should read them from a cfg or a cli param -const REVALIDATION_INTERVAL_IN_SECONDS: u64 = 30; -const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; - -#[derive(Debug)] -#[allow(dead_code)] -pub enum DiscoveryError { - BindSocket(std::io::Error), - MessageSendFailure(std::io::Error), - PartialMessageSent, - MessageExpired, - InvalidMessage(String), -} - -/// Implements the discv4 protocol see: https://github.com/ethereum/devp2p/blob/master/discv4.md -#[derive(Debug, Clone)] -pub struct Discv4Server { - ctx: P2PContext, - udp_socket: Arc, - revalidation_interval_seconds: u64, - lookup_interval_minutes: u64, -} - -impl Discv4Server { - /// Initializes a Discv4 UDP socket and creates a new `Discv4Server` instance. - /// Returns an error if the socket binding fails. - pub async fn try_new(ctx: P2PContext) -> Result { - let udp_socket = UdpSocket::bind(ctx.local_node.udp_addr()) - .await - .map_err(DiscoveryError::BindSocket)?; - - Ok(Self { - ctx, - udp_socket: Arc::new(udp_socket), - revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, - lookup_interval_minutes: PEERS_RANDOM_LOOKUP_TIME_IN_MIN, - }) - } - - /// Initializes the discovery server. It: - /// - Spawns tasks to handle incoming messages and revalidate known nodes. - /// - Loads bootnodes to establish initial peer connections. 
- /// - Starts the lookup handler via [`Discv4LookupHandler`] to periodically search for new peers. - pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { - let lookup_handler = Discv4LookupHandler::new( - self.ctx.clone(), - self.udp_socket.clone(), - self.lookup_interval_minutes, - ); - - self.ctx.tracker.spawn({ - let self_clone = self.clone(); - async move { self_clone.receive().await } - }); - self.ctx.tracker.spawn({ - let self_clone = self.clone(); - async move { self_clone.start_revalidation().await } - }); - self.load_bootnodes(bootnodes).await; - lookup_handler.start(10); - - Ok(()) - } - - async fn load_bootnodes(&self, bootnodes: Vec) { - for bootnode in bootnodes { - let node = Node { - ip: bootnode.socket_address.ip(), - udp_port: bootnode.socket_address.port(), - // TODO: udp port can differ from tcp port. - // see https://github.com/lambdaclass/ethrex/issues/905 - tcp_port: bootnode.socket_address.port(), - node_id: bootnode.node_id, - }; - if let Err(e) = self - .try_add_peer_and_ping(node, self.ctx.table.lock().await) - .await - { - debug!("Error while adding bootnode to table: {:?}", e); - }; - } - } - - pub async fn receive(&self) { - let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; - - loop { - let (read, from) = match self.udp_socket.recv_from(&mut buf).await { - Ok(result) => result, - Err(e) => { - error!("Error receiving data from socket: {e}. Stopping discovery server"); - return; - } - }; - debug!("Received {read} bytes from {from}"); - - match Packet::decode(&buf[..read]) { - Err(e) => error!("Could not decode packet: {:?}", e), - Ok(packet) => { - let msg = packet.get_message(); - let msg_name = msg.to_string(); - debug!("Message: {:?} from {}", msg, packet.get_node_id()); - if let Err(e) = self.handle_message(packet, from).await { - debug!("Error while processing {} message: {:?}", msg_name, e); - }; - } - } - } - } - - async fn handle_message(&self, packet: Packet, from: SocketAddr) -> Result<(), DiscoveryError> { - match packet.get_message() { - Message::Ping(msg) => { - if is_msg_expired(msg.expiration) { - return Err(DiscoveryError::MessageExpired); - }; - - let node = Node { - ip: from.ip(), - udp_port: from.port(), - tcp_port: msg.from.tcp_port, - node_id: packet.get_node_id(), - }; - self.pong(packet.get_hash(), node).await?; - - let peer = { - let table = self.ctx.table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - - let Some(peer) = peer else { - self.try_add_peer_and_ping(node, self.ctx.table.lock().await) - .await?; - return Ok(()); - }; - - // if peer was in the table and last ping was 12 hs ago - // we need to re ping to re-validate the endpoint proof - if elapsed_time_since(peer.last_ping) / 3600 >= PROOF_EXPIRATION_IN_HS { - self.ping(node, self.ctx.table.lock().await).await?; - } - if let Some(enr_seq) = msg.enr_seq { - if enr_seq > peer.record.seq && peer.is_proven { - debug!("Found outdated enr-seq, sending an enr_request"); - self.send_enr_request(peer.node, self.ctx.table.lock().await) - .await?; - } - } - - Ok(()) - } - Message::Pong(msg) => { - if is_msg_expired(msg.expiration) { - return Err(DiscoveryError::MessageExpired); - } - - let peer = { - let table = self.ctx.table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - let Some(peer) = peer else { - return Err(DiscoveryError::InvalidMessage("not known node".into())); - }; - - let Some(ping_hash) = peer.last_ping_hash else { - return Err(DiscoveryError::InvalidMessage( - "node did not send a previous ping".into(), - 
)); - }; - if ping_hash != msg.ping_hash { - return Err(DiscoveryError::InvalidMessage( - "hash did not match the last corresponding ping".into(), - )); - } - - // all validations went well, mark as answered and start a rlpx connection - self.ctx - .table - .lock() - .await - .pong_answered(peer.node.node_id, current_unix_time()); - if let Some(enr_seq) = msg.enr_seq { - if enr_seq > peer.record.seq { - debug!("Found outdated enr-seq, send an enr_request"); - self.send_enr_request(peer.node, self.ctx.table.lock().await) - .await?; - } - } - let ctx = self.ctx.clone(); - self.ctx - .tracker - .spawn(async move { handle_peer_as_initiator(ctx, peer.node).await }); - Ok(()) - } - Message::FindNode(msg) => { - if is_msg_expired(msg.expiration) { - return Err(DiscoveryError::MessageExpired); - }; - let node = { - let table = self.ctx.table.lock().await; - table.get_by_node_id(packet.get_node_id()).cloned() - }; - - let Some(node) = node else { - return Err(DiscoveryError::InvalidMessage("not a known node".into())); - }; - if !node.is_proven { - return Err(DiscoveryError::InvalidMessage("node isn't proven".into())); - } - - let nodes = { - let table = self.ctx.table.lock().await; - table.get_closest_nodes(msg.target) - }; - let nodes_chunks = nodes.chunks(4); - let expiration = get_msg_expiration_from_seconds(20); - - debug!("Sending neighbors!"); - // we are sending the neighbors in 4 different messages as not to exceed the - // maximum packet size - for nodes in nodes_chunks { - let neighbors = - Message::Neighbors(NeighborsMessage::new(nodes.to_vec(), expiration)); - let mut buf = Vec::new(); - neighbors.encode_with_header(&mut buf, &self.ctx.signer); - - let bytes_sent = self - .udp_socket - .send_to(&buf, from) - .await - .map_err(DiscoveryError::MessageSendFailure)?; - - if bytes_sent != buf.len() { - return Err(DiscoveryError::PartialMessageSent); - } - } - - Ok(()) - } - Message::Neighbors(neighbors_msg) => { - if is_msg_expired(neighbors_msg.expiration) { - return Err(DiscoveryError::MessageExpired); - }; - - let mut table_lock = self.ctx.table.lock().await; - - let Some(node) = table_lock.get_by_node_id_mut(packet.get_node_id()) else { - return Err(DiscoveryError::InvalidMessage("not a known node".into())); - }; - - let Some(req) = &mut node.find_node_request else { - return Err(DiscoveryError::InvalidMessage( - "find node request not sent".into(), - )); - }; - if current_unix_time().saturating_sub(req.sent_at) >= 60 { - node.find_node_request = None; - return Err(DiscoveryError::InvalidMessage( - "find_node request expired after one minute".into(), - )); - } - - let nodes = &neighbors_msg.nodes; - let total_nodes_sent = req.nodes_sent + nodes.len(); - - if total_nodes_sent > MAX_NODES_PER_BUCKET { - node.find_node_request = None; - return Err(DiscoveryError::InvalidMessage( - "sent more than allowed nodes".into(), - )); - } - - // update the number of node_sent - // and forward the nodes sent if a channel is attached - req.nodes_sent = total_nodes_sent; - if let Some(tx) = &req.tx { - let _ = tx.send(nodes.clone()); - } - - if total_nodes_sent == MAX_NODES_PER_BUCKET { - debug!("Neighbors request has been fulfilled"); - node.find_node_request = None; - } - - // release the lock early - // as we might be a long time pinging all the new nodes - drop(table_lock); - - debug!("Storing neighbors in our table!"); - for node in nodes { - let _ = self - .try_add_peer_and_ping(*node, self.ctx.table.lock().await) - .await; - } - - Ok(()) - } - Message::ENRRequest(msg) => { - if 
is_msg_expired(msg.expiration) { - return Err(DiscoveryError::MessageExpired); - } - let Ok(node_record) = - NodeRecord::from_node(self.ctx.local_node, self.ctx.enr_seq, &self.ctx.signer) - else { - return Err(DiscoveryError::InvalidMessage( - "could not build local node record".into(), - )); - }; - let msg = - Message::ENRResponse(ENRResponseMessage::new(packet.get_hash(), node_record)); - let mut buf = vec![]; - msg.encode_with_header(&mut buf, &self.ctx.signer); - - let bytes_sent = self - .udp_socket - .send_to(&buf, from) - .await - .map_err(DiscoveryError::MessageSendFailure)?; - - if bytes_sent != buf.len() { - return Err(DiscoveryError::PartialMessageSent); - } - - Ok(()) - } - Message::ENRResponse(msg) => { - let mut table_lock = self.ctx.table.lock().await; - let peer = table_lock.get_by_node_id_mut(packet.get_node_id()); - let Some(peer) = peer else { - return Err(DiscoveryError::InvalidMessage("Peer not known".into())); - }; - - let Some(req_hash) = peer.enr_request_hash else { - return Err(DiscoveryError::InvalidMessage( - "Discarding enr-response as enr-request wasn't sent".into(), - )); - }; - if req_hash != msg.request_hash { - return Err(DiscoveryError::InvalidMessage( - "Discarding enr-response did not match enr-request hash".into(), - )); - } - peer.enr_request_hash = None; - - if msg.node_record.seq < peer.record.seq { - return Err(DiscoveryError::InvalidMessage( - "msg node record is lower than the one we have".into(), - )); - } - - let record = msg.node_record.decode_pairs(); - let Some(id) = record.id else { - return Err(DiscoveryError::InvalidMessage( - "msg node record does not have required `id` field".into(), - )); - }; - - // https://github.com/ethereum/devp2p/blob/master/enr.md#v4-identity-scheme - let signature_valid = match id.as_str() { - "v4" => { - let digest = msg.node_record.get_signature_digest(); - let Some(public_key) = record.secp256k1 else { - return Err(DiscoveryError::InvalidMessage( - "signature could not be verified because public key was not provided".into(), - )); - }; - let signature_bytes = msg.node_record.signature.as_bytes(); - let Ok(signature) = Signature::from_slice(&signature_bytes[0..64]) else { - return Err(DiscoveryError::InvalidMessage( - "signature could not be build from msg signature bytes".into(), - )); - }; - let Ok(verifying_key) = - VerifyingKey::from_sec1_bytes(public_key.as_bytes()) - else { - return Err(DiscoveryError::InvalidMessage( - "public key could no be built from msg pub key bytes".into(), - )); - }; - verifying_key.verify_prehash(&digest, &signature).is_ok() - } - _ => false, - }; - if !signature_valid { - return Err(DiscoveryError::InvalidMessage( - "Signature verification invalid".into(), - )); - } - - if let Some(ip) = record.ip { - peer.node.ip = IpAddr::from(Ipv4Addr::from_bits(ip)); - } - if let Some(tcp_port) = record.tcp_port { - peer.node.tcp_port = tcp_port; - } - if let Some(udp_port) = record.udp_port { - peer.node.udp_port = udp_port; - } - peer.record = msg.node_record.clone(); - debug!( - "Node with id {:?} record has been successfully updated", - peer.node.node_id - ); - Ok(()) - } - } - } - - /// Starts a tokio scheduler that: - /// - performs periodic revalidation of the current nodes (sends a ping to the old nodes). - /// - /// **Peer revalidation** - /// - /// Peers revalidation works in the following manner: - /// 1. Every `revalidation_interval_seconds` we ping the 3 least recently pinged peers - /// 2. 
In the next iteration we check if they have answered - /// - if they have: we increment the liveness field by one - /// - otherwise we decrement it by the current value / 3. - /// 3. If the liveness field is 0, then we delete it and insert a new one from the replacements table - /// - /// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#kademlia-table - async fn start_revalidation(&self) { - let mut interval = - tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); - - // first tick starts immediately - interval.tick().await; - - let mut previously_pinged_peers = HashSet::new(); - loop { - interval.tick().await; - debug!("Running peer revalidation"); - - // first check that the peers we ping have responded - for node_id in previously_pinged_peers { - let mut table_lock = self.ctx.table.lock().await; - let Some(peer) = table_lock.get_by_node_id_mut(node_id) else { - continue; - }; - - if let Some(has_answered) = peer.revalidation { - if has_answered { - peer.increment_liveness(); - } else { - peer.decrement_liveness(); - } - } - - peer.revalidation = None; - - if peer.liveness == 0 { - let new_peer = table_lock.replace_peer(node_id); - if let Some(new_peer) = new_peer { - let _ = self.ping(new_peer.node, table_lock).await; - } - } - } - - // now send a ping to the least recently pinged peers - // this might be too expensive to run if our table is filled - // maybe we could just pick them randomly - let peers = self - .ctx - .table - .lock() - .await - .get_least_recently_pinged_peers(3); - previously_pinged_peers = HashSet::default(); - for peer in peers { - debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); - let _ = self.ping(peer.node, self.ctx.table.lock().await).await; - previously_pinged_peers.insert(peer.node.node_id); - let mut table = self.ctx.table.lock().await; - let peer = table.get_by_node_id_mut(peer.node.node_id); - if let Some(peer) = peer { - peer.revalidation = Some(false); - } - } - - debug!("Peer revalidation finished"); - } - } - - /// Attempts to add a node to the Kademlia table and send a ping if necessary. - /// - /// - If the node is **not found** in the table and there is enough space, it will be added, - /// and a ping message will be sent to verify connectivity. - /// - If the node is **already present**, no action is taken. 
- async fn try_add_peer_and_ping<'a>( - &self, - node: Node, - mut table_lock: MutexGuard<'a, KademliaTable>, - ) -> Result<(), DiscoveryError> { - // sanity check to make sure we are not storing ourselves - // a case that may happen in a neighbor message for example - if node.node_id == self.ctx.local_node.node_id { - return Ok(()); - } - - if let (Some(peer), true) = table_lock.insert_node(node) { - self.ping(peer.node, table_lock).await?; - }; - Ok(()) - } - - async fn ping<'a>( - &self, - node: Node, - mut table_lock: MutexGuard<'a, KademliaTable>, - ) -> Result<(), DiscoveryError> { - let mut buf = Vec::new(); - let expiration: u64 = get_msg_expiration_from_seconds(20); - let from = Endpoint { - ip: self.ctx.local_node.ip, - udp_port: self.ctx.local_node.udp_port, - tcp_port: self.ctx.local_node.tcp_port, - }; - let to = Endpoint { - ip: node.ip, - udp_port: node.udp_port, - tcp_port: node.tcp_port, - }; - - let ping = - Message::Ping(PingMessage::new(from, to, expiration).with_enr_seq(self.ctx.enr_seq)); - ping.encode_with_header(&mut buf, &self.ctx.signer); - let bytes_sent = self - .udp_socket - .send_to(&buf, node.udp_addr()) - .await - .map_err(DiscoveryError::MessageSendFailure)?; - - if bytes_sent != buf.len() { - return Err(DiscoveryError::PartialMessageSent); - } - - let hash = H256::from_slice(&buf[0..32]); - table_lock.update_peer_ping(node.node_id, Some(hash), current_unix_time()); - - Ok(()) - } - - async fn pong(&self, ping_hash: H256, node: Node) -> Result<(), DiscoveryError> { - let mut buf = Vec::new(); - let expiration: u64 = get_msg_expiration_from_seconds(20); - let to = Endpoint { - ip: node.ip, - udp_port: node.udp_port, - tcp_port: node.tcp_port, - }; - - let pong = Message::Pong( - PongMessage::new(to, ping_hash, expiration).with_enr_seq(self.ctx.enr_seq), - ); - pong.encode_with_header(&mut buf, &self.ctx.signer); - - let bytes_sent = self - .udp_socket - .send_to(&buf, node.udp_addr()) - .await - .map_err(DiscoveryError::MessageSendFailure)?; - - if bytes_sent != buf.len() { - Err(DiscoveryError::PartialMessageSent) - } else { - Ok(()) - } - } - - async fn send_enr_request<'a>( - &self, - node: Node, - mut table_lock: MutexGuard<'a, KademliaTable>, - ) -> Result<(), DiscoveryError> { - let mut buf = Vec::new(); - let expiration: u64 = get_msg_expiration_from_seconds(20); - let enr_req = Message::ENRRequest(ENRRequestMessage::new(expiration)); - enr_req.encode_with_header(&mut buf, &self.ctx.signer); - - let bytes_sent = self - .udp_socket - .send_to(&buf, node.udp_addr()) - .await - .map_err(DiscoveryError::MessageSendFailure)?; - if bytes_sent != buf.len() { - return Err(DiscoveryError::PartialMessageSent); - } - - let hash = H256::from_slice(&buf[0..32]); - if let Some(peer) = table_lock.get_by_node_id_mut(node.node_id) { - peer.enr_request_hash = Some(hash); - }; - - Ok(()) - } -} - -#[cfg(test)] -pub(super) mod tests { - use super::*; - use crate::{ - node_id_from_signing_key, rlpx::message::Message as RLPxMessage, MAX_MESSAGES_TO_BROADCAST, - }; - use ethrex_storage::{EngineType, Store}; - use k256::ecdsa::SigningKey; - use rand::rngs::OsRng; - use std::net::{IpAddr, Ipv4Addr}; - use tokio::{sync::Mutex, time::sleep}; - - pub async fn insert_random_node_on_custom_bucket( - table: Arc>, - bucket_idx: usize, - ) { - let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); - let node = Node { - ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), - tcp_port: 0, - udp_port: 0, - node_id, - }; - table - .lock() - .await - 
.insert_node_on_custom_bucket(node, bucket_idx); - } - - pub async fn fill_table_with_random_nodes(table: Arc>) { - for i in 0..256 { - for _ in 0..16 { - insert_random_node_on_custom_bucket(table.clone(), i).await; - } - } - } - - pub async fn start_discovery_server( - udp_port: u16, - should_start_server: bool, - ) -> Result { - let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port); - let signer = SigningKey::random(&mut OsRng); - let node_id = node_id_from_signing_key(&signer); - let local_node = Node { - ip: addr.ip(), - node_id, - udp_port, - tcp_port: udp_port, - }; - - let storage = - Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB"); - let table = Arc::new(Mutex::new(KademliaTable::new(node_id))); - let (broadcast, _) = tokio::sync::broadcast::channel::<(tokio::task::Id, Arc)>( - MAX_MESSAGES_TO_BROADCAST, - ); - let tracker = tokio_util::task::TaskTracker::new(); - - let ctx = P2PContext { - local_node, - enr_seq: current_unix_time(), - tracker: tracker.clone(), - signer, - table, - storage, - broadcast, - }; - - let discv4 = Discv4Server::try_new(ctx).await?; - - if should_start_server { - tracker.spawn({ - let discv4 = discv4.clone(); - async move { - discv4.receive().await; - } - }); - } - - Ok(discv4) - } - - /// connects two mock servers by pinging a to b - pub async fn connect_servers( - server_a: &mut Discv4Server, - server_b: &mut Discv4Server, - ) -> Result<(), DiscoveryError> { - server_a - .try_add_peer_and_ping(server_b.ctx.local_node, server_a.ctx.table.lock().await) - .await?; - // allow some time for the server to respond - sleep(Duration::from_secs(1)).await; - Ok(()) - } - - #[tokio::test] - /** This is a end to end test on the discovery server, the idea is as follows: - * - We'll start two discovery servers (`a` & `b`) to ping between each other - * - We'll make `b` ping `a`, and validate that the connection is right - * - Then we'll wait for a revalidation where we expect everything to be the same - * - We'll do this five 5 more times - * - Then we'll stop server `a` so that it doesn't respond to re-validations - * - We expect server `b` to remove node `a` from its table after 3 re-validations - * To make this run faster, we'll change the revalidation time to be every 2secs - */ - async fn discovery_server_revalidation() -> Result<(), DiscoveryError> { - let mut server_a = start_discovery_server(7998, true).await?; - let mut server_b = start_discovery_server(7999, true).await?; - - connect_servers(&mut server_a, &mut server_b).await?; - - server_b.revalidation_interval_seconds = 2; - - // start revalidation server - server_b.ctx.tracker.spawn({ - let server_b = server_b.clone(); - async move { server_b.start_revalidation().await } - }); - - for _ in 0..5 { - sleep(Duration::from_millis(2500)).await; - // by now, b should've send a revalidation to a - let table = server_b.ctx.table.lock().await; - let node = table.get_by_node_id(server_a.ctx.local_node.node_id); - assert!(node.is_some_and(|n| n.revalidation.is_some())); - } - - // make sure that `a` has responded too all the re-validations - // we can do that by checking the liveness - { - let table = server_b.ctx.table.lock().await; - let node = table.get_by_node_id(server_a.ctx.local_node.node_id); - assert_eq!(node.map_or(0, |n| n.liveness), 6); - } - - // now, stopping server `a` is not trivial - // so we'll instead change its port, so that no one responds - { - let mut table = server_b.ctx.table.lock().await; - let node = 
table.get_by_node_id_mut(server_a.ctx.local_node.node_id); - if let Some(node) = node { - node.node.udp_port = 0 - }; - } - - // now the liveness field should start decreasing until it gets to 0 - // which should happen in 3 re-validations - for _ in 0..2 { - sleep(Duration::from_millis(2500)).await; - let table = server_b.ctx.table.lock().await; - let node = table.get_by_node_id(server_a.ctx.local_node.node_id); - assert!(node.is_some_and(|n| n.revalidation.is_some())); - } - sleep(Duration::from_millis(2500)).await; - - // finally, `a`` should not exist anymore - let table = server_b.ctx.table.lock().await; - assert!(table - .get_by_node_id(server_a.ctx.local_node.node_id) - .is_none()); - Ok(()) - } - - #[tokio::test] - /** - * This test verifies the exchange and update of ENR (Ethereum Node Record) messages. - * The test follows these steps: - * - * 1. Start two nodes. - * 2. Wait until they establish a connection. - * 3. Assert that they exchange their records and store them - * 3. Modify the ENR (node record) of one of the nodes. - * 4. Send a new ping message and check that an ENR request was triggered. - * 5. Verify that the updated node record has been correctly received and stored. - */ - async fn discovery_enr_message() -> Result<(), DiscoveryError> { - let mut server_a = start_discovery_server(8006, true).await?; - let mut server_b = start_discovery_server(8007, true).await?; - - connect_servers(&mut server_a, &mut server_b).await?; - - // wait some time for the enr request-response finishes - sleep(Duration::from_millis(2500)).await; - - let expected_record = NodeRecord::from_node( - server_b.ctx.local_node, - current_unix_time(), - &server_b.ctx.signer, - ) - .expect("Node record is created from node"); - - let server_a_peer_b = server_a - .ctx - .table - .lock() - .await - .get_by_node_id(server_b.ctx.local_node.node_id) - .cloned() - .unwrap(); - - // we only match the pairs, as the signature and seq will change - // because they are calculated with the current time - assert!(server_a_peer_b.record.decode_pairs() == expected_record.decode_pairs()); - - // Modify server_a's record of server_b with an incorrect TCP port. - // This simulates an outdated or incorrect entry in the node table. - server_a - .ctx - .table - .lock() - .await - .get_by_node_id_mut(server_b.ctx.local_node.node_id) - .unwrap() - .node - .tcp_port = 10; - - // update the enr_seq of server_b so that server_a notices it is outdated - // and sends a request to update it - server_b.ctx.enr_seq = current_unix_time(); - - // Send a ping from server_b to server_a. - // server_a should notice the enr_seq is outdated - // and trigger a enr-request to server_b to update the record. - server_b - .ping(server_a.ctx.local_node, server_b.ctx.table.lock().await) - .await?; - - // Wait for the update to propagate. - sleep(Duration::from_millis(2500)).await; - - // Verify that server_a has updated its record of server_b with the correct TCP port. 
- let table_lock = server_a.ctx.table.lock().await; - let server_a_node_b_record = table_lock - .get_by_node_id(server_b.ctx.local_node.node_id) - .unwrap(); - - assert!(server_a_node_b_record.node.tcp_port == server_b.ctx.local_node.tcp_port); - - Ok(()) - } -} +pub mod server; diff --git a/crates/networking/p2p/discv4/server.rs b/crates/networking/p2p/discv4/server.rs index e69de29bb2..ae2b6550a6 100644 --- a/crates/networking/p2p/discv4/server.rs +++ b/crates/networking/p2p/discv4/server.rs @@ -0,0 +1,870 @@ +use super::{ + helpers::{ + current_unix_time, elapsed_time_since, get_msg_expiration_from_seconds, is_msg_expired, + }, + lookup::Discv4LookupHandler, + messages::{ + ENRRequestMessage, ENRResponseMessage, FindNodeMessage, Message, NeighborsMessage, Packet, + PingMessage, PongMessage, + }, +}; +use crate::{ + bootnode::BootNode, + handle_peer_as_initiator, + kademlia::MAX_NODES_PER_BUCKET, + types::{Endpoint, Node, NodeRecord}, + KademliaTable, P2PContext, +}; +use ethrex_core::H256; +use k256::ecdsa::{signature::hazmat::PrehashVerifier, Signature, VerifyingKey}; +use std::{ + collections::HashSet, + net::{IpAddr, Ipv4Addr, SocketAddr}, + sync::Arc, + time::Duration, +}; +use tokio::{net::UdpSocket, sync::MutexGuard}; +use tracing::{debug, error}; + +const MAX_DISC_PACKET_SIZE: usize = 1280; +const PROOF_EXPIRATION_IN_HS: u64 = 12; + +// These interval times are arbitrary numbers, maybe we should read them from a cfg or a cli param +const REVALIDATION_INTERVAL_IN_SECONDS: u64 = 30; +const PEERS_RANDOM_LOOKUP_TIME_IN_MIN: u64 = 30; + +#[derive(Debug)] +#[allow(dead_code)] +pub enum DiscoveryError { + BindSocket(std::io::Error), + MessageSendFailure(std::io::Error), + PartialMessageSent, + MessageExpired, + InvalidMessage(String), +} + +/// Implements the discv4 protocol see: https://github.com/ethereum/devp2p/blob/master/discv4.md +#[derive(Debug, Clone)] +pub struct Discv4Server { + pub(super) ctx: P2PContext, + pub(super) udp_socket: Arc, + pub(super) revalidation_interval_seconds: u64, + pub(super) lookup_interval_minutes: u64, +} + +impl Discv4Server { + /// Initializes a Discv4 UDP socket and creates a new `Discv4Server` instance. + /// Returns an error if the socket binding fails. + pub async fn try_new(ctx: P2PContext) -> Result { + let udp_socket = UdpSocket::bind(ctx.local_node.udp_addr()) + .await + .map_err(DiscoveryError::BindSocket)?; + + Ok(Self { + ctx, + udp_socket: Arc::new(udp_socket), + revalidation_interval_seconds: REVALIDATION_INTERVAL_IN_SECONDS, + lookup_interval_minutes: PEERS_RANDOM_LOOKUP_TIME_IN_MIN, + }) + } + + /// Initializes the discovery server. It: + /// - Spawns tasks to handle incoming messages and revalidate known nodes. + /// - Loads bootnodes to establish initial peer connections. + /// - Starts the lookup handler via [`Discv4LookupHandler`] to periodically search for new peers. 
+ pub async fn start(&self, bootnodes: Vec) -> Result<(), DiscoveryError> { + let lookup_handler = Discv4LookupHandler::new( + self.ctx.clone(), + self.udp_socket.clone(), + self.lookup_interval_minutes, + ); + + self.ctx.tracker.spawn({ + let self_clone = self.clone(); + async move { self_clone.receive().await } + }); + self.ctx.tracker.spawn({ + let self_clone = self.clone(); + async move { self_clone.start_revalidation().await } + }); + self.load_bootnodes(bootnodes).await; + lookup_handler.start(10); + + Ok(()) + } + + async fn load_bootnodes(&self, bootnodes: Vec) { + for bootnode in bootnodes { + let node = Node { + ip: bootnode.socket_address.ip(), + udp_port: bootnode.socket_address.port(), + // TODO: udp port can differ from tcp port. + // see https://github.com/lambdaclass/ethrex/issues/905 + tcp_port: bootnode.socket_address.port(), + node_id: bootnode.node_id, + }; + if let Err(e) = self + .try_add_peer_and_ping(node, self.ctx.table.lock().await) + .await + { + debug!("Error while adding bootnode to table: {:?}", e); + }; + } + } + + pub async fn receive(&self) { + let mut buf = vec![0; MAX_DISC_PACKET_SIZE]; + + loop { + let (read, from) = match self.udp_socket.recv_from(&mut buf).await { + Ok(result) => result, + Err(e) => { + error!("Error receiving data from socket: {e}. Stopping discovery server"); + return; + } + }; + debug!("Received {read} bytes from {from}"); + + match Packet::decode(&buf[..read]) { + Err(e) => error!("Could not decode packet: {:?}", e), + Ok(packet) => { + let msg = packet.get_message(); + let msg_name = msg.to_string(); + debug!("Message: {:?} from {}", msg, packet.get_node_id()); + if let Err(e) = self.handle_message(packet, from).await { + debug!("Error while processing {} message: {:?}", msg_name, e); + }; + } + } + } + } + + async fn handle_message(&self, packet: Packet, from: SocketAddr) -> Result<(), DiscoveryError> { + match packet.get_message() { + Message::Ping(msg) => { + if is_msg_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + }; + + let node = Node { + ip: from.ip(), + udp_port: from.port(), + tcp_port: msg.from.tcp_port, + node_id: packet.get_node_id(), + }; + self.pong(packet.get_hash(), node).await?; + + let peer = { + let table = self.ctx.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + + let Some(peer) = peer else { + self.try_add_peer_and_ping(node, self.ctx.table.lock().await) + .await?; + return Ok(()); + }; + + // if peer was in the table and last ping was 12 hs ago + // we need to re ping to re-validate the endpoint proof + if elapsed_time_since(peer.last_ping) / 3600 >= PROOF_EXPIRATION_IN_HS { + self.ping(node, self.ctx.table.lock().await).await?; + } + if let Some(enr_seq) = msg.enr_seq { + if enr_seq > peer.record.seq && peer.is_proven { + debug!("Found outdated enr-seq, sending an enr_request"); + self.send_enr_request(peer.node, self.ctx.table.lock().await) + .await?; + } + } + + Ok(()) + } + Message::Pong(msg) => { + if is_msg_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + } + + let peer = { + let table = self.ctx.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + let Some(peer) = peer else { + return Err(DiscoveryError::InvalidMessage("not known node".into())); + }; + + let Some(ping_hash) = peer.last_ping_hash else { + return Err(DiscoveryError::InvalidMessage( + "node did not send a previous ping".into(), + )); + }; + if ping_hash != msg.ping_hash { + return Err(DiscoveryError::InvalidMessage( + "hash did 
not match the last corresponding ping".into(), + )); + } + + // all validations went well, mark as answered and start a rlpx connection + self.ctx + .table + .lock() + .await + .pong_answered(peer.node.node_id, current_unix_time()); + if let Some(enr_seq) = msg.enr_seq { + if enr_seq > peer.record.seq { + debug!("Found outdated enr-seq, send an enr_request"); + self.send_enr_request(peer.node, self.ctx.table.lock().await) + .await?; + } + } + let ctx = self.ctx.clone(); + self.ctx + .tracker + .spawn(async move { handle_peer_as_initiator(ctx, peer.node).await }); + Ok(()) + } + Message::FindNode(msg) => { + if is_msg_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + }; + let node = { + let table = self.ctx.table.lock().await; + table.get_by_node_id(packet.get_node_id()).cloned() + }; + + let Some(node) = node else { + return Err(DiscoveryError::InvalidMessage("not a known node".into())); + }; + if !node.is_proven { + return Err(DiscoveryError::InvalidMessage("node isn't proven".into())); + } + + let nodes = { + let table = self.ctx.table.lock().await; + table.get_closest_nodes(msg.target) + }; + let nodes_chunks = nodes.chunks(4); + let expiration = get_msg_expiration_from_seconds(20); + + debug!("Sending neighbors!"); + // we are sending the neighbors in 4 different messages as not to exceed the + // maximum packet size + for nodes in nodes_chunks { + let neighbors = + Message::Neighbors(NeighborsMessage::new(nodes.to_vec(), expiration)); + let mut buf = Vec::new(); + neighbors.encode_with_header(&mut buf, &self.ctx.signer); + + let bytes_sent = self + .udp_socket + .send_to(&buf, from) + .await + .map_err(DiscoveryError::MessageSendFailure)?; + + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); + } + } + + Ok(()) + } + Message::Neighbors(neighbors_msg) => { + if is_msg_expired(neighbors_msg.expiration) { + return Err(DiscoveryError::MessageExpired); + }; + + let mut table_lock = self.ctx.table.lock().await; + + let Some(node) = table_lock.get_by_node_id_mut(packet.get_node_id()) else { + return Err(DiscoveryError::InvalidMessage("not a known node".into())); + }; + + let Some(req) = &mut node.find_node_request else { + return Err(DiscoveryError::InvalidMessage( + "find node request not sent".into(), + )); + }; + if current_unix_time().saturating_sub(req.sent_at) >= 60 { + node.find_node_request = None; + return Err(DiscoveryError::InvalidMessage( + "find_node request expired after one minute".into(), + )); + } + + let nodes = &neighbors_msg.nodes; + let total_nodes_sent = req.nodes_sent + nodes.len(); + + if total_nodes_sent > MAX_NODES_PER_BUCKET { + node.find_node_request = None; + return Err(DiscoveryError::InvalidMessage( + "sent more than allowed nodes".into(), + )); + } + + // update the number of node_sent + // and forward the nodes sent if a channel is attached + req.nodes_sent = total_nodes_sent; + if let Some(tx) = &req.tx { + let _ = tx.send(nodes.clone()); + } + + if total_nodes_sent == MAX_NODES_PER_BUCKET { + debug!("Neighbors request has been fulfilled"); + node.find_node_request = None; + } + + // release the lock early + // as we might be a long time pinging all the new nodes + drop(table_lock); + + debug!("Storing neighbors in our table!"); + for node in nodes { + let _ = self + .try_add_peer_and_ping(*node, self.ctx.table.lock().await) + .await; + } + + Ok(()) + } + Message::ENRRequest(msg) => { + if is_msg_expired(msg.expiration) { + return Err(DiscoveryError::MessageExpired); + } + let Ok(node_record) = + 
NodeRecord::from_node(self.ctx.local_node, self.ctx.enr_seq, &self.ctx.signer)
+                else {
+                    return Err(DiscoveryError::InvalidMessage(
+                        "could not build local node record".into(),
+                    ));
+                };
+                let msg =
+                    Message::ENRResponse(ENRResponseMessage::new(packet.get_hash(), node_record));
+                let mut buf = vec![];
+                msg.encode_with_header(&mut buf, &self.ctx.signer);
+
+                let bytes_sent = self
+                    .udp_socket
+                    .send_to(&buf, from)
+                    .await
+                    .map_err(DiscoveryError::MessageSendFailure)?;
+
+                if bytes_sent != buf.len() {
+                    return Err(DiscoveryError::PartialMessageSent);
+                }
+
+                Ok(())
+            }
+            Message::ENRResponse(msg) => {
+                let mut table_lock = self.ctx.table.lock().await;
+                let peer = table_lock.get_by_node_id_mut(packet.get_node_id());
+                let Some(peer) = peer else {
+                    return Err(DiscoveryError::InvalidMessage("Peer not known".into()));
+                };
+
+                let Some(req_hash) = peer.enr_request_hash else {
+                    return Err(DiscoveryError::InvalidMessage(
+                        "Discarding enr-response as enr-request wasn't sent".into(),
+                    ));
+                };
+                if req_hash != msg.request_hash {
+                    return Err(DiscoveryError::InvalidMessage(
+                        "Discarding enr-response as it did not match the enr-request hash".into(),
+                    ));
+                }
+                peer.enr_request_hash = None;
+
+                if msg.node_record.seq < peer.record.seq {
+                    return Err(DiscoveryError::InvalidMessage(
+                        "msg node record seq is lower than the one we have".into(),
+                    ));
+                }
+
+                let record = msg.node_record.decode_pairs();
+                let Some(id) = record.id else {
+                    return Err(DiscoveryError::InvalidMessage(
+                        "msg node record does not have required `id` field".into(),
+                    ));
+                };
+
+                // https://github.com/ethereum/devp2p/blob/master/enr.md#v4-identity-scheme
+                let signature_valid = match id.as_str() {
+                    "v4" => {
+                        let digest = msg.node_record.get_signature_digest();
+                        let Some(public_key) = record.secp256k1 else {
+                            return Err(DiscoveryError::InvalidMessage(
+                                "signature could not be verified because public key was not provided".into(),
+                            ));
+                        };
+                        let signature_bytes = msg.node_record.signature.as_bytes();
+                        let Ok(signature) = Signature::from_slice(&signature_bytes[0..64]) else {
+                            return Err(DiscoveryError::InvalidMessage(
+                                "signature could not be built from msg signature bytes".into(),
+                            ));
+                        };
+                        let Ok(verifying_key) =
+                            VerifyingKey::from_sec1_bytes(public_key.as_bytes())
+                        else {
+                            return Err(DiscoveryError::InvalidMessage(
+                                "public key could not be built from msg pub key bytes".into(),
+                            ));
+                        };
+                        verifying_key.verify_prehash(&digest, &signature).is_ok()
+                    }
+                    _ => false,
+                };
+                if !signature_valid {
+                    return Err(DiscoveryError::InvalidMessage(
+                        "Signature verification failed".into(),
+                    ));
+                }
+
+                if let Some(ip) = record.ip {
+                    peer.node.ip = IpAddr::from(Ipv4Addr::from_bits(ip));
+                }
+                if let Some(tcp_port) = record.tcp_port {
+                    peer.node.tcp_port = tcp_port;
+                }
+                if let Some(udp_port) = record.udp_port {
+                    peer.node.udp_port = udp_port;
+                }
+                peer.record = msg.node_record.clone();
+                debug!(
+                    "Node with id {:?} record has been successfully updated",
+                    peer.node.node_id
+                );
+                Ok(())
+            }
+        }
+    }
+
+    /// Starts a tokio scheduler that:
+    /// - performs periodic revalidation of the current nodes (sends a ping to the old nodes).
+    ///
+    /// **Peer revalidation**
+    ///
+    /// Peer revalidation works in the following manner:
+    /// 1. Every `revalidation_interval_seconds` we ping the 3 least recently pinged peers
+    /// 2. In the next iteration we check if they have answered
+    ///    - if they have: we increment the liveness field by one
+    ///    - otherwise we decrement it by the current value / 3.
+    /// 3. 
If the liveness field is 0, then we delete it and insert a new one from the replacements table + /// + /// See more https://github.com/ethereum/devp2p/blob/master/discv4.md#kademlia-table + async fn start_revalidation(&self) { + let mut interval = + tokio::time::interval(Duration::from_secs(self.revalidation_interval_seconds)); + + // first tick starts immediately + interval.tick().await; + + let mut previously_pinged_peers = HashSet::new(); + loop { + interval.tick().await; + debug!("Running peer revalidation"); + + // first check that the peers we ping have responded + for node_id in previously_pinged_peers { + let mut table_lock = self.ctx.table.lock().await; + let Some(peer) = table_lock.get_by_node_id_mut(node_id) else { + continue; + }; + + if let Some(has_answered) = peer.revalidation { + if has_answered { + peer.increment_liveness(); + } else { + peer.decrement_liveness(); + } + } + + peer.revalidation = None; + + if peer.liveness == 0 { + let new_peer = table_lock.replace_peer(node_id); + if let Some(new_peer) = new_peer { + let _ = self.ping(new_peer.node, table_lock).await; + } + } + } + + // now send a ping to the least recently pinged peers + // this might be too expensive to run if our table is filled + // maybe we could just pick them randomly + let peers = self + .ctx + .table + .lock() + .await + .get_least_recently_pinged_peers(3); + previously_pinged_peers = HashSet::default(); + for peer in peers { + debug!("Pinging peer {:?} to re-validate!", peer.node.node_id); + let _ = self.ping(peer.node, self.ctx.table.lock().await).await; + previously_pinged_peers.insert(peer.node.node_id); + let mut table = self.ctx.table.lock().await; + let peer = table.get_by_node_id_mut(peer.node.node_id); + if let Some(peer) = peer { + peer.revalidation = Some(false); + } + } + + debug!("Peer revalidation finished"); + } + } + + /// Attempts to add a node to the Kademlia table and send a ping if necessary. + /// + /// - If the node is **not found** in the table and there is enough space, it will be added, + /// and a ping message will be sent to verify connectivity. + /// - If the node is **already present**, no action is taken. 
+ async fn try_add_peer_and_ping<'a>( + &self, + node: Node, + mut table_lock: MutexGuard<'a, KademliaTable>, + ) -> Result<(), DiscoveryError> { + // sanity check to make sure we are not storing ourselves + // a case that may happen in a neighbor message for example + if node.node_id == self.ctx.local_node.node_id { + return Ok(()); + } + + if let (Some(peer), true) = table_lock.insert_node(node) { + self.ping(peer.node, table_lock).await?; + }; + Ok(()) + } + + async fn ping<'a>( + &self, + node: Node, + mut table_lock: MutexGuard<'a, KademliaTable>, + ) -> Result<(), DiscoveryError> { + let mut buf = Vec::new(); + let expiration: u64 = get_msg_expiration_from_seconds(20); + let from = Endpoint { + ip: self.ctx.local_node.ip, + udp_port: self.ctx.local_node.udp_port, + tcp_port: self.ctx.local_node.tcp_port, + }; + let to = Endpoint { + ip: node.ip, + udp_port: node.udp_port, + tcp_port: node.tcp_port, + }; + + let ping = + Message::Ping(PingMessage::new(from, to, expiration).with_enr_seq(self.ctx.enr_seq)); + ping.encode_with_header(&mut buf, &self.ctx.signer); + let bytes_sent = self + .udp_socket + .send_to(&buf, node.udp_addr()) + .await + .map_err(DiscoveryError::MessageSendFailure)?; + + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); + } + + let hash = H256::from_slice(&buf[0..32]); + table_lock.update_peer_ping(node.node_id, Some(hash), current_unix_time()); + + Ok(()) + } + + async fn pong(&self, ping_hash: H256, node: Node) -> Result<(), DiscoveryError> { + let mut buf = Vec::new(); + let expiration: u64 = get_msg_expiration_from_seconds(20); + let to = Endpoint { + ip: node.ip, + udp_port: node.udp_port, + tcp_port: node.tcp_port, + }; + + let pong = Message::Pong( + PongMessage::new(to, ping_hash, expiration).with_enr_seq(self.ctx.enr_seq), + ); + pong.encode_with_header(&mut buf, &self.ctx.signer); + + let bytes_sent = self + .udp_socket + .send_to(&buf, node.udp_addr()) + .await + .map_err(DiscoveryError::MessageSendFailure)?; + + if bytes_sent != buf.len() { + Err(DiscoveryError::PartialMessageSent) + } else { + Ok(()) + } + } + + async fn send_enr_request<'a>( + &self, + node: Node, + mut table_lock: MutexGuard<'a, KademliaTable>, + ) -> Result<(), DiscoveryError> { + let mut buf = Vec::new(); + let expiration: u64 = get_msg_expiration_from_seconds(20); + let enr_req = Message::ENRRequest(ENRRequestMessage::new(expiration)); + enr_req.encode_with_header(&mut buf, &self.ctx.signer); + + let bytes_sent = self + .udp_socket + .send_to(&buf, node.udp_addr()) + .await + .map_err(DiscoveryError::MessageSendFailure)?; + if bytes_sent != buf.len() { + return Err(DiscoveryError::PartialMessageSent); + } + + let hash = H256::from_slice(&buf[0..32]); + if let Some(peer) = table_lock.get_by_node_id_mut(node.node_id) { + peer.enr_request_hash = Some(hash); + }; + + Ok(()) + } +} + +#[cfg(test)] +pub(super) mod tests { + use super::*; + use crate::{ + node_id_from_signing_key, rlpx::message::Message as RLPxMessage, MAX_MESSAGES_TO_BROADCAST, + }; + use ethrex_storage::{EngineType, Store}; + use k256::ecdsa::SigningKey; + use rand::rngs::OsRng; + use std::net::{IpAddr, Ipv4Addr}; + use tokio::{sync::Mutex, time::sleep}; + + pub async fn insert_random_node_on_custom_bucket( + table: Arc>, + bucket_idx: usize, + ) { + let node_id = node_id_from_signing_key(&SigningKey::random(&mut OsRng)); + let node = Node { + ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), + tcp_port: 0, + udp_port: 0, + node_id, + }; + table + .lock() + .await + 
.insert_node_on_custom_bucket(node, bucket_idx);
+    }
+
+    pub async fn fill_table_with_random_nodes(table: Arc<Mutex<KademliaTable>>) {
+        for i in 0..256 {
+            for _ in 0..16 {
+                insert_random_node_on_custom_bucket(table.clone(), i).await;
+            }
+        }
+    }
+
+    pub async fn start_discovery_server(
+        udp_port: u16,
+        should_start_server: bool,
+    ) -> Result<Discv4Server, DiscoveryError> {
+        let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), udp_port);
+        let signer = SigningKey::random(&mut OsRng);
+        let node_id = node_id_from_signing_key(&signer);
+        let local_node = Node {
+            ip: addr.ip(),
+            node_id,
+            udp_port,
+            tcp_port: udp_port,
+        };
+
+        let storage =
+            Store::new("temp.db", EngineType::InMemory).expect("Failed to create test DB");
+        let table = Arc::new(Mutex::new(KademliaTable::new(node_id)));
+        let (broadcast, _) = tokio::sync::broadcast::channel::<(tokio::task::Id, Arc<RLPxMessage>)>(
+            MAX_MESSAGES_TO_BROADCAST,
+        );
+        let tracker = tokio_util::task::TaskTracker::new();
+
+        let ctx = P2PContext {
+            local_node,
+            enr_seq: current_unix_time(),
+            tracker: tracker.clone(),
+            signer,
+            table,
+            storage,
+            broadcast,
+        };
+
+        let discv4 = Discv4Server::try_new(ctx).await?;
+
+        if should_start_server {
+            tracker.spawn({
+                let discv4 = discv4.clone();
+                async move {
+                    discv4.receive().await;
+                }
+            });
+        }
+
+        Ok(discv4)
+    }
+
+    /// connects two mock servers by pinging from a to b
+    pub async fn connect_servers(
+        server_a: &mut Discv4Server,
+        server_b: &mut Discv4Server,
+    ) -> Result<(), DiscoveryError> {
+        server_a
+            .try_add_peer_and_ping(server_b.ctx.local_node, server_a.ctx.table.lock().await)
+            .await?;
+        // allow some time for the server to respond
+        sleep(Duration::from_secs(1)).await;
+        Ok(())
+    }
+
+    #[tokio::test]
+    /** This is an end to end test on the discovery server, the idea is as follows:
+     * - We'll start two discovery servers (`a` & `b`) to ping between each other
+     * - We'll make `b` ping `a`, and validate that the connection is right
+     * - Then we'll wait for a revalidation where we expect everything to be the same
+     * - We'll do this 5 more times
+     * - Then we'll stop server `a` so that it doesn't respond to re-validations
+     * - We expect server `b` to remove node `a` from its table after 3 re-validations
+     * To make this run faster, we'll change the revalidation time to be every 2 secs
+     */
+    async fn discovery_server_revalidation() -> Result<(), DiscoveryError> {
+        let mut server_a = start_discovery_server(7998, true).await?;
+        let mut server_b = start_discovery_server(7999, true).await?;
+
+        connect_servers(&mut server_a, &mut server_b).await?;
+
+        server_b.revalidation_interval_seconds = 2;
+
+        // start revalidation server
+        server_b.ctx.tracker.spawn({
+            let server_b = server_b.clone();
+            async move { server_b.start_revalidation().await }
+        });
+
+        for _ in 0..5 {
+            sleep(Duration::from_millis(2500)).await;
+            // by now, b should've sent a revalidation to a
+            let table = server_b.ctx.table.lock().await;
+            let node = table.get_by_node_id(server_a.ctx.local_node.node_id);
+            assert!(node.is_some_and(|n| n.revalidation.is_some()));
+        }
+
+        // make sure that `a` has responded to all the re-validations
+        // we can do that by checking the liveness
+        {
+            let table = server_b.ctx.table.lock().await;
+            let node = table.get_by_node_id(server_a.ctx.local_node.node_id);
+            assert_eq!(node.map_or(0, |n| n.liveness), 6);
+        }
+
+        // now, stopping server `a` is not trivial
+        // so we'll instead change its port, so that no one responds
+        {
+            let mut table = server_b.ctx.table.lock().await;
+            let node = table.get_by_node_id_mut(server_a.ctx.local_node.node_id);
+            if let Some(node) = node {
+                node.node.udp_port = 0
+            };
+        }
+
+        // now the liveness field should start decreasing until it gets to 0
+        // which should happen in 3 re-validations
+        for _ in 0..2 {
+            sleep(Duration::from_millis(2500)).await;
+            let table = server_b.ctx.table.lock().await;
+            let node = table.get_by_node_id(server_a.ctx.local_node.node_id);
+            assert!(node.is_some_and(|n| n.revalidation.is_some()));
+        }
+        sleep(Duration::from_millis(2500)).await;
+
+        // finally, `a` should not exist anymore
+        let table = server_b.ctx.table.lock().await;
+        assert!(table
+            .get_by_node_id(server_a.ctx.local_node.node_id)
+            .is_none());
+        Ok(())
+    }
+
+    #[tokio::test]
+    /**
+     * This test verifies the exchange and update of ENR (Ethereum Node Record) messages.
+     * The test follows these steps:
+     *
+     * 1. Start two nodes.
+     * 2. Wait until they establish a connection.
+     * 3. Assert that they exchange their records and store them.
+     * 4. Modify the ENR (node record) of one of the nodes.
+     * 5. Send a new ping message and check that an ENR request was triggered.
+     * 6. Verify that the updated node record has been correctly received and stored.
+     */
+    async fn discovery_enr_message() -> Result<(), DiscoveryError> {
+        let mut server_a = start_discovery_server(8006, true).await?;
+        let mut server_b = start_discovery_server(8007, true).await?;
+
+        connect_servers(&mut server_a, &mut server_b).await?;
+
+        // wait some time for the enr request-response to finish
+        sleep(Duration::from_millis(2500)).await;
+
+        let expected_record = NodeRecord::from_node(
+            server_b.ctx.local_node,
+            current_unix_time(),
+            &server_b.ctx.signer,
+        )
+        .expect("Node record is created from node");
+
+        let server_a_peer_b = server_a
+            .ctx
+            .table
+            .lock()
+            .await
+            .get_by_node_id(server_b.ctx.local_node.node_id)
+            .cloned()
+            .unwrap();
+
+        // we only match the pairs, as the signature and seq will change
+        // because they are calculated with the current time
+        assert!(server_a_peer_b.record.decode_pairs() == expected_record.decode_pairs());
+
+        // Modify server_a's record of server_b with an incorrect TCP port.
+        // This simulates an outdated or incorrect entry in the node table.
+        server_a
+            .ctx
+            .table
+            .lock()
+            .await
+            .get_by_node_id_mut(server_b.ctx.local_node.node_id)
+            .unwrap()
+            .node
+            .tcp_port = 10;
+
+        // update the enr_seq of server_b so that server_a notices it is outdated
+        // and sends a request to update it
+        server_b.ctx.enr_seq = current_unix_time();
+
+        // Send a ping from server_b to server_a.
+        // server_a should notice the enr_seq is outdated
+        // and trigger an enr-request to server_b to update the record.
+        server_b
+            .ping(server_a.ctx.local_node, server_b.ctx.table.lock().await)
+            .await?;
+
+        // Wait for the update to propagate.
+        sleep(Duration::from_millis(2500)).await;
+
+        // Verify that server_a has updated its record of server_b with the correct TCP port.
+ let table_lock = server_a.ctx.table.lock().await; + let server_a_node_b_record = table_lock + .get_by_node_id(server_b.ctx.local_node.node_id) + .unwrap(); + + assert!(server_a_node_b_record.node.tcp_port == server_b.ctx.local_node.tcp_port); + + Ok(()) + } +} diff --git a/crates/networking/p2p/net.rs b/crates/networking/p2p/net.rs index 9648f16b5a..b7d145de8a 100644 --- a/crates/networking/p2p/net.rs +++ b/crates/networking/p2p/net.rs @@ -1,5 +1,8 @@ use bootnode::BootNode; -use discv4::{helpers::current_unix_time, DiscoveryError, Discv4Server}; +use discv4::{ + helpers::current_unix_time, + server::{DiscoveryError, Discv4Server}, +}; use ethrex_core::H512; use ethrex_storage::Store; use k256::{ From 68ada444d1d511efa8a3b70498ace39a46de4d91 Mon Sep 17 00:00:00 2001 From: Marcos Nicolau Date: Mon, 27 Jan 2025 16:45:07 -0300 Subject: [PATCH 33/33] chore: address clippy warnings --- crates/networking/p2p/discv4/server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/discv4/server.rs b/crates/networking/p2p/discv4/server.rs index 85afd11438..7f4f1396ad 100644 --- a/crates/networking/p2p/discv4/server.rs +++ b/crates/networking/p2p/discv4/server.rs @@ -4,8 +4,8 @@ use super::{ }, lookup::Discv4LookupHandler, messages::{ - ENRRequestMessage, ENRResponseMessage, FindNodeMessage, Message, NeighborsMessage, Packet, - PingMessage, PongMessage, + ENRRequestMessage, ENRResponseMessage, Message, NeighborsMessage, Packet, PingMessage, + PongMessage, }, }; use crate::{
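
For reference, callers such as net.rs are expected to wire the relocated server up roughly as follows. This is only a sketch based on the API added in discv4/server.rs above (`try_new` / `start`) and the import change in net.rs; the wrapper function and its exact import paths are assumptions, not part of these patches.

use crate::{
    bootnode::BootNode,
    discv4::server::{DiscoveryError, Discv4Server},
    P2PContext,
};

async fn spawn_discovery(ctx: P2PContext, bootnodes: Vec<BootNode>) -> Result<(), DiscoveryError> {
    // `try_new` binds the UDP socket at the local node's UDP address and can fail with `BindSocket`.
    let discv4 = Discv4Server::try_new(ctx).await?;
    // `start` spawns the receive loop and the revalidation task on the context's TaskTracker,
    // pings the configured bootnodes, and kicks off the periodic lookup handler.
    discv4.start(bootnodes).await
}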