Skip to content

Feat(dhcp): Send DHCPRELEASE on container teardown #1305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions src/commands/dhcp_proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ use tonic::{
transport::Server, Code, Code::Internal, Code::InvalidArgument, Request, Response, Status,
};

type TaskData = (Arc<tokio::sync::Mutex<DhcpV4Service>>, AbortHandle);

#[derive(Debug)]
/// This is the tonic netavark proxy service that is required to impl the Netavark Proxy trait which
/// includes the gRPC methods defined in proto/proxy.proto. We can store an atomically reference-counted
Expand All @@ -59,7 +61,7 @@ struct NetavarkProxyService<W: Write + Clear> {
timeout_sender: Option<Arc<Mutex<Sender<i32>>>>,
// All dhcp poll will be spawned on a new task, keep track of it so
// we can remove it on teardown. The key is the container mac.
task_map: Arc<Mutex<HashMap<String, AbortHandle>>>,
task_map: Arc<Mutex<HashMap<String, TaskData>>>,
}

impl<W: Write + Clear> NetavarkProxyService<W> {
Expand Down Expand Up @@ -136,8 +138,8 @@ impl<W: Write + Clear + Send + 'static> NetavarkProxy for NetavarkProxyService<W
};
}

/// When a container is shut down this method should be called. It will clear the lease information
/// from the caching system.
/// When a container is shut down this method should be called. It will release the
/// DHCP lease and clear the lease information from the caching system.
async fn teardown(
&self,
request: Request<NetworkConfig>,
Expand All @@ -149,12 +151,33 @@ impl<W: Write + Clear + Send + 'static> NetavarkProxy for NetavarkProxyService<W
let cache = self.cache.clone();
let tasks = self.task_map.clone();

let task = tasks
.lock()
.expect("lock tasks")
.remove(&nc.container_mac_addr);
if let Some(handle) = task {
handle.abort();
let maybe_service_arc = {
// Scope for the std::sync::MutexGuard
let mut tasks_guard = tasks.lock().expect("lock tasks");

if let Some((service_arc, handle)) = tasks_guard.remove(&nc.container_mac_addr) {
handle.abort();
Some(service_arc)
} else {
None
}
};
if let Some(service_arc) = maybe_service_arc {
let mut service = service_arc.lock().await;
if let Some(lease) = service.previous_lease() {
debug!("Attempting to release lease for {}", &nc.container_mac_addr);
if let Err(e) = service.release_lease(&lease) {
warn!(
"Failed to send DHCPRELEASE for {}: {}",
&nc.container_mac_addr, e
);
} else {
debug!(
"Successfully sent DHCPRELEASE for {}",
&nc.container_mac_addr
);
}
}
}

// Remove the client from the cache dir
Expand Down Expand Up @@ -406,7 +429,7 @@ async fn process_setup<W: Write + Clear>(
network_config: NetworkConfig,
timeout: u32,
cache: Arc<Mutex<LeaseCache<W>>>,
tasks: Arc<Mutex<HashMap<String, AbortHandle>>>,
tasks: Arc<Mutex<HashMap<String, TaskData>>>,
) -> Result<NetavarkLease, Status> {
let container_network_interface = network_config.container_iface.clone();
let ns_path = network_config.ns_path.clone();
Expand All @@ -422,11 +445,13 @@ async fn process_setup<W: Write + Clear>(
let mut service = DhcpV4Service::new(network_config, timeout)?;

let lease = service.get_lease().await?;
let task = tokio::spawn(process_client_stream(service));
let service_arc = Arc::new(tokio::sync::Mutex::new(service));
let service_arc_clone = service_arc.clone();
let task_handle = tokio::spawn(process_client_stream(service_arc_clone));
tasks
.lock()
.expect("lock tasks")
.insert(mac.to_string(), task.abort_handle());
.insert(mac.to_string(), (service_arc, task_handle.abort_handle()));
lease
}
//V6 TODO implement DHCPv6
Expand Down
105 changes: 69 additions & 36 deletions src/dhcp_proxy/dhcp_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ use crate::network::netlink::Route;
use crate::wrap;
use log::debug;
use mozim::{DhcpV4ClientAsync, DhcpV4Config, DhcpV4Lease as MozimV4Lease};
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio_stream::StreamExt;

use tonic::{Code, Status};

/// The kind of DhcpServiceError that can be caused when finding a dhcp lease
Expand All @@ -39,6 +40,7 @@ impl DhcpServiceError {
}

/// DHCP service is responsible for creating, handling, and managing the dhcp lease process.
#[derive(Debug)]
pub struct DhcpV4Service {
client: DhcpV4ClientAsync,
network_config: NetworkConfig,
Expand Down Expand Up @@ -83,6 +85,9 @@ impl DhcpV4Service {
previous_lease: None,
})
}
/// Returns an owned copy of the lease currently stored in `previous_lease`,
/// or `None` when no lease is stored.
///
/// The lease is cloned rather than borrowed so that a caller can keep it
/// while subsequently invoking `&mut self` methods on the service.
pub fn previous_lease(&self) -> Option<MozimV4Lease> {
    Option::clone(&self.previous_lease)
}

/// Performs a DHCP DORA on a ipv4 network configuration.
/// # Arguments
Expand Down Expand Up @@ -129,6 +134,19 @@ impl DhcpV4Service {
"Could not find a lease within the timeout limit".to_string(),
))
}

/// Sends a DHCPRELEASE message for `lease` via the underlying mozim client.
///
/// This is a "best effort" operation and should not block teardown.
///
/// # Errors
///
/// Any error reported by the client's `release` call is converted to its
/// string form and returned as a [`DhcpServiceError`] of kind `Bug`.
pub fn release_lease(&mut self, lease: &MozimV4Lease) -> Result<(), DhcpServiceError> {
    let mac = &self.network_config.container_mac_addr;
    debug!("Attempting to release lease for MAC: {}", mac);

    // Hand the lease straight to the mozim client; only the error is remapped.
    let outcome = self.client.release(lease);
    outcome.map_err(|release_err| DhcpServiceError::new(Bug, release_err.to_string()))
}
}

impl std::fmt::Display for DhcpServiceError {
Expand All @@ -149,46 +167,61 @@ impl From<DhcpServiceError> for Status {
}
}

pub async fn process_client_stream(mut client: DhcpV4Service) {
while let Some(lease) = client.client.next().await {
match lease {
Ok(lease) => {
log::info!(
"got new lease for mac {}: {:?}",
&client.network_config.container_mac_addr,
&lease
);
// get previous lease and check if ip addr changed, if not we do not have to do anything
if let Some(old_lease) = &client.previous_lease {
if old_lease.yiaddr != lease.yiaddr
|| old_lease.subnet_mask != lease.subnet_mask
|| old_lease.gateways != lease.gateways
{
// ips do not match, remove old ones and assign new ones.
log::info!(
"ip or gateway for mac {} changed, update address",
&client.network_config.container_mac_addr
);
match update_lease_ip(
&client.network_config.ns_path,
&client.network_config.container_iface,
old_lease,
&lease,
) {
Ok(_) => {}
Err(err) => {
log::error!("{err}");
continue;
pub async fn process_client_stream(service_arc: Arc<Mutex<DhcpV4Service>>) {
loop {
let next_lease_result = {
let mut service = service_arc.lock().await;
service.client.next().await
};
if let Some(lease_result) = next_lease_result {
// Now that we have the result, re-acquire the lock to update the state.
// The guard taken above was dropped at the end of that block, so this
// second `lock().await` cannot deadlock against this task's own guard.
match lease_result {
Ok(lease) => {
let mut client = service_arc.lock().await;
log::info!(
"got new lease for mac {}: {:?}",
&client.network_config.container_mac_addr,
&lease
);
// get previous lease and check if ip addr changed, if not we do not have to do anything
if let Some(old_lease) = &client.previous_lease {
if old_lease.yiaddr != lease.yiaddr
|| old_lease.subnet_mask != lease.subnet_mask
|| old_lease.gateways != lease.gateways
{
// ips do not match, remove old ones and assign new ones.
log::info!(
"ip or gateway for mac {} changed, update address",
&client.network_config.container_mac_addr
);
match update_lease_ip(
&client.network_config.ns_path,
&client.network_config.container_iface,
old_lease,
&lease,
) {
Ok(_) => {}
Err(err) => {
log::error!("{err}");
continue;
}
}
}
}
client.previous_lease = Some(lease);
}
Err(err) => {
let client = service_arc.lock().await;
log::error!(
"Failed to renew lease for {}: {err}",
&client.network_config.container_mac_addr
);
}
client.previous_lease = Some(lease)
}
Err(err) => log::error!(
"Failed to renew lease for {}: {err}",
&client.network_config.container_mac_addr
),
} else {
// The stream has ended (e.g., the client disconnected), so we exit the loop.
break;
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions test-dhcp/003-teardown.bats
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@

load helpers

# Launch the dhcp-proxy in the background inside the test network namespace,
# sending stdout and stderr to "$TMP_TESTDIR/proxy.log" and recording the
# background job's PID in PROXY_PID.
# RUST_LOG=debug is needed because the teardown test greps the proxy log for
# the debug-level "Successfully sent DHCPRELEASE" message.
function start_proxy() {
    RUST_LOG=debug ip netns exec "$NS_NAME" $NETAVARK dhcp-proxy \
        --dir "$TMP_TESTDIR" \
        --uds "$TMP_TESTDIR" \
        &>"$TMP_TESTDIR/proxy.log" &
    PROXY_PID=$!
}

@test "basic teardown" {
read -r -d '\0' input_config <<EOF
{
Expand All @@ -30,6 +35,11 @@ EOF
assert "$output" == "true"
# Run teardown
run_teardown "$input_config"
# Check the proxy log to confirm that the DHCPRELEASE was sent for this container.
# The dnsmasq-side check (sleep + grep of dnsmasq.log) below is currently commented out.
# sleep 1
# assert `grep -c "DHCPRELEASE(br0).*[[:space:]]${CONTAINER_MAC}" "$TMP_TESTDIR/dnsmasq.log"` == 1
assert `grep -c "Successfully sent DHCPRELEASE for ${CONTAINER_MAC}" "$TMP_TESTDIR/proxy.log"` == 1
run_helper cat "$TMP_TESTDIR/nv-proxy.lease"
# Check that the length of the lease file is now zero
run_helper jq ". | length" <<<"$output"
Expand Down
2 changes: 1 addition & 1 deletion test-dhcp/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ function stop_dhcp() {
}

function start_proxy() {
RUST_LOG=info ip netns exec "$NS_NAME" $NETAVARK dhcp-proxy --dir "$TMP_TESTDIR" --uds "$TMP_TESTDIR" &>"$TMP_TESTDIR/proxy.log" &
RUST_LOG=debug ip netns exec "$NS_NAME" $NETAVARK dhcp-proxy --dir "$TMP_TESTDIR" --uds "$TMP_TESTDIR" &>"$TMP_TESTDIR/proxy.log" &
PROXY_PID=$!
}

Expand Down