Skip to content

Commit fbd03ff

Browse files
committed
Signed-off-by: Rishikpulhani <[email protected]>
feat(dhcp): Send DHCPRELEASE on container teardown When a container using DHCP is torn down, its lease is left active on the DHCP server until it expires. This can be problematic in environments with small IP pools or long lease times. In setups using Dynamic DNS (DDNS), it can also lead to stale DNS records. This change introduces the capability for netavark to send a DHCPRELEASE message to the DHCP server when a container's network is torn down. This is implemented by: - Adding a `release_lease` method to the `DhcpV4Service` in `dhcp_service.rs`, which wraps the `release` function from the underlying mozim client. - Wrapped DhcpV4Service inside Arc<> share that to the process_client_stream function and then also store it in the task map - used it in process_client_stream task and also in the teardown function to send the DHCPRELEASE message
1 parent 6b44b7e commit fbd03ff

File tree

4 files changed

+111
-49
lines changed

4 files changed

+111
-49
lines changed

src/commands/dhcp_proxy.rs

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ use tonic::{
3838
transport::Server, Code, Code::Internal, Code::InvalidArgument, Request, Response, Status,
3939
};
4040

41+
type TaskData = (Arc<tokio::sync::Mutex<DhcpV4Service>>, AbortHandle);
42+
4143
#[derive(Debug)]
4244
/// This is the tonic netavark proxy service that is required to impl the Netavark Proxy trait which
4345
/// includes the gRPC methods defined in proto/proxy.proto. We can store a atomically referenced counted
@@ -59,7 +61,7 @@ struct NetavarkProxyService<W: Write + Clear> {
5961
timeout_sender: Option<Arc<Mutex<Sender<i32>>>>,
6062
// All dhcp poll will be spawned on a new task, keep track of it so
6163
// we can remove it on teardown. The key is the container mac.
62-
task_map: Arc<Mutex<HashMap<String, AbortHandle>>>,
64+
task_map: Arc<Mutex<HashMap<String, TaskData>>>,
6365
}
6466

6567
impl<W: Write + Clear> NetavarkProxyService<W> {
@@ -136,8 +138,8 @@ impl<W: Write + Clear + Send + 'static> NetavarkProxy for NetavarkProxyService<W
136138
};
137139
}
138140

139-
/// When a container is shut down this method should be called. It will clear the lease information
140-
/// from the caching system.
141+
/// When a container is shut down this method should be called. It will release the
142+
/// DHCP lease and clear the lease information from the caching system.
141143
async fn teardown(
142144
&self,
143145
request: Request<NetworkConfig>,
@@ -149,12 +151,28 @@ impl<W: Write + Clear + Send + 'static> NetavarkProxy for NetavarkProxyService<W
149151
let cache = self.cache.clone();
150152
let tasks = self.task_map.clone();
151153

152-
let task = tasks
153-
.lock()
154-
.expect("lock tasks")
155-
.remove(&nc.container_mac_addr);
156-
if let Some(handle) = task {
157-
handle.abort();
154+
let maybe_service_arc = {
155+
// Scope for the std::sync::MutexGuard
156+
let mut tasks_guard = tasks.lock().expect("lock tasks");
157+
158+
if let Some((service_arc, handle)) = tasks_guard.remove(&nc.container_mac_addr) {
159+
handle.abort();
160+
Some(service_arc)
161+
} else {
162+
None
163+
}
164+
};
165+
if let Some(service_arc) = maybe_service_arc {
166+
let mut service = service_arc.lock().await;
167+
if let Some(lease) = service.previous_lease() {
168+
debug!("Attempting to release lease for {}", &nc.container_mac_addr);
169+
if let Err(e) = service.release_lease(&lease) {
170+
warn!(
171+
"Failed to send DHCPRELEASE for {}: {}",
172+
&nc.container_mac_addr, e
173+
);
174+
}
175+
}
158176
}
159177

160178
// Remove the client from the cache dir
@@ -406,7 +424,7 @@ async fn process_setup<W: Write + Clear>(
406424
network_config: NetworkConfig,
407425
timeout: u32,
408426
cache: Arc<Mutex<LeaseCache<W>>>,
409-
tasks: Arc<Mutex<HashMap<String, AbortHandle>>>,
427+
tasks: Arc<Mutex<HashMap<String, TaskData>>>,
410428
) -> Result<NetavarkLease, Status> {
411429
let container_network_interface = network_config.container_iface.clone();
412430
let ns_path = network_config.ns_path.clone();
@@ -422,11 +440,13 @@ async fn process_setup<W: Write + Clear>(
422440
let mut service = DhcpV4Service::new(network_config, timeout)?;
423441

424442
let lease = service.get_lease().await?;
425-
let task = tokio::spawn(process_client_stream(service));
443+
let service_arc = Arc::new(tokio::sync::Mutex::new(service));
444+
let service_arc_clone = service_arc.clone();
445+
let task_handle = tokio::spawn(process_client_stream(service_arc_clone));
426446
tasks
427447
.lock()
428448
.expect("lock tasks")
429-
.insert(mac.to_string(), task.abort_handle());
449+
.insert(mac.to_string(), (service_arc, task_handle.abort_handle()));
430450
lease
431451
}
432452
//V6 TODO implement DHCPv6

src/dhcp_proxy/dhcp_service.rs

Lines changed: 69 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ use crate::network::netlink::Route;
1111
use crate::wrap;
1212
use log::debug;
1313
use mozim::{DhcpV4ClientAsync, DhcpV4Config, DhcpV4Lease as MozimV4Lease};
14+
use std::sync::Arc;
15+
use tokio::sync::Mutex;
1416
use tokio_stream::StreamExt;
15-
1617
use tonic::{Code, Status};
1718

1819
/// The kind of DhcpServiceError that can be caused when finding a dhcp lease
@@ -39,6 +40,7 @@ impl DhcpServiceError {
3940
}
4041

4142
/// DHCP service is responsible for creating, handling, and managing the dhcp lease process.
43+
#[derive(Debug)]
4244
pub struct DhcpV4Service {
4345
client: DhcpV4ClientAsync,
4446
network_config: NetworkConfig,
@@ -83,6 +85,9 @@ impl DhcpV4Service {
8385
previous_lease: None,
8486
})
8587
}
88+
pub fn previous_lease(&self) -> Option<MozimV4Lease> {
89+
self.previous_lease.clone()
90+
}
8691

8792
/// Performs a DHCP DORA on a ipv4 network configuration.
8893
/// # Arguments
@@ -129,6 +134,19 @@ impl DhcpV4Service {
129134
"Could not find a lease within the timeout limit".to_string(),
130135
))
131136
}
137+
138+
/// Sends a DHCPRELEASE message for the given lease.
139+
/// This is a "best effort" operation and should not block teardown.
140+
pub fn release_lease(&mut self, lease: &MozimV4Lease) -> Result<(), DhcpServiceError> {
141+
debug!(
142+
"Attempting to release lease for MAC: {}",
143+
&self.network_config.container_mac_addr
144+
);
145+
// Directly call the release function on the underlying mozim client.
146+
self.client
147+
.release(lease)
148+
.map_err(|e| DhcpServiceError::new(Bug, e.to_string()))
149+
}
132150
}
133151

134152
impl std::fmt::Display for DhcpServiceError {
@@ -149,46 +167,61 @@ impl From<DhcpServiceError> for Status {
149167
}
150168
}
151169

152-
pub async fn process_client_stream(mut client: DhcpV4Service) {
153-
while let Some(lease) = client.client.next().await {
154-
match lease {
155-
Ok(lease) => {
156-
log::info!(
157-
"got new lease for mac {}: {:?}",
158-
&client.network_config.container_mac_addr,
159-
&lease
160-
);
161-
// get previous lease and check if ip addr changed, if not we do not have to do anything
162-
if let Some(old_lease) = &client.previous_lease {
163-
if old_lease.yiaddr != lease.yiaddr
164-
|| old_lease.subnet_mask != lease.subnet_mask
165-
|| old_lease.gateways != lease.gateways
166-
{
167-
// ips do not match, remove old ones and assign new ones.
168-
log::info!(
169-
"ip or gateway for mac {} changed, update address",
170-
&client.network_config.container_mac_addr
171-
);
172-
match update_lease_ip(
173-
&client.network_config.ns_path,
174-
&client.network_config.container_iface,
175-
old_lease,
176-
&lease,
177-
) {
178-
Ok(_) => {}
179-
Err(err) => {
180-
log::error!("{err}");
181-
continue;
170+
pub async fn process_client_stream(service_arc: Arc<Mutex<DhcpV4Service>>) {
171+
loop {
172+
let next_lease_result = {
173+
let mut service = service_arc.lock().await;
174+
service.client.next().await
175+
};
176+
if let Some(lease_result) = next_lease_result {
177+
// Now that we have the result, we can re-lock to update the state.
178+
// This is safe because this part doesn't involve `.await`.
179+
match lease_result {
180+
Ok(lease) => {
181+
let mut client = service_arc.lock().await;
182+
log::info!(
183+
"got new lease for mac {}: {:?}",
184+
&client.network_config.container_mac_addr,
185+
&lease
186+
);
187+
// get previous lease and check if ip addr changed, if not we do not have to do anything
188+
if let Some(old_lease) = &client.previous_lease {
189+
if old_lease.yiaddr != lease.yiaddr
190+
|| old_lease.subnet_mask != lease.subnet_mask
191+
|| old_lease.gateways != lease.gateways
192+
{
193+
// ips do not match, remove old ones and assign new ones.
194+
log::info!(
195+
"ip or gateway for mac {} changed, update address",
196+
&client.network_config.container_mac_addr
197+
);
198+
match update_lease_ip(
199+
&client.network_config.ns_path,
200+
&client.network_config.container_iface,
201+
old_lease,
202+
&lease,
203+
) {
204+
Ok(_) => {}
205+
Err(err) => {
206+
log::error!("{err}");
207+
continue;
208+
}
182209
}
183210
}
184211
}
212+
client.previous_lease = Some(lease);
213+
}
214+
Err(err) => {
215+
let client = service_arc.lock().await;
216+
log::error!(
217+
"Failed to renew lease for {}: {err}",
218+
&client.network_config.container_mac_addr
219+
);
185220
}
186-
client.previous_lease = Some(lease)
187221
}
188-
Err(err) => log::error!(
189-
"Failed to renew lease for {}: {err}",
190-
&client.network_config.container_mac_addr
191-
),
222+
} else {
223+
// The stream has ended (e.g., the client disconnected), so we exit the loop.
224+
break;
192225
}
193226
}
194227
}

test-dhcp/003-teardown.bats

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55

66
load helpers
77

8+
function start_proxy() {
9+
RUST_LOG=debug ip netns exec "$NS_NAME" $NETAVARK dhcp-proxy --dir "$TMP_TESTDIR" --uds "$TMP_TESTDIR" &>"$TMP_TESTDIR/proxy.log" &
10+
PROXY_PID=$!
11+
}
12+
813
@test "basic teardown" {
914
read -r -d '\0' input_config <<EOF
1015
{
@@ -30,6 +35,10 @@ EOF
3035
assert "$output" == "true"
3136
# Run teardown
3237
run_teardown "$input_config"
38+
# Check the dnsmasq log to confirm it received the DHCPRELEASE message.
39+
# The release is sent synchronously, but we sleep briefly to allow dnsmasq to flush its logs.
40+
sleep 1
41+
assert `grep -c "DHCPRELEASE(br0).*[[:space:]]${CONTAINER_MAC}" "$TMP_TESTDIR/dnsmasq.log"` == 1
3342
run_helper cat "$TMP_TESTDIR/nv-proxy.lease"
3443
# Check that the length of the lease file is now zero
3544
run_helper jq ". | length" <<<"$output"

test-dhcp/helpers.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ function stop_dhcp() {
375375
}
376376

377377
function start_proxy() {
378-
RUST_LOG=info ip netns exec "$NS_NAME" $NETAVARK dhcp-proxy --dir "$TMP_TESTDIR" --uds "$TMP_TESTDIR" &>"$TMP_TESTDIR/proxy.log" &
378+
RUST_LOG=debug ip netns exec "$NS_NAME" $NETAVARK dhcp-proxy --dir "$TMP_TESTDIR" --uds "$TMP_TESTDIR" &>"$TMP_TESTDIR/proxy.log" &
379379
PROXY_PID=$!
380380
}
381381

0 commit comments

Comments
 (0)