|
| 1 | +// Copyright 2018 The Chromium OS Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style license that can be |
| 3 | +// found in the LICENSE-BSD-3-Clause file. |
| 4 | +// |
| 5 | +// Copyright © 2019 Intel Corporation |
| 6 | +// |
| 7 | +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause |
| 8 | + |
| 9 | +use crate::{ |
| 10 | + ActivateError, ActivateResult, Error, GuestMemoryMmap, GuestRegionMmap, |
| 11 | + VIRTIO_F_RING_INDIRECT_DESC, |
| 12 | +}; |
| 13 | +use libc::EFD_NONBLOCK; |
| 14 | +use std::collections::HashMap; |
| 15 | +use std::io::Write; |
| 16 | +use std::num::Wrapping; |
| 17 | +use std::sync::{ |
| 18 | + atomic::{AtomicBool, Ordering}, |
| 19 | + Arc, Barrier, |
| 20 | +}; |
| 21 | +use std::thread; |
| 22 | +use virtio_queue::Queue; |
| 23 | +use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestUsize}; |
| 24 | +use vm_migration::{MigratableError, Pausable}; |
| 25 | +use vm_virtio::AccessPlatform; |
| 26 | +use vm_virtio::VirtioDeviceType; |
| 27 | +use vmm_sys_util::eventfd::EventFd; |
| 28 | + |
/// Kind of interrupt a virtio device can request through [`VirtioInterrupt`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VirtioInterruptType {
    /// Interrupt that is not tied to a particular queue (configuration).
    Config,
    /// Interrupt associated with the queue identified by the wrapped `u16`.
    Queue(u16),
}
| 33 | + |
| 34 | +pub trait VirtioInterrupt: Send + Sync { |
| 35 | + fn trigger(&self, int_type: VirtioInterruptType) -> std::result::Result<(), std::io::Error>; |
| 36 | + fn notifier(&self, _int_type: VirtioInterruptType) -> Option<EventFd> { |
| 37 | + None |
| 38 | + } |
| 39 | +} |
| 40 | + |
| 41 | +#[derive(Clone)] |
| 42 | +pub struct UserspaceMapping { |
| 43 | + pub host_addr: u64, |
| 44 | + pub mem_slot: u32, |
| 45 | + pub addr: GuestAddress, |
| 46 | + pub len: GuestUsize, |
| 47 | + pub mergeable: bool, |
| 48 | +} |
| 49 | + |
/// A single shared memory region, described by its offset and length.
///
/// Instances are collected in `VirtioSharedMemoryList::region_list`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct VirtioSharedMemory {
    /// Offset of the region.
    // NOTE(review): presumably relative to the start of the enclosing
    // `VirtioSharedMemoryList` - confirm against callers.
    pub offset: u64,
    /// Length of the region.
    pub len: u64,
}
| 55 | + |
| 56 | +#[derive(Clone)] |
| 57 | +pub struct VirtioSharedMemoryList { |
| 58 | + pub host_addr: u64, |
| 59 | + pub mem_slot: u32, |
| 60 | + pub addr: GuestAddress, |
| 61 | + pub len: GuestUsize, |
| 62 | + pub region_list: Vec<VirtioSharedMemory>, |
| 63 | +} |
| 64 | + |
| 65 | +/// Trait for virtio devices to be driven by a virtio transport. |
| 66 | +/// |
| 67 | +/// The lifecycle of a virtio device is to be moved to a virtio transport, which will then query the |
| 68 | +/// device. Once the guest driver has configured the device, `VirtioDevice::activate` will be called |
| 69 | +/// and all the events, memory, and queues for device operation will be moved into the device. |
| 70 | +/// Optionally, a virtio device can implement device reset in which it returns said resources and |
| 71 | +/// resets its internal. |
| 72 | +pub trait VirtioDevice: Send { |
| 73 | + /// The virtio device type. |
| 74 | + fn device_type(&self) -> u32; |
| 75 | + |
| 76 | + /// The maximum size of each queue that this device supports. |
| 77 | + fn queue_max_sizes(&self) -> &[u16]; |
| 78 | + |
| 79 | + /// The set of feature bits that this device supports. |
| 80 | + fn features(&self) -> u64 { |
| 81 | + 0 |
| 82 | + } |
| 83 | + |
| 84 | + /// Acknowledges that this set of features should be enabled. |
| 85 | + fn ack_features(&mut self, value: u64) { |
| 86 | + let _ = value; |
| 87 | + } |
| 88 | + |
| 89 | + /// Reads this device configuration space at `offset`. |
| 90 | + fn read_config(&self, _offset: u64, _data: &mut [u8]) { |
| 91 | + warn!( |
| 92 | + "No readable configuration fields for {}", |
| 93 | + VirtioDeviceType::from(self.device_type()) |
| 94 | + ); |
| 95 | + } |
| 96 | + |
| 97 | + /// Writes to this device configuration space at `offset`. |
| 98 | + fn write_config(&mut self, _offset: u64, _data: &[u8]) { |
| 99 | + warn!( |
| 100 | + "No writable configuration fields for {}", |
| 101 | + VirtioDeviceType::from(self.device_type()) |
| 102 | + ); |
| 103 | + } |
| 104 | + |
| 105 | + /// Activates this device for real usage. |
| 106 | + fn activate( |
| 107 | + &mut self, |
| 108 | + mem: GuestMemoryAtomic<GuestMemoryMmap>, |
| 109 | + interrupt_evt: Arc<dyn VirtioInterrupt>, |
| 110 | + queues: Vec<(usize, Queue, EventFd)>, |
| 111 | + ) -> ActivateResult; |
| 112 | + |
| 113 | + /// Optionally deactivates this device and returns ownership of the guest memory map, interrupt |
| 114 | + /// event, and queue events. |
| 115 | + fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { |
| 116 | + None |
| 117 | + } |
| 118 | + |
| 119 | + /// Returns the list of shared memory regions required by the device. |
| 120 | + fn get_shm_regions(&self) -> Option<VirtioSharedMemoryList> { |
| 121 | + None |
| 122 | + } |
| 123 | + |
| 124 | + /// Updates the list of shared memory regions required by the device. |
| 125 | + fn set_shm_regions( |
| 126 | + &mut self, |
| 127 | + _shm_regions: VirtioSharedMemoryList, |
| 128 | + ) -> std::result::Result<(), Error> { |
| 129 | + std::unimplemented!() |
| 130 | + } |
| 131 | + |
| 132 | + /// Some devices may need to do some explicit shutdown work. This method |
| 133 | + /// may be implemented to do this. The VMM should call shutdown() on |
| 134 | + /// every device as part of shutting down the VM. Acting on the device |
| 135 | + /// after a shutdown() can lead to unpredictable results. |
| 136 | + fn shutdown(&mut self) {} |
| 137 | + |
| 138 | + fn add_memory_region( |
| 139 | + &mut self, |
| 140 | + _region: &Arc<GuestRegionMmap>, |
| 141 | + ) -> std::result::Result<(), Error> { |
| 142 | + Ok(()) |
| 143 | + } |
| 144 | + |
| 145 | + /// Returns the list of userspace mappings associated with this device. |
| 146 | + fn userspace_mappings(&self) -> Vec<UserspaceMapping> { |
| 147 | + Vec::new() |
| 148 | + } |
| 149 | + |
| 150 | + /// Return the counters that this device exposes |
| 151 | + fn counters(&self) -> Option<HashMap<&'static str, Wrapping<u64>>> { |
| 152 | + None |
| 153 | + } |
| 154 | + |
| 155 | + /// Helper to allow common implementation of read_config |
| 156 | + fn read_config_from_slice(&self, config: &[u8], offset: u64, mut data: &mut [u8]) { |
| 157 | + let config_len = config.len() as u64; |
| 158 | + let data_len = data.len() as u64; |
| 159 | + if offset + data_len > config_len { |
| 160 | + error!( |
| 161 | + "Out-of-bound access to configuration: config_len = {} offset = {:x} length = {} for {}", |
| 162 | + config_len, |
| 163 | + offset, |
| 164 | + data_len, |
| 165 | + self.device_type() |
| 166 | + ); |
| 167 | + return; |
| 168 | + } |
| 169 | + if let Some(end) = offset.checked_add(data.len() as u64) { |
| 170 | + data.write_all(&config[offset as usize..std::cmp::min(end, config_len) as usize]) |
| 171 | + .unwrap(); |
| 172 | + } |
| 173 | + } |
| 174 | + |
| 175 | + /// Helper to allow common implementation of write_config |
| 176 | + fn write_config_helper(&self, config: &mut [u8], offset: u64, data: &[u8]) { |
| 177 | + let config_len = config.len() as u64; |
| 178 | + let data_len = data.len() as u64; |
| 179 | + if offset + data_len > config_len { |
| 180 | + error!( |
| 181 | + "Out-of-bound access to configuration: config_len = {} offset = {:x} length = {} for {}", |
| 182 | + config_len, |
| 183 | + offset, |
| 184 | + data_len, |
| 185 | + self.device_type() |
| 186 | + ); |
| 187 | + return; |
| 188 | + } |
| 189 | + |
| 190 | + if let Some(end) = offset.checked_add(config.len() as u64) { |
| 191 | + let mut offset_config = |
| 192 | + &mut config[offset as usize..std::cmp::min(end, config_len) as usize]; |
| 193 | + offset_config.write_all(data).unwrap(); |
| 194 | + } |
| 195 | + } |
| 196 | + |
| 197 | + /// Set the access platform trait to let the device perform address |
| 198 | + /// translations if needed. |
| 199 | + fn set_access_platform(&mut self, _access_platform: Arc<dyn AccessPlatform>) {} |
| 200 | +} |
| 201 | + |
/// Address translation interface modelled after a physical DMA remapping
/// table, i.e. translation between an IOVA and a physical address.
///
/// Virtio devices are the intended users: they translate an address through
/// this trait before reading from the guest physical address. The
/// implementation is expected to come from the code emulating the IOMMU for
/// the guest.
pub trait DmaRemapping {
    /// Translates the GVA `addr` into a GPA.
    ///
    /// # Errors
    ///
    /// Returns a `std::io::Error` when the translation fails.
    // NOTE(review): `id` presumably identifies the device/endpoint whose
    // mappings are consulted - confirm against the implementors.
    fn translate_gva(&self, id: u32, addr: u64) -> std::result::Result<u64, std::io::Error>;

    /// Translates the GPA `addr` into a GVA.
    ///
    /// # Errors
    ///
    /// Returns a `std::io::Error` when the translation fails.
    fn translate_gpa(&self, id: u32, addr: u64) -> std::result::Result<u64, std::io::Error>;
}
| 214 | + |
| 215 | +/// Structure to handle device state common to all devices |
| 216 | +#[derive(Default)] |
| 217 | +pub struct VirtioCommon { |
| 218 | + pub avail_features: u64, |
| 219 | + pub acked_features: u64, |
| 220 | + pub kill_evt: Option<EventFd>, |
| 221 | + pub interrupt_cb: Option<Arc<dyn VirtioInterrupt>>, |
| 222 | + pub pause_evt: Option<EventFd>, |
| 223 | + pub paused: Arc<AtomicBool>, |
| 224 | + pub paused_sync: Option<Arc<Barrier>>, |
| 225 | + pub epoll_threads: Option<Vec<thread::JoinHandle<()>>>, |
| 226 | + pub queue_sizes: Vec<u16>, |
| 227 | + pub device_type: u32, |
| 228 | + pub min_queues: u16, |
| 229 | + pub access_platform: Option<Arc<dyn AccessPlatform>>, |
| 230 | +} |
| 231 | + |
| 232 | +impl VirtioCommon { |
| 233 | + pub fn feature_acked(&self, feature: u64) -> bool { |
| 234 | + self.acked_features & 1 << feature == 1 << feature |
| 235 | + } |
| 236 | + |
| 237 | + pub fn ack_features(&mut self, value: u64) { |
| 238 | + let mut v = value; |
| 239 | + // Check if the guest is ACK'ing a feature that we didn't claim to have. |
| 240 | + let unrequested_features = v & !self.avail_features; |
| 241 | + if unrequested_features != 0 { |
| 242 | + warn!("Received acknowledge request for unknown feature."); |
| 243 | + |
| 244 | + // Don't count these features as acked. |
| 245 | + v &= !unrequested_features; |
| 246 | + } |
| 247 | + self.acked_features |= v; |
| 248 | + } |
| 249 | + |
| 250 | + pub fn activate( |
| 251 | + &mut self, |
| 252 | + queues: &[(usize, Queue, EventFd)], |
| 253 | + interrupt_cb: &Arc<dyn VirtioInterrupt>, |
| 254 | + ) -> ActivateResult { |
| 255 | + if queues.len() < self.min_queues.into() { |
| 256 | + error!( |
| 257 | + "Number of enabled queues lower than min: {} vs {}", |
| 258 | + queues.len(), |
| 259 | + self.min_queues |
| 260 | + ); |
| 261 | + return Err(ActivateError::BadActivate); |
| 262 | + } |
| 263 | + |
| 264 | + let kill_evt = EventFd::new(EFD_NONBLOCK).map_err(|e| { |
| 265 | + error!("failed creating kill EventFd: {}", e); |
| 266 | + ActivateError::BadActivate |
| 267 | + })?; |
| 268 | + self.kill_evt = Some(kill_evt); |
| 269 | + |
| 270 | + let pause_evt = EventFd::new(EFD_NONBLOCK).map_err(|e| { |
| 271 | + error!("failed creating pause EventFd: {}", e); |
| 272 | + ActivateError::BadActivate |
| 273 | + })?; |
| 274 | + self.pause_evt = Some(pause_evt); |
| 275 | + |
| 276 | + // Save the interrupt EventFD as we need to return it on reset |
| 277 | + // but clone it to pass into the thread. |
| 278 | + self.interrupt_cb = Some(interrupt_cb.clone()); |
| 279 | + |
| 280 | + Ok(()) |
| 281 | + } |
| 282 | + |
| 283 | + pub fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { |
| 284 | + // We first must resume the virtio thread if it was paused. |
| 285 | + if self.pause_evt.take().is_some() { |
| 286 | + self.resume().ok()?; |
| 287 | + } |
| 288 | + |
| 289 | + if let Some(kill_evt) = self.kill_evt.take() { |
| 290 | + // Ignore the result because there is nothing we can do about it. |
| 291 | + let _ = kill_evt.write(1); |
| 292 | + } |
| 293 | + |
| 294 | + if let Some(mut threads) = self.epoll_threads.take() { |
| 295 | + for t in threads.drain(..) { |
| 296 | + if let Err(e) = t.join() { |
| 297 | + error!("Error joining thread: {:?}", e); |
| 298 | + } |
| 299 | + } |
| 300 | + } |
| 301 | + |
| 302 | + // Return the interrupt |
| 303 | + Some(self.interrupt_cb.take().unwrap()) |
| 304 | + } |
| 305 | + |
| 306 | + pub fn dup_eventfds(&self) -> (EventFd, EventFd) { |
| 307 | + ( |
| 308 | + self.kill_evt.as_ref().unwrap().try_clone().unwrap(), |
| 309 | + self.pause_evt.as_ref().unwrap().try_clone().unwrap(), |
| 310 | + ) |
| 311 | + } |
| 312 | + |
| 313 | + pub fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) { |
| 314 | + self.access_platform = Some(access_platform); |
| 315 | + // Indirect descriptors feature is not supported when the device |
| 316 | + // requires the addresses held by the descriptors to be translated. |
| 317 | + self.avail_features &= !(1 << VIRTIO_F_RING_INDIRECT_DESC); |
| 318 | + } |
| 319 | +} |
| 320 | + |
| 321 | +impl Pausable for VirtioCommon { |
| 322 | + fn pause(&mut self) -> std::result::Result<(), MigratableError> { |
| 323 | + info!( |
| 324 | + "Pausing virtio-{}", |
| 325 | + VirtioDeviceType::from(self.device_type) |
| 326 | + ); |
| 327 | + self.paused.store(true, Ordering::SeqCst); |
| 328 | + if let Some(pause_evt) = &self.pause_evt { |
| 329 | + pause_evt |
| 330 | + .write(1) |
| 331 | + .map_err(|e| MigratableError::Pause(e.into()))?; |
| 332 | + |
| 333 | + // Wait for all threads to acknowledge the pause before going |
| 334 | + // any further. This is exclusively performed when pause_evt |
| 335 | + // eventfd is Some(), as this means the virtio device has been |
| 336 | + // activated. One specific case where the device can be paused |
| 337 | + // while it hasn't been yet activated is snapshot/restore. |
| 338 | + self.paused_sync.as_ref().unwrap().wait(); |
| 339 | + } |
| 340 | + |
| 341 | + Ok(()) |
| 342 | + } |
| 343 | + |
| 344 | + fn resume(&mut self) -> std::result::Result<(), MigratableError> { |
| 345 | + info!( |
| 346 | + "Resuming virtio-{}", |
| 347 | + VirtioDeviceType::from(self.device_type) |
| 348 | + ); |
| 349 | + self.paused.store(false, Ordering::SeqCst); |
| 350 | + if let Some(epoll_threads) = &self.epoll_threads { |
| 351 | + for t in epoll_threads.iter() { |
| 352 | + t.thread().unpark(); |
| 353 | + } |
| 354 | + } |
| 355 | + |
| 356 | + Ok(()) |
| 357 | + } |
| 358 | +} |
0 commit comments