@@ -26,9 +26,12 @@ use pal_async::task::Spawn;
26
26
use pal_async:: task:: Task ;
27
27
use std:: collections:: HashMap ;
28
28
use std:: collections:: hash_map;
29
+ use std:: sync:: Arc ;
29
30
use thiserror:: Error ;
30
31
use tracing:: Instrument ;
32
+ use user_driver:: vfio:: PciDeviceResetMethod ;
31
33
use user_driver:: vfio:: VfioDevice ;
34
+ use user_driver:: vfio:: vfio_set_device_reset_method;
32
35
use vm_resource:: AsyncResolveResource ;
33
36
use vm_resource:: ResourceId ;
34
37
use vm_resource:: ResourceResolver ;
@@ -92,6 +95,7 @@ impl NvmeManager {
92
95
driver_source : & VmTaskDriverSource ,
93
96
vp_count : u32 ,
94
97
save_restore_supported : bool ,
98
+ nvme_always_flr : bool ,
95
99
is_isolated : bool ,
96
100
saved_state : Option < NvmeSavedState > ,
97
101
dma_client_spawner : DmaClientSpawner ,
@@ -103,6 +107,7 @@ impl NvmeManager {
103
107
devices : HashMap :: new ( ) ,
104
108
vp_count,
105
109
save_restore_supported,
110
+ nvme_always_flr,
106
111
is_isolated,
107
112
dma_client_spawner,
108
113
} ;
@@ -220,12 +225,99 @@ struct NvmeManagerWorker {
220
225
vp_count : u32 ,
221
226
/// Running environment (memory layout) allows save/restore.
222
227
save_restore_supported : bool ,
228
+ nvme_always_flr : bool ,
223
229
/// If this VM is isolated or not. This influences DMA client allocations.
224
230
is_isolated : bool ,
225
231
#[ inspect( skip) ]
226
232
dma_client_spawner : DmaClientSpawner ,
227
233
}
228
234
235
+ async fn create_nvme_device (
236
+ driver_source : & VmTaskDriverSource ,
237
+ pci_id : & str ,
238
+ vp_count : u32 ,
239
+ nvme_always_flr : bool ,
240
+ is_isolated : bool ,
241
+ dma_client : Arc < dyn user_driver:: DmaClient > ,
242
+ ) -> Result < nvme_driver:: NvmeDriver < VfioDevice > , InnerError > {
243
+ // Disable FLR on vfio attach/detach; this allows faster system
244
+ // startup/shutdown with the caveat that the device needs to be properly
245
+ // sent through the shutdown path during servicing operations, as that is
246
+ // the only cleanup performed. If the device fails to initialize, turn FLR
247
+ // on and try again, so that the reset is invoked on the next attach.
248
+ let update_reset = |method : PciDeviceResetMethod | {
249
+ if let Err ( err) = vfio_set_device_reset_method ( pci_id, method) {
250
+ tracing:: warn!(
251
+ ?method,
252
+ err = & err as & dyn std:: error:: Error ,
253
+ "Failed to update reset_method"
254
+ ) ;
255
+ }
256
+ } ;
257
+ let mut last_err = None ;
258
+ let reset_methods = if nvme_always_flr {
259
+ & [ PciDeviceResetMethod :: Flr ] [ ..]
260
+ } else {
261
+ // If this code can't create a device without resetting it, then still try to issue an FLR
262
+ // in case that unwedges something weird in the device state.
263
+ // (This is implicit when the code in [`try_create_nvme_device`] opens a handle to the
264
+ // Vfio device).
265
+ & [ PciDeviceResetMethod :: NoReset , PciDeviceResetMethod :: Flr ] [ ..]
266
+ } ;
267
+ for reset_method in reset_methods {
268
+ update_reset ( * reset_method) ;
269
+ match try_create_nvme_device (
270
+ driver_source,
271
+ pci_id,
272
+ vp_count,
273
+ is_isolated,
274
+ dma_client. clone ( ) ,
275
+ )
276
+ . await
277
+ {
278
+ Ok ( device) => {
279
+ if !nvme_always_flr && !matches ! ( reset_method, PciDeviceResetMethod :: NoReset ) {
280
+ update_reset ( PciDeviceResetMethod :: NoReset ) ;
281
+ }
282
+ return Ok ( device) ;
283
+ }
284
+ Err ( err) => {
285
+ tracing:: error!(
286
+ pci_id,
287
+ ?reset_method,
288
+ err = & err as & dyn std:: error:: Error ,
289
+ "failed to create nvme device"
290
+ ) ;
291
+ last_err = Some ( err) ;
292
+ }
293
+ }
294
+ }
295
+ // Return the most reliable error (this code assumes that the reset methods are in increasing order
296
+ // of reliability).
297
+ Err ( last_err. unwrap ( ) )
298
+ }
299
+
300
+ async fn try_create_nvme_device (
301
+ driver_source : & VmTaskDriverSource ,
302
+ pci_id : & str ,
303
+ vp_count : u32 ,
304
+ is_isolated : bool ,
305
+ dma_client : Arc < dyn user_driver:: DmaClient > ,
306
+ ) -> Result < nvme_driver:: NvmeDriver < VfioDevice > , InnerError > {
307
+ let device = VfioDevice :: new ( driver_source, pci_id, dma_client)
308
+ . instrument ( tracing:: info_span!( "vfio_device_open" , pci_id) )
309
+ . await
310
+ . map_err ( InnerError :: Vfio ) ?;
311
+
312
+ // TODO: For now, any isolation means use bounce buffering. This
313
+ // needs to change when we have nvme devices that support DMA to
314
+ // confidential memory.
315
+ nvme_driver:: NvmeDriver :: new ( driver_source, vp_count, device, is_isolated)
316
+ . instrument ( tracing:: info_span!( "nvme_driver_init" , pci_id) )
317
+ . await
318
+ . map_err ( InnerError :: DeviceInitFailed )
319
+ }
320
+
229
321
impl NvmeManagerWorker {
230
322
async fn run ( & mut self , mut recv : mesh:: Receiver < Request > ) {
231
323
let ( join_span, nvme_keepalive) = loop {
@@ -315,26 +407,18 @@ impl NvmeManagerWorker {
315
407
} )
316
408
. map_err ( InnerError :: DmaClient ) ?;
317
409
318
- let device = VfioDevice :: new ( & self . driver_source , entry. key ( ) , dma_client)
319
- . instrument ( tracing:: info_span!( "vfio_device_open" , pci_id) )
320
- . await
321
- . map_err ( InnerError :: Vfio ) ?;
322
-
323
- // TODO: For now, any isolation means use bounce buffering. This
324
- // needs to change when we have nvme devices that support DMA to
325
- // confidential memory.
326
- let driver = nvme_driver:: NvmeDriver :: new (
410
+ let driver = create_nvme_device (
327
411
& self . driver_source ,
412
+ & pci_id,
328
413
self . vp_count ,
329
- device ,
414
+ self . nvme_always_flr ,
330
415
self . is_isolated ,
416
+ dma_client,
331
417
)
332
- . instrument ( tracing:: info_span!(
333
- "nvme_driver_init" ,
334
- pci_id = entry. key( )
335
- ) )
336
- . await
337
- . map_err ( InnerError :: DeviceInitFailed ) ?;
418
+ . instrument (
419
+ tracing:: info_span!( "create_nvme_device" , %pci_id, self . nvme_always_flr) ,
420
+ )
421
+ . await ?;
338
422
339
423
entry. insert ( driver)
340
424
}
0 commit comments