NvmExpressDxe: Request Number of Queues from Controller #1260

Open · wants to merge 4 commits into base: dev/202405
217 changes: 174 additions & 43 deletions MdeModulePkg/Bus/Pci/NvmExpressDxe/NvmExpress.c
@@ -172,14 +172,20 @@ EnumerateNvmeDevNamespace (
Device->BlockIo.WriteBlocks = NvmeBlockIoWriteBlocks;
Device->BlockIo.FlushBlocks = NvmeBlockIoFlushBlocks;

//
// Create BlockIo2 Protocol instance
//
Device->BlockIo2.Media = &Device->Media;
Device->BlockIo2.Reset = NvmeBlockIoResetEx;
Device->BlockIo2.ReadBlocksEx = NvmeBlockIoReadBlocksEx;
Device->BlockIo2.WriteBlocksEx = NvmeBlockIoWriteBlocksEx;
Device->BlockIo2.FlushBlocksEx = NvmeBlockIoFlushBlocksEx;
// MU_CHANGE [BEGIN] - Request Number of Queues from Controller
if (Private->NumberOfDataQueuePairs > 1) {
// We have multiple data queues, so we can support the BlockIo2 protocol

// Create BlockIo2 Protocol instance
Device->BlockIo2.Media = &Device->Media;
Device->BlockIo2.Reset = NvmeBlockIoResetEx;
Device->BlockIo2.ReadBlocksEx = NvmeBlockIoReadBlocksEx;
Device->BlockIo2.WriteBlocksEx = NvmeBlockIoWriteBlocksEx;
Device->BlockIo2.FlushBlocksEx = NvmeBlockIoFlushBlocksEx;
}

// MU_CHANGE [END] - Request Number of Queues from Controller

InitializeListHead (&Device->AsyncQueue);

// MU_CHANGE Start - Add Media Sanitize
@@ -254,14 +260,13 @@ EnumerateNvmeDevNamespace (
//
Device->DeviceHandle = NULL;

// MU_CHANGE [BEGIN] - Request Number of Queues from Controller
Status = gBS->InstallMultipleProtocolInterfaces (
&Device->DeviceHandle,
&gEfiDevicePathProtocolGuid,
Device->DevicePath,
&gEfiBlockIoProtocolGuid,
&Device->BlockIo,
&gEfiBlockIo2ProtocolGuid,
&Device->BlockIo2,
&gEfiDiskInfoProtocolGuid,
&Device->DiskInfo,
NULL
@@ -271,6 +276,21 @@ EnumerateNvmeDevNamespace (
goto Exit;
}

if (Private->NumberOfDataQueuePairs > 1) {
// We have multiple data queues, so we can support the BlockIo2 protocol
Status = gBS->InstallMultipleProtocolInterfaces (
&Device->DeviceHandle,
&gEfiBlockIo2ProtocolGuid,
&Device->BlockIo2
);
if (EFI_ERROR (Status)) {
DEBUG ((DEBUG_ERROR, "%a: Failed to install BlockIo2 protocol\n", __func__));
goto Exit;
}
}

// MU_CHANGE [END] - Request Number of Queues from Controller

//
// Check if the NVMe controller supports the Security Send and Security Receive commands
//
@@ -288,12 +308,23 @@ EnumerateNvmeDevNamespace (
Device->DevicePath,
&gEfiBlockIoProtocolGuid,
&Device->BlockIo,
&gEfiBlockIo2ProtocolGuid,
&Device->BlockIo2,
&gEfiDiskInfoProtocolGuid,
&Device->DiskInfo,
NULL
);

// MU_CHANGE [BEGIN] - Request Number of Queues from Controller
if (Private->NumberOfDataQueuePairs > 1) {
// We have multiple data queues, so we need to uninstall the BlockIo2 protocol
gBS->UninstallMultipleProtocolInterfaces (
Device->DeviceHandle,
&gEfiBlockIo2ProtocolGuid,
&Device->BlockIo2
);
}

// MU_CHANGE [END] - Request Number of Queues from Controller

goto Exit;
}
}
@@ -477,6 +508,29 @@ UnregisterNvmeNamespace (
Handle
);

// MU_CHANGE [BEGIN] - Request Number of Queues from Controller
//
// If BlockIo2 is installed, uninstall it.
//
if (Device->Controller->NumberOfDataQueuePairs > 1) {

Contributor:

Because BlockIo2 is not widely used, I would recommend uninstalling the other protocols first, since that's where real usage would be, then uninstalling BlockIo2 if required. Don't duplicate the OpenProtocol logic; just track whether either uninstall failed.

Contributor Author (@VivianNK, Mar 26, 2025):

Part of UninstallMultipleProtocolInterfaces involves reinstalling all of the protocols if even one of them fails to uninstall, so as not to leave a split state. @apop5 can you confirm?
#1260 (comment)

Contributor:

That's not what I mean. As written, you uninstall BlockIo2 if required and, if that fails, open the protocol on the dummy interface; then you uninstall the more widely used protocols and open the dummy interface again if those fail. My suggestion is to not do the OpenProtocol twice: simply keep track of whether the BlockIo2 uninstall failed and, if either uninstall failed, then do the OpenProtocol. That is, don't do that part twice.
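
A minimal sketch of the flow the reviewer describes, reusing the handles and interfaces already present in UnregisterNvmeNamespace (Handle, Controller, This, DummyInterface, Device); the UninstallFailed flag and the final status handling are illustrative, not part of this PR:

BOOLEAN  UninstallFailed = FALSE;

//
// Uninstall BlockIo2 first when it was installed (multiple data queue pairs),
// but do not reopen the child reference yet; just remember the failure.
//
if (Device->Controller->NumberOfDataQueuePairs > 1) {
  Status = gBS->UninstallProtocolInterface (
                  Handle,
                  &gEfiBlockIo2ProtocolGuid,
                  &Device->BlockIo2
                  );
  if (EFI_ERROR (Status)) {
    UninstallFailed = TRUE;
  }
}

//
// Uninstall the widely used protocols installed in DriverBindingStart().
//
Status = gBS->UninstallMultipleProtocolInterfaces (
                Handle,
                &gEfiDevicePathProtocolGuid,
                Device->DevicePath,
                &gEfiBlockIoProtocolGuid,
                &Device->BlockIo,
                &gEfiDiskInfoProtocolGuid,
                &Device->DiskInfo,
                NULL
                );
if (EFI_ERROR (Status)) {
  UninstallFailed = TRUE;
}

//
// Reopen the child's PassThru reference a single time if any uninstall failed.
//
if (UninstallFailed) {
  gBS->OpenProtocol (
         Controller,
         &gEfiNvmExpressPassThruProtocolGuid,
         (VOID **)&DummyInterface,
         This->DriverBindingHandle,
         Handle,
         EFI_OPEN_PROTOCOL_BY_CHILD_CONTROLLER
         );
  return EFI_DEVICE_ERROR;  // illustrative; propagate the real failing Status as appropriate
}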

Status = gBS->UninstallProtocolInterface (
Handle,
&gEfiBlockIo2ProtocolGuid,
&Device->BlockIo2
);
if (EFI_ERROR (Status)) {
gBS->OpenProtocol (
Controller,
&gEfiNvmExpressPassThruProtocolGuid,
(VOID **)&DummyInterface,
This->DriverBindingHandle,
Handle,
EFI_OPEN_PROTOCOL_BY_CHILD_CONTROLLER
);
return Status;
}
}

//
// The Nvm Express driver installs the BlockIo and DiskInfo in the DriverBindingStart().
// Here should uninstall both of them.
@@ -487,12 +541,11 @@ UnregisterNvmeNamespace (
Device->DevicePath,
&gEfiBlockIoProtocolGuid,
&Device->BlockIo,
&gEfiBlockIo2ProtocolGuid,
&Device->BlockIo2,
&gEfiDiskInfoProtocolGuid,
&Device->DiskInfo,
NULL
);
// MU_CHANGE [END] - Request Number of Queues from Controller

if (EFI_ERROR (Status)) {
gBS->OpenProtocol (
@@ -957,9 +1010,11 @@ NvmExpressDriverBindingStart (
EFI_PHYSICAL_ADDRESS MappedAddr;
UINTN Bytes;
EFI_NVM_EXPRESS_PASS_THRU_PROTOCOL *Passthru;
// MU_CHANGE - Support alternative hardware queue sizes in NVME driver
UINTN QueuePageCount = PcdGetBool (PcdSupportAlternativeQueueSize) ?

Contributor:

You are changing a previous MU_CHANGE. Should this PR either be marked [SQUASH ON REBASE] or, if it completely supersedes the previous commit(s), revert that commit and apply this one as a [REBASE & FF]?

That might look like splitting this PR into multiple commits, one of which is squashed with the previous commit, or reverting it.

NVME_ALTERNATIVE_TOTAL_QUEUE_BUFFER_IN_PAGES : 6;
// MU_CHANGE [BEGIN] - Allocate IO Queue Buffer
NVME_AQA *Aqa;
UINTN AdminQueuePageCount; // MU_CHANGE - Support alternative hardware queue sizes in NVME driver

// MU_CHANGE [END] - Allocate IO Queue Buffer

DEBUG ((DEBUG_INFO, "NvmExpressDriverBindingStart: start\n"));

@@ -1033,9 +1088,40 @@ NvmExpressDriverBindingStart (

// MU_CHANGE - Support alternative hardware queue sizes in NVME driver

// MU_CHANGE [BEGIN] - Allocate IO Queue Buffer
//
// Depending on PCD disablement, either support the default or alternative
// queue sizes.
// Set the Admin Queue Attributes
//
Aqa = AllocateZeroPool (sizeof (NVME_AQA));

if (Aqa == NULL) {
DEBUG ((DEBUG_ERROR, "NvmExpressDriverBindingStart: allocating pool for Nvme Aqa Data failed!\n"));
Status = EFI_OUT_OF_RESOURCES;
goto Exit;
}

// Set the sizes of the admin submission & completion queues in number of entries
Aqa->Asqs = PcdGetBool (PcdSupportAlternativeQueueSize) ? MIN (NVME_ALTERNATIVE_MAX_QUEUE_SIZE, Private->Cap.Mqes) : NVME_ASQ_SIZE;
Aqa->Rsvd1 = 0;
Aqa->Acqs = PcdGetBool (PcdSupportAlternativeQueueSize) ? MIN (NVME_ALTERNATIVE_MAX_QUEUE_SIZE, Private->Cap.Mqes) : NVME_ACQ_SIZE;

Contributor:

I might be misremembering this change, but if we are now querying the NVMe controller for the queue size supported, should we not just use that, regardless of a PCD? Or is this for the case where a controller is wrong about its queue size?

In either case, it seems to me that either we don't need the PCD (and just take what the controller supports, or the max we support), or the HW needs to be fixed; failing that, we would need a PCD that tells us what queue size to use (but again, I am dubious of not doing what the HW tells us).

Contributor Author (@VivianNK, Mar 26, 2025):

The PCD essentially changes the maximum queue size we (the driver) support. By default, the driver has hard-coded maximum sizes that vary depending on the queue type and which queue pair it belongs to. With the PCD, one maximum size is used for all queues.

This PR's change concerns the number of queue pairs, while the PCD concerns the driver's supported "size" of each queue, i.e. the number of queue entries per queue.

Contributor:

Again, I know there is history here, but I don't remember it: why not just change the size the driver supports to the uniform size?

I may be misunderstanding, but I thought part of the change is to query the HW for what queue size it supports? And then it doesn't matter what size the driver supports as long as we just do what the HW actually supports?

Contributor Author:

For compatibility, we should keep the PCD; we can contact the original requestors to see if it's still needed. For now, this change can be made independently.

Contributor:

Sounds good to me. Can you file an issue on mu_basecore to track removing the PCD (or at least finding out whether it is still needed)?
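
A condensed sketch of the size-selection logic under discussion, assuming the names used in the diff (PcdSupportAlternativeQueueSize, NVME_ALTERNATIVE_MAX_QUEUE_SIZE, Private->Cap.Mqes, NVME_ASQ_SIZE, NVME_ACQ_SIZE); the helper itself is hypothetical and only restates what the Aqa assignments above already do:

//
// Hypothetical helper: the PCD switches the driver-side maximum from the
// per-queue defaults to one uniform alternative size, capped by the queue
// size the controller reports (Private->Cap.Mqes).
//
STATIC
UINT32
NvmeSelectAdminQueueSize (
  IN UINT32  DefaultSize,      // NVME_ASQ_SIZE or NVME_ACQ_SIZE
  IN UINT32  ControllerMqes    // Private->Cap.Mqes
  )
{
  if (PcdGetBool (PcdSupportAlternativeQueueSize)) {
    return MIN (NVME_ALTERNATIVE_MAX_QUEUE_SIZE, ControllerMqes);
  }

  return DefaultSize;
}

//
// Usage equivalent to the assignments above:
//   Aqa->Asqs = NvmeSelectAdminQueueSize (NVME_ASQ_SIZE, Private->Cap.Mqes);
//   Aqa->Acqs = NvmeSelectAdminQueueSize (NVME_ACQ_SIZE, Private->Cap.Mqes);
//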

Aqa->Rsvd2 = 0;

//
// Save Queue Pair Data for admin queues in controller data structure
//
Private->SqData[0].NumberOfEntries = Aqa->Asqs;
Private->CqData[0].NumberOfEntries = Aqa->Acqs;

//
// Set admin queue entry size to default
//
Private->SqData[0].EntrySize = NVME_IOSQES_MIN;
Private->CqData[0].EntrySize = NVME_IOCQES_MIN;

// Calculate the number of pages required for the admin queues
AdminQueuePageCount = EFI_SIZE_TO_PAGES (Private->SqData[0].NumberOfEntries * LShiftU64 (2, Private->SqData[0].EntrySize))
+ EFI_SIZE_TO_PAGES (Private->CqData[0].NumberOfEntries * LShiftU64 (2, Private->CqData[0].EntrySize));
// MU_CHANGE [END] - Allocate IO Queue Buffer
//
// Default:
// 6 x 4kB aligned buffers will be carved out of this buffer.
@@ -1059,20 +1145,26 @@ NvmExpressDriverBindingStart (
//
// Allocate 15 pages of memory, then map it for bus master read and write.
//

// MU_CHANGE [BEGIN] - Allocate IO Queue Buffer
//
// Allocate Admin Queues
//
Status = PciIo->AllocateBuffer (
PciIo,
AllocateAnyPages,
EfiBootServicesData,
QueuePageCount,
AdminQueuePageCount,
(VOID **)&Private->Buffer,
0
);
// MU_CHANGE [END] - Allocate IO Queue Buffer
if (EFI_ERROR (Status)) {
goto Exit;
}

// MU_CHANGE - Support alternative hardware queue sizes in NVME driver
Bytes = EFI_PAGES_TO_SIZE (QueuePageCount);
Bytes = EFI_PAGES_TO_SIZE (AdminQueuePageCount); // MU_CHANGE - Allocate IO Queue Buffer
Status = PciIo->Map (
PciIo,
EfiPciIoOperationBusMasterCommonBuffer,
@@ -1083,7 +1175,8 @@ NvmExpressDriverBindingStart (
);

// MU_CHANGE - Support alternative hardware queue sizes in NVME driver
if (EFI_ERROR (Status) || (Bytes != EFI_PAGES_TO_SIZE (QueuePageCount))) {
if (EFI_ERROR (Status) || (Bytes != EFI_PAGES_TO_SIZE (AdminQueuePageCount))) {
// MU_CHANGE - Allocate IO Queue Buffer
goto Exit;
}

@@ -1104,34 +1197,42 @@ NvmExpressDriverBindingStart (
InitializeListHead (&Private->AsyncPassThruQueue);
InitializeListHead (&Private->UnsubmittedSubtasks);

Status = NvmeControllerInit (Private);
Status = NvmeControllerInit (Private, Aqa); // MU_CHANGE - Allocate IO Queue Buffer
if (EFI_ERROR (Status)) {
goto Exit;
}

// MU_CHANGE [BEGIN] - Request Number of Queues from Controller

//
// Start the asynchronous I/O completion monitor
// The ProcessAsyncTaskList event and NVME_HC_ASYNC_TIMER timer are only used for the BlockIo2 protocol,
// which is only installed when the number of IO queues is greater than 1
//
Status = gBS->CreateEvent (
EVT_TIMER | EVT_NOTIFY_SIGNAL,
TPL_NOTIFY,
ProcessAsyncTaskList,
Private,
&Private->TimerEvent
);
if (EFI_ERROR (Status)) {
goto Exit;
}
if (Private->NumberOfDataQueuePairs > 1) {
Status = gBS->CreateEvent (
EVT_TIMER | EVT_NOTIFY_SIGNAL,
TPL_NOTIFY,
ProcessAsyncTaskList,
Private,
&Private->TimerEvent
);
if (EFI_ERROR (Status)) {
goto Exit;
}

Status = gBS->SetTimer (
Private->TimerEvent,
TimerPeriodic,
NVME_HC_ASYNC_TIMER
);
if (EFI_ERROR (Status)) {
goto Exit;
Status = gBS->SetTimer (
Private->TimerEvent,
TimerPeriodic,
NVME_HC_ASYNC_TIMER
);
if (EFI_ERROR (Status)) {
goto Exit;
}
}

// MU_CHANGE [END] - Request Number of Queues from Controller

Status = gBS->InstallMultipleProtocolInterfaces (
&Controller,
&gEfiNvmExpressPassThruProtocolGuid,
@@ -1194,7 +1295,14 @@ NvmExpressDriverBindingStart (

if ((Private != NULL) && (Private->Buffer != NULL)) {
// MU_CHANGE - Support alternative hardware queue sizes in NVME driver
PciIo->FreeBuffer (PciIo, QueuePageCount, Private->Buffer);
// MU_CHANGE [BEGIN] - Allocate IO Queue Buffer
Status = PciIo->FreeBuffer (PciIo, AdminQueuePageCount, Private->Buffer);

if (EFI_ERROR (Status)) {
DEBUG ((DEBUG_ERROR, "%a: FreeBuffer failed with %r\n", __func__, Status));
}

// MU_CHANGE [END] - Allocate IO Queue Buffer
}

if ((Private != NULL) && (Private->ControllerData != NULL)) {
@@ -1271,8 +1379,8 @@ NvmExpressDriverBindingStop (
BOOLEAN IsEmpty;
EFI_TPL OldTpl;
// MU_CHANGE - Support alternative hardware queue sizes in NVME driver
UINT16 QueuePageCount = PcdGetBool (PcdSupportAlternativeQueueSize) ?
NVME_ALTERNATIVE_TOTAL_QUEUE_BUFFER_IN_PAGES : 6;
// MU_CHANGE - Allocate IO Queue Buffer
UINTN QueuePageCount;

if (NumberOfChildren == 0) {
Status = gBS->OpenProtocol (
@@ -1318,11 +1426,34 @@ NvmExpressDriverBindingStop (
Private->PciIo->Unmap (Private->PciIo, Private->Mapping);
}

// MU_CHANGE [BEGIN] - Allocate IO Queue Buffer
QueuePageCount = EFI_SIZE_TO_PAGES (Private->SqData[0].NumberOfEntries * LShiftU64 (2, Private->SqData[0].EntrySize))
+ EFI_SIZE_TO_PAGES (Private->CqData[0].NumberOfEntries * LShiftU64 (2, Private->CqData[0].EntrySize));
// MU_CHANGE [END] - Allocate IO Queue Buffer
if (Private->Buffer != NULL) {
// MU_CHANGE - Support alternative hardware queue sizes in NVME driver
Private->PciIo->FreeBuffer (Private->PciIo, QueuePageCount, Private->Buffer);
}

// MU_CHANGE [BEGIN] - Allocate IO Queue Buffer
if (Private->DataQueueMapping != NULL) {
Status = Private->PciIo->Unmap (Private->PciIo, Private->DataQueueMapping);

if (EFI_ERROR (Status)) {
DEBUG ((DEBUG_ERROR, "%a: Unmap DataQueueMapping failed %r\n", __func__, Status));
}
}

if (Private->DataQueueBuffer != NULL) {
Status = Private->PciIo->FreeBuffer (Private->PciIo, QueuePageCount * Private->NumberOfDataQueuePairs, Private->DataQueueBuffer);

if (EFI_ERROR (Status)) {
DEBUG ((DEBUG_ERROR, "%a: FreeBuffer DataQueueBuffer failed %r\n", __func__, Status));
}
}

// MU_CHANGE [END] - Allocate IO Queue Buffer

FreePool (Private->ControllerData);
FreePool (Private);
}