nvme/rdma: Create memory domain per Protection Domain
Add a global list of memory domains with a reference counter.
Memory domains are used by NVMe RDMA qpairs.

Also refactor ibv_resize_cq in nvme_rdma_ut.c into a stub.

Signed-off-by: Alexey Marchuk <[email protected]>
Change-Id: Ie58b7e99fcb2c57c967f5dee0417e74845d9e2d1
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8127
Community-CI: Broadcom CI <[email protected]>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <[email protected]>
Reviewed-by: Ben Walker <[email protected]>
Reviewed-by: Paul Luse <[email protected]>
Reviewed-by: Shuhei Matsumoto <[email protected]>
Reviewed-by: Ziye Yang <[email protected]>
AlekseyMarchuk authored and tomzawadzki committed Aug 20, 2021
1 parent 4e52791 commit d06b609
Showing 3 changed files with 177 additions and 5 deletions.
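
Before the per-file diffs, a simplified standalone sketch of the pattern this commit introduces: one spdk_memory_domain is cached per RDMA protection domain and handed out by counted reference, so qpairs built on the same ibv_pd share a single domain. The names below (struct pd_key, domain_entry, domain_get, domain_put) are illustrative stand-ins, not SPDK API; the authoritative implementation is in lib/nvme/nvme_rdma.c in the diff that follows.

#include <assert.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/queue.h>

/* Illustrative stand-ins; the real code keys on struct ibv_pd and stores a struct spdk_memory_domain. */
struct pd_key;

struct domain_entry {
	TAILQ_ENTRY(domain_entry) link;
	uint32_t ref;
	struct pd_key *pd;
};

static TAILQ_HEAD(, domain_entry) g_entries = TAILQ_HEAD_INITIALIZER(g_entries);
static pthread_mutex_t g_entries_lock = PTHREAD_MUTEX_INITIALIZER;

/* Look up the entry for this protection domain, creating it on first use. */
static struct domain_entry *
domain_get(struct pd_key *pd)
{
	struct domain_entry *entry;

	pthread_mutex_lock(&g_entries_lock);
	TAILQ_FOREACH(entry, &g_entries, link) {
		if (entry->pd == pd) {
			entry->ref++;
			pthread_mutex_unlock(&g_entries_lock);
			return entry;
		}
	}

	entry = calloc(1, sizeof(*entry));
	if (entry) {
		/* The real code also calls spdk_memory_domain_create() here and fails the get on error. */
		entry->pd = pd;
		entry->ref = 1;
		TAILQ_INSERT_TAIL(&g_entries, entry, link);
	}
	pthread_mutex_unlock(&g_entries_lock);
	return entry;
}

/* Drop one reference; the last put removes the entry (and, in the real code, destroys the memory domain). */
static void
domain_put(struct domain_entry *entry)
{
	if (!entry) {
		return;
	}
	pthread_mutex_lock(&g_entries_lock);
	assert(entry->ref > 0);
	if (--entry->ref == 0) {
		TAILQ_REMOVE(&g_entries, entry, link);
		free(entry);
	}
	pthread_mutex_unlock(&g_entries_lock);
}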
90 changes: 90 additions & 0 deletions lib/nvme/nvme_rdma.c
@@ -3,6 +3,7 @@
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
@@ -38,6 +39,7 @@
#include "spdk/stdinc.h"

#include "spdk/assert.h"
#include "spdk/dma.h"
#include "spdk/log.h"
#include "spdk/trace.h"
#include "spdk/queue.h"
@@ -102,6 +104,13 @@

#define WC_PER_QPAIR(queue_depth) (queue_depth * 2)

struct nvme_rdma_memory_domain {
	TAILQ_ENTRY(nvme_rdma_memory_domain) link;
	uint32_t ref;
	struct ibv_pd *pd;
	struct spdk_memory_domain *domain;
};

enum nvme_rdma_wr_type {
RDMA_WR_TYPE_RECV,
RDMA_WR_TYPE_SEND,
@@ -223,6 +232,8 @@ struct nvme_rdma_qpair {
	TAILQ_HEAD(, spdk_nvme_rdma_req) free_reqs;
	TAILQ_HEAD(, spdk_nvme_rdma_req) outstanding_reqs;

	struct nvme_rdma_memory_domain *memory_domain;

	/* Counts of outstanding send and recv objects */
	uint16_t current_num_recvs;
	uint16_t current_num_sends;
@@ -289,6 +300,77 @@ static const char *rdma_cm_event_str[] = {
struct nvme_rdma_qpair *nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group,
		uint32_t qp_num);

static TAILQ_HEAD(, nvme_rdma_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER(
			g_memory_domains);
static pthread_mutex_t g_memory_domains_lock = PTHREAD_MUTEX_INITIALIZER;

static struct nvme_rdma_memory_domain *
nvme_rdma_get_memory_domain(struct ibv_pd *pd)
{
	struct nvme_rdma_memory_domain *domain = NULL;
	struct spdk_memory_domain_ctx dev_ctx;
	int rc;

	pthread_mutex_lock(&g_memory_domains_lock);

	TAILQ_FOREACH(domain, &g_memory_domains, link) {
		if (domain->pd == pd) {
			domain->ref++;
			pthread_mutex_unlock(&g_memory_domains_lock);
			return domain;
		}
	}

	domain = calloc(1, sizeof(*domain));
	if (!domain) {
		SPDK_ERRLOG("Memory allocation failed\n");
		pthread_mutex_unlock(&g_memory_domains_lock);
		return NULL;
	}

	dev_ctx.size = sizeof(dev_ctx);
	dev_ctx.rdma.ibv_pd = pd;

	rc = spdk_memory_domain_create(&domain->domain, SPDK_DMA_DEVICE_TYPE_RDMA, &dev_ctx,
				       SPDK_RDMA_DMA_DEVICE);
	if (rc) {
		SPDK_ERRLOG("Failed to create memory domain\n");
		free(domain);
		pthread_mutex_unlock(&g_memory_domains_lock);
		return NULL;
	}

	domain->pd = pd;
	domain->ref = 1;
	TAILQ_INSERT_TAIL(&g_memory_domains, domain, link);

	pthread_mutex_unlock(&g_memory_domains_lock);

	return domain;
}

static void
nvme_rdma_put_memory_domain(struct nvme_rdma_memory_domain *device)
{
	if (!device) {
		return;
	}

	pthread_mutex_lock(&g_memory_domains_lock);

	assert(device->ref > 0);

	device->ref--;

	if (device->ref == 0) {
		spdk_memory_domain_destroy(device->domain);
		TAILQ_REMOVE(&g_memory_domains, device, link);
		free(device);
	}

	pthread_mutex_unlock(&g_memory_domains_lock);
}

static inline void *
nvme_rdma_calloc(size_t nmemb, size_t size)
{
@@ -625,6 +707,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
		return -1;
	}

	rqpair->memory_domain = nvme_rdma_get_memory_domain(rqpair->rdma_qp->qp->pd);
	if (!rqpair->memory_domain) {
		SPDK_ERRLOG("Failed to get memory domain\n");
		return -1;
	}

	/* ibv_create_qp will change the values in attr.cap. Make sure we store the proper value. */
	rqpair->max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, attr.cap.max_send_sge);
	rqpair->max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, attr.cap.max_recv_sge);
@@ -1712,6 +1800,8 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
	nvme_rdma_qpair_abort_reqs(qpair, 1);
	nvme_qpair_deinit(qpair);

	nvme_rdma_put_memory_domain(rqpair->memory_domain);

	nvme_rdma_free_reqs(rqpair);
	nvme_rdma_free_rsps(rqpair);
	nvme_rdma_free(rqpair);
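
The two helpers above are intended to be used in matched pairs over a qpair's lifetime: nvme_rdma_qpair_init() takes a reference keyed by the new QP's protection domain, and nvme_rdma_ctrlr_delete_io_qpair() drops it, so qpairs sharing an ibv_pd share one spdk_memory_domain. A minimal sketch of that pairing, assuming the structures shown in the diff above (the wrapper names example_qpair_attach_domain and example_qpair_detach_domain are hypothetical):

/* Minimal sketch: pair every successful get with exactly one put over the qpair's lifetime. */
static int
example_qpair_attach_domain(struct nvme_rdma_qpair *rqpair)
{
	/* Qpairs whose QPs were created on the same ibv_pd receive the same domain. */
	rqpair->memory_domain = nvme_rdma_get_memory_domain(rqpair->rdma_qp->qp->pd);
	if (!rqpair->memory_domain) {
		SPDK_ERRLOG("Failed to get memory domain\n");
		return -1;
	}
	return 0;
}

static void
example_qpair_detach_domain(struct nvme_rdma_qpair *rqpair)
{
	/* Safe with a NULL domain; the last put destroys the spdk_memory_domain. */
	nvme_rdma_put_memory_domain(rqpair->memory_domain);
	rqpair->memory_domain = NULL;
}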
2 changes: 1 addition & 1 deletion mk/spdk.lib_deps.mk
@@ -64,7 +64,7 @@ DEPDIRS-thread := log util trace

DEPDIRS-nvme := log sock util
ifeq ($(CONFIG_RDMA),y)
DEPDIRS-nvme += rdma
DEPDIRS-nvme += rdma dma
endif
ifeq ($(CONFIG_VFIO_USER),y)
DEPDIRS-nvme += vfio_user
90 changes: 86 additions & 4 deletions test/unit/lib/nvme/nvme_rdma.c/nvme_rdma_ut.c
@@ -1,8 +1,8 @@
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
@@ -60,12 +60,35 @@ DEFINE_STUB(fcntl, int, (int fd, int cmd, ...), 0);
DEFINE_STUB_V(rdma_destroy_event_channel, (struct rdma_event_channel *channel));

DEFINE_STUB(ibv_dereg_mr, int, (struct ibv_mr *mr), 0);
DEFINE_STUB(ibv_resize_cq, int, (struct ibv_cq *cq, int cqe), 0);

int ibv_resize_cq(struct ibv_cq *cq, int cqe)
DEFINE_RETURN_MOCK(spdk_memory_domain_create, int);
int
spdk_memory_domain_create(struct spdk_memory_domain **domain, enum spdk_dma_device_type type,
			  struct spdk_memory_domain_ctx *ctx, const char *id)
{
	static struct spdk_memory_domain *__dma_dev = (struct spdk_memory_domain *)0xdeaddead;

	HANDLE_RETURN_MOCK(spdk_memory_domain_create);

	*domain = __dma_dev;

	return 0;
}

DEFINE_STUB(spdk_memory_domain_get_context, struct spdk_memory_domain_ctx *,
	    (struct spdk_memory_domain *device), NULL);
DEFINE_STUB(spdk_memory_domain_get_dma_device_type, enum spdk_dma_device_type,
	    (struct spdk_memory_domain *device), SPDK_DMA_DEVICE_TYPE_RDMA);
DEFINE_STUB_V(spdk_memory_domain_destroy, (struct spdk_memory_domain *device));
DEFINE_STUB(spdk_memory_domain_fetch_data, int, (struct spdk_memory_domain *src_domain,
		void *src_domain_ctx, struct iovec *src_iov, uint32_t src_iov_cnt, struct iovec *dst_iov,
		uint32_t dst_iov_cnt, spdk_memory_domain_fetch_data_cpl_cb cpl_cb, void *cpl_cb_arg), 0);
DEFINE_STUB(spdk_memory_domain_translate_data, int, (struct spdk_memory_domain *src_domain,
		void *src_domain_ctx, struct spdk_memory_domain *dst_domain,
		struct spdk_memory_domain_translation_ctx *dst_domain_ctx, void *addr, size_t len,
		struct spdk_memory_domain_translation_result *result), 0);

/* ibv_reg_mr can be a macro, need to undefine it */
#ifdef ibv_reg_mr
#undef ibv_reg_mr
@@ -1055,7 +1078,8 @@ test_nvme_rdma_qpair_init(void)
{
	struct nvme_rdma_qpair rqpair = {};
	struct rdma_cm_id cm_id = {};
	struct ibv_qp qp = {};
	struct ibv_pd *pd = (struct ibv_pd *)0xfeedbeef;
	struct ibv_qp qp = { .pd = pd };
	struct nvme_rdma_ctrlr rctrlr = {};
	int rc = 0;

@@ -1075,6 +1099,7 @@ test_nvme_rdma_qpair_init(void)
	CU_ASSERT(rqpair.current_num_sends == 0);
	CU_ASSERT(rqpair.current_num_recvs == 0);
	CU_ASSERT(rqpair.cq == (struct ibv_cq *)0xFEEDBEEF);
	CU_ASSERT(rqpair.memory_domain != NULL);
}

static void
@@ -1119,6 +1144,62 @@ test_nvme_rdma_qpair_submit_request(void)
	nvme_rdma_free_reqs(&rqpair);
}

static void
test_nvme_rdma_memory_domain(void)
{
	struct nvme_rdma_memory_domain *domain_1 = NULL, *domain_2 = NULL, *domain_tmp;
	struct ibv_pd *pd_1 = (struct ibv_pd *)0x1, *pd_2 = (struct ibv_pd *)0x2;
	/* Counters below are used to check the number of created/destroyed rdma_dma_device objects.
	 * Since other unit tests may create dma_devices, we can't simply check whether the list is empty. */
	uint32_t dma_dev_count_start = 0, dma_dev_count = 0, dma_dev_count_end = 0;

	TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
		dma_dev_count_start++;
	}

	/* spdk_memory_domain_create fails, expect failure */
	MOCK_SET(spdk_memory_domain_create, -1);
	domain_1 = nvme_rdma_get_memory_domain(pd_1);
	CU_ASSERT(domain_1 == NULL);
	MOCK_CLEAR(spdk_memory_domain_create);

	/* Normal scenario */
	domain_1 = nvme_rdma_get_memory_domain(pd_1);
	SPDK_CU_ASSERT_FATAL(domain_1 != NULL);
	CU_ASSERT(domain_1->domain != NULL);
	CU_ASSERT(domain_1->pd == pd_1);
	CU_ASSERT(domain_1->ref == 1);

	/* Request the same pd, ref counter increased */
	CU_ASSERT(nvme_rdma_get_memory_domain(pd_1) == domain_1);
	CU_ASSERT(domain_1->ref == 2);

	/* Request another pd */
	domain_2 = nvme_rdma_get_memory_domain(pd_2);
	SPDK_CU_ASSERT_FATAL(domain_2 != NULL);
	CU_ASSERT(domain_2->domain != NULL);
	CU_ASSERT(domain_2->pd == pd_2);
	CU_ASSERT(domain_2->ref == 1);

	TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
		dma_dev_count++;
	}
	CU_ASSERT(dma_dev_count == dma_dev_count_start + 2);

	/* Put domain_1, decrement refcount */
	nvme_rdma_put_memory_domain(domain_1);

	/* Release both devices */
	CU_ASSERT(domain_2->ref == 1);
	nvme_rdma_put_memory_domain(domain_1);
	nvme_rdma_put_memory_domain(domain_2);

	TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
		dma_dev_count_end++;
	}
	CU_ASSERT(dma_dev_count_start == dma_dev_count_end);
}

int main(int argc, char **argv)
{
	CU_pSuite suite = NULL;
@@ -1147,6 +1228,7 @@ int main(int argc, char **argv)
	CU_ADD_TEST(suite, test_nvme_rdma_parse_addr);
	CU_ADD_TEST(suite, test_nvme_rdma_qpair_init);
	CU_ADD_TEST(suite, test_nvme_rdma_qpair_submit_request);
	CU_ADD_TEST(suite, test_nvme_rdma_memory_domain);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();
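
The failure injection in test_nvme_rdma_memory_domain() relies on SPDK's unit-test mock macros seen above (DEFINE_RETURN_MOCK, HANDLE_RETURN_MOCK, MOCK_SET, MOCK_CLEAR; the header providing them is assumed to be spdk_internal/mock.h). A generic sketch of the pattern, with a hypothetical function do_op() standing in for spdk_memory_domain_create():

/* Hand-written mock with an injectable return value; do_op() is a hypothetical stand-in. */
DEFINE_RETURN_MOCK(do_op, int);
int
do_op(int arg)
{
	/* If a test armed the mock with MOCK_SET(do_op, rc), return rc immediately. */
	HANDLE_RETURN_MOCK(do_op);

	/* Otherwise run the default behavior coded in the mock body. */
	return 0;
}

static void
test_do_op_failure(void)
{
	MOCK_SET(do_op, -1);		/* force do_op() to report failure */
	CU_ASSERT(do_op(0) == -1);
	MOCK_CLEAR(do_op);		/* restore the default path */
	CU_ASSERT(do_op(0) == 0);
}

MOCK_SET arms the mock so HANDLE_RETURN_MOCK short-circuits with the chosen value, and MOCK_CLEAR restores the hand-written default path; this is how the test above exercises the spdk_memory_domain_create() error branch without touching the real DMA library.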
