Skip to content

Commit c546349

Browse files
mrgolin authored and dkkranz committed
efa: Add option to create CQ with external memory
Extend the EFA direct verbs interface to enable creation of CQs on top of pre-allocated memory buffers. The memory can be passed by supplying a dmabuf fd and offset.

Reviewed-by: Daniel Kranzdorf <[email protected]>
Reviewed-by: Yonatan Nachum <[email protected]>
Signed-off-by: Michael Margolin <[email protected]>
1 parent 155530e commit c546349

File tree

5 files changed

+83
-19
lines changed

5 files changed

+83
-19
lines changed

kernel-headers/rdma/efa-abi.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
22
/*
3-
* Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
3+
* Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
44
*/
55

66
#ifndef EFA_ABI_USER_H
@@ -56,14 +56,18 @@ struct efa_ibv_alloc_pd_resp {
5656
enum {
5757
EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0,
5858
EFA_CREATE_CQ_WITH_SGID = 1 << 1,
59+
EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF = 1 << 2,
5960
};
6061

6162
struct efa_ibv_create_cq {
6263
__u32 comp_mask;
6364
__u32 cq_entry_size;
6465
__u16 num_sub_cqs;
6566
__u8 flags;
66-
__u8 reserved_58[5];
67+
__u8 reserved_58[1];
68+
__u32 ext_mem_fd;
69+
__aligned_u64 ext_mem_offset;
70+
__aligned_u64 ext_mem_length;
6771
};
6872

6973
enum {
@@ -131,6 +135,7 @@ enum {
131135
EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4,
132136
EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5,
133137
EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV = 1 << 6,
138+
EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 7,
134139
};
135140

136141
struct efa_ibv_ex_query_device_resp {

providers/efa/efadv.h

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ enum {
2222
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID = 1 << 2,
2323
EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE = 1 << 3,
2424
EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV = 1 << 4,
25+
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 5,
2526
};
2627

2728
struct efadv_device_attr {
@@ -98,9 +99,20 @@ enum {
9899
EFADV_WC_EX_WITH_IS_UNSOLICITED = 1 << 1,
99100
};
100101

102+
enum {
103+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF = 1 << 0,
104+
};
105+
101106
struct efadv_cq_init_attr {
102107
uint64_t comp_mask;
103108
uint64_t wc_flags;
109+
uint64_t flags;
110+
struct {
111+
uint64_t offset;
112+
uint64_t length;
113+
uint32_t fd;
114+
uint8_t reserved[4];
115+
} ext_mem_dmabuf;
104116
};
105117

106118
struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,

providers/efa/man/efadv_create_cq.3.md

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,13 @@ Compatibility is handled using the comp_mask and inlen fields.
4747
struct efadv_cq_init_attr {
4848
uint64_t comp_mask;
4949
uint64_t wc_flags;
50+
uint64_t flags;
51+
struct {
52+
uint64_t offset;
53+
uint64_t length;
54+
uint32_t fd;
55+
uint8_t reserved[4];
56+
} ext_mem_dmabuf;
5057
};
5158
```
5259

@@ -65,6 +72,25 @@ struct efadv_cq_init_attr {
6572
EFADV_WC_EX_WITH_IS_UNSOLICITED:
6673
request for an option to check whether a receive WC is unsolicited.
6774

75+
*flags*
76+
: A bitwise OR of the various values described below.
77+
78+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF:
79+
create CQ with external memory provided via dmabuf.
80+
81+
*ext_mem_dmabuf*
82+
: Structure containing information about external memory when using
83+
the EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF flag.
84+
85+
fd:
86+
file descriptor of the dmabuf.
87+
88+
offset:
89+
offset within the dmabuf.
90+
91+
length:
92+
length of the memory region to use.
93+
6894

6995
# Completion iterator functions
7096

providers/efa/man/efadv_query_device.3.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,10 @@ struct efadv_device_attr {
8585
requests in order to receive RDMA write with immediate and a WC generated for such
8686
receive will be marked as unsolicited.
8787

88+
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF:
89+
Indicates that creating CQs with external memory buffers by passing a dmabuf is
90+
supported.
91+
8892
*max_rdma_size*
8993
: Maximum RDMA transfer size in bytes.
9094

providers/efa/verbs.c

Lines changed: 34 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,9 @@ int efadv_query_device(struct ibv_context *ibvctx,
175175

176176
if (EFA_DEV_CAP(ctx, UNSOLICITED_WRITE_RECV))
177177
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV;
178+
179+
if (EFA_DEV_CAP(ctx, CQ_WITH_EXT_MEM_DMABUF))
180+
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF;
178181
}
179182

180183
if (vext_field_avail(typeof(*attr), max_rdma_size, inlen)) {
@@ -873,9 +876,9 @@ static void efa_cq_fill_pfns(struct efa_cq *cq,
873876
if (attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
874877
ibvcqx->read_dlid_path_bits = efa_wc_read_dlid_path_bits;
875878

876-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID))
879+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)
877880
cq->dv_cq.wc_read_sgid = efa_wc_read_sgid;
878-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED))
881+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED)
879882
cq->dv_cq.wc_is_unsolicited = efa_wc_is_unsolicited;
880883
}
881884

@@ -925,12 +928,20 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
925928
if (!cq)
926929
return NULL;
927930

928-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID))
931+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)
929932
cmd.flags |= EFA_CREATE_CQ_WITH_SGID;
930933

931934
num_sub_cqs = ctx->sub_cqs_per_cq;
932935
cmd.num_sub_cqs = num_sub_cqs;
933936
cmd.cq_entry_size = cqe_size;
937+
938+
if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) {
939+
cmd.ext_mem_fd = efa_attr->ext_mem_dmabuf.fd;
940+
cmd.ext_mem_offset = efa_attr->ext_mem_dmabuf.offset;
941+
cmd.ext_mem_length = efa_attr->ext_mem_dmabuf.length;
942+
cmd.flags |= EFA_CREATE_CQ_WITH_EXT_MEM_DMABUF;
943+
}
944+
934945
if (attr->channel)
935946
cmd.flags |= EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL;
936947

@@ -950,17 +961,18 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
950961
cq->cqe_size = cqe_size;
951962
cq->dev = ibvctx->device;
952963

953-
cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED,
954-
ibvctx->cmd_fd, resp.q_mmap_key);
955-
if (cq->buf == MAP_FAILED)
956-
goto err_destroy_cq;
957-
958-
buf = cq->buf;
959-
sub_buf_size = cq->cqe_size * sub_cq_size;
960-
for (i = 0; i < num_sub_cqs; i++) {
961-
efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size,
962-
cq->cqe_size);
963-
buf += sub_buf_size;
964+
if (!(efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF)) {
965+
cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED, ibvctx->cmd_fd,
966+
resp.q_mmap_key);
967+
if (cq->buf == MAP_FAILED)
968+
goto err_destroy_cq;
969+
970+
buf = cq->buf;
971+
sub_buf_size = cq->cqe_size * sub_cq_size;
972+
for (i = 0; i < num_sub_cqs; i++) {
973+
efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size, cq->cqe_size);
974+
buf += sub_buf_size;
975+
}
964976
}
965977

966978
if (resp.comp_mask & EFA_CREATE_CQ_RESP_DB_OFF) {
@@ -991,29 +1003,33 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
9911003
struct ibv_cq *efa_create_cq(struct ibv_context *ibvctx, int ncqe,
9921004
struct ibv_comp_channel *channel, int vec)
9931005
{
1006+
struct efadv_cq_init_attr efa_attr = {};
9941007
struct ibv_cq_init_attr_ex attr_ex = {
9951008
.cqe = ncqe,
9961009
.channel = channel,
9971010
.comp_vector = vec
9981011
};
9991012
struct ibv_cq_ex *ibvcqx;
10001013

1001-
ibvcqx = create_cq(ibvctx, &attr_ex, NULL);
1014+
ibvcqx = create_cq(ibvctx, &attr_ex, &efa_attr);
10021015

10031016
return ibvcqx ? ibv_cq_ex_to_cq(ibvcqx) : NULL;
10041017
}
10051018

10061019
struct ibv_cq_ex *efa_create_cq_ex(struct ibv_context *ibvctx,
10071020
struct ibv_cq_init_attr_ex *attr_ex)
10081021
{
1009-
return create_cq(ibvctx, attr_ex, NULL);
1022+
struct efadv_cq_init_attr efa_attr = {};
1023+
1024+
return create_cq(ibvctx, attr_ex, &efa_attr);
10101025
}
10111026

10121027
struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,
10131028
struct ibv_cq_init_attr_ex *attr_ex,
10141029
struct efadv_cq_init_attr *efa_attr,
10151030
uint32_t inlen)
10161031
{
1032+
struct efadv_cq_init_attr local_efa_attr = {};
10171033
uint64_t supp_wc_flags = 0;
10181034
struct efa_context *ctx;
10191035

@@ -1043,7 +1059,8 @@ struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,
10431059
return NULL;
10441060
}
10451061

1046-
return create_cq(ibvctx, attr_ex, efa_attr);
1062+
memcpy(&local_efa_attr, efa_attr, min_t(uint32_t, inlen, sizeof(local_efa_attr)));
1063+
return create_cq(ibvctx, attr_ex, &local_efa_attr);
10471064
}
10481065

10491066
int efadv_query_cq(struct ibv_cq *ibvcq, struct efadv_cq_attr *attr, uint32_t inlen)

0 commit comments

Comments
 (0)