From bc52b9e25a980f46857f3c44c649e0968c1d40f0 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Wed, 8 Jan 2025 00:24:26 +0000 Subject: [PATCH] prov/efa: Extend efa_ep interface Extend efa ep interface to make it cover all the applied features that efa-rdm ep interface supports today. It also refactors and moves several internal efa_rdm_ep functions to efa_base_ep.c to cover both efa_direct and efa_rdm ep. Signed-off-by: Shi Jin --- prov/efa/src/efa.h | 17 ++ prov/efa/src/efa_base_ep.c | 218 ++++++++++++++++++++- prov/efa/src/efa_base_ep.h | 9 + prov/efa/src/efa_cntr.c | 16 +- prov/efa/src/efa_ep.c | 281 ++++++++++++++++----------- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 254 +++--------------------- prov/efa/test/efa_unit_test_cntr.c | 52 +++-- prov/efa/test/efa_unit_test_common.c | 91 ++++++++- prov/efa/test/efa_unit_test_cq.c | 2 +- prov/efa/test/efa_unit_test_ep.c | 270 ++++++++++++++++++++++++- prov/efa/test/efa_unit_test_msg.c | 7 +- prov/efa/test/efa_unit_test_rma.c | 5 +- prov/efa/test/efa_unit_tests.c | 13 +- prov/efa/test/efa_unit_tests.h | 20 +- 14 files changed, 877 insertions(+), 378 deletions(-) diff --git a/prov/efa/src/efa.h b/prov/efa/src/efa.h index 4d8e982355c..aef070fdc5f 100644 --- a/prov/efa/src/efa.h +++ b/prov/efa/src/efa.h @@ -227,4 +227,21 @@ bool efa_use_unsolicited_write_recv() return efa_env.use_unsolicited_write_recv && efa_device_support_unsolicited_write_recv(); } +/** + * Convenience macro for setopt with an enforced threshold; uses the caller's optval/optlen, checks optlen before reading optval, and returns -FI_EINVAL from the enclosing function on failure + */ +#define EFA_EP_SETOPT_THRESHOLD(opt, field, threshold) { \ + if (optlen != sizeof(field)) \ + return -FI_EINVAL; \ + size_t _val = *(const size_t *) optval; \ + if (_val > (threshold)) { \ + EFA_WARN(FI_LOG_EP_CTRL, \ + "Requested size of %zu for FI_OPT_" #opt " " \ + "exceeds the maximum (%zu)\n", \ + _val, threshold); \ + return -FI_EINVAL; \ + } \ + field = _val; \ +} + #endif /* EFA_H */ diff --git a/prov/efa/src/efa_base_ep.c b/prov/efa/src/efa_base_ep.c index 85068fa91c6..c5bf8d095fa 100644 --- 
a/prov/efa/src/efa_base_ep.c +++ b/prov/efa/src/efa_base_ep.c @@ -5,6 +5,7 @@ #include "efa.h" #include "efa_av.h" #include "efa_cq.h" +#include "efa_cntr.h" #include "rdm/efa_rdm_protocol.h" int efa_base_ep_bind_av(struct efa_base_ep *base_ep, struct efa_av *av) @@ -366,9 +367,10 @@ int efa_base_ep_construct(struct efa_base_ep *base_ep, base_ep->qp = NULL; base_ep->user_recv_qp = NULL; - base_ep->max_msg_size = info->ep_attr->max_msg_size; - base_ep->max_rma_size = info->ep_attr->max_msg_size; - base_ep->inject_msg_size = info->tx_attr->inject_size; + /* Use device's native limit as the default value of base ep*/ + base_ep->max_msg_size = (size_t) base_ep->domain->device->ibv_port_attr.max_msg_sz; + base_ep->max_rma_size = (size_t) base_ep->domain->device->max_rdma_size; + base_ep->inject_msg_size = (size_t) base_ep->domain->device->efa_attr.inline_buf_size; /* TODO: update inject_rma_size to inline size after firmware * supports inline rdma write */ base_ep->inject_rma_size = 0; @@ -531,3 +533,213 @@ struct efa_cq *efa_base_ep_get_rx_cq(struct efa_base_ep *ep) { return ep->util_ep.rx_cq ? 
container_of(ep->util_ep.rx_cq, struct efa_cq, util_cq) : NULL; } + +/** + * @brief Construct the ibv qp init attr for given ep and cq + * + * @param ep a ptr to the efa_base_ep + * @param attr_ex the constructed qp attr + * @param tx_cq tx cq + * @param rx_cq rx cq + */ +static +void efa_base_ep_construct_ibv_qp_init_attr_ex(struct efa_base_ep *ep, + struct ibv_qp_init_attr_ex *attr_ex, + struct ibv_cq_ex *tx_cq, + struct ibv_cq_ex *rx_cq) +{ + struct fi_info *info; + + if (ep->info->ep_attr->type == FI_EP_RDM) { + attr_ex->qp_type = IBV_QPT_DRIVER; + info = ep->domain->device->rdm_info; + } else { + assert(ep->info->ep_attr->type == FI_EP_DGRAM); + attr_ex->qp_type = IBV_QPT_UD; + info = ep->domain->device->dgram_info; + } + attr_ex->cap.max_send_wr = info->tx_attr->size; + attr_ex->cap.max_send_sge = info->tx_attr->iov_limit; + attr_ex->cap.max_recv_wr = info->rx_attr->size; + attr_ex->cap.max_recv_sge = info->rx_attr->iov_limit; + attr_ex->cap.max_inline_data = ep->domain->device->efa_attr.inline_buf_size; + attr_ex->pd = ep->domain->ibv_pd; + attr_ex->qp_context = ep; + attr_ex->sq_sig_all = 1; + + attr_ex->send_cq = ibv_cq_ex_to_cq(tx_cq); + attr_ex->recv_cq = ibv_cq_ex_to_cq(rx_cq); +} + +/** + * @brief check the in order aligned 128 bytes support for a given ibv_wr_op code + * + * @param ep efa_base_ep + * @param op_code ibv wr op code + * @return int 0 if in order aligned 128 bytes is supported, -FI_EOPNOTSUPP if + * it is not supported. Other negative integer for other errors. 
+ */ +int efa_base_ep_check_qp_in_order_aligned_128_bytes(struct efa_base_ep *ep, + enum ibv_wr_opcode op_code) +{ + struct efa_qp *qp = NULL; + struct ibv_qp_init_attr_ex attr_ex = {0}; + int ret, retv; + struct ibv_cq_ex *ibv_cq_ex = NULL; + enum ibv_cq_ex_type ibv_cq_ex_type; + struct fi_cq_attr cq_attr = {0}; + + ret = efa_cq_ibv_cq_ex_open(&cq_attr, ep->domain->device->ibv_ctx, &ibv_cq_ex, &ibv_cq_ex_type); + if (ret) { + EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %d\n", ret); + ret = -FI_EINVAL; + goto out; + } + + /* Create a dummy qp for query only */ + efa_base_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, ibv_cq_ex, ibv_cq_ex); + + ret = efa_qp_create(&qp, &attr_ex, FI_TC_UNSPEC); + if (ret) + goto out; + + if (!efa_qp_support_op_in_order_aligned_128_bytes(qp, op_code)) + ret = -FI_EOPNOTSUPP; + +out: + if (qp) + efa_qp_destruct(qp); + + if (ibv_cq_ex) { + retv = -ibv_destroy_cq(ibv_cq_ex_to_cq(ibv_cq_ex)); + if (retv) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close ibv cq: %s\n", + fi_strerror(-retv)); + } + return ret; +} + +/** + * @brief Insert the ep's tx/rx cq into the poll list of every cntr the ep is bound to + * + * @param ep efa_base_ep + * @return int 0 on success, negative integer on failure + */ +int efa_base_ep_insert_cntr_ibv_cq_poll_list(struct efa_base_ep *ep) +{ + int i, ret; + struct efa_cntr *efa_cntr; + struct util_cntr *util_cntr; + struct efa_cq *tx_cq, *rx_cq; + + tx_cq = efa_base_ep_get_tx_cq(ep); + rx_cq = efa_base_ep_get_rx_cq(ep); + + for (i = 0; i < CNTR_CNT; i++) { + util_cntr = ep->util_ep.cntrs[i]; + if (util_cntr) { + efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); + if (tx_cq) { + ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); + if (ret) + return ret; + } + if (rx_cq) { + ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); + if (ret) + return ret; + } + 
ofi_genlock_lock(&efa_cntr->util_cntr.ep_list_lock); + efa_cntr->need_to_scan_ep_list = true; + ofi_genlock_unlock(&efa_cntr->util_cntr.ep_list_lock); + } + } + + return FI_SUCCESS; +} + +/** + * @brief Remove tx/rx cq from the cntrs the ep is bound to (a cq is removed only when its util_cq ref count reads zero) + * + * @param ep efa_base_ep + */ +void efa_base_ep_remove_cntr_ibv_cq_poll_list(struct efa_base_ep *ep) +{ + int i; + struct efa_cntr *efa_cntr; + struct util_cntr *util_cntr; + struct efa_cq *tx_cq, *rx_cq; + + tx_cq = efa_base_ep_get_tx_cq(ep); + rx_cq = efa_base_ep_get_rx_cq(ep); + + for (i = 0; i< CNTR_CNT; i++) { + util_cntr = ep->util_ep.cntrs[i]; + if (util_cntr) { + efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); + if (tx_cq && !ofi_atomic_get32(&tx_cq->util_cq.ref)) + efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); + + if (rx_cq && !ofi_atomic_get32(&rx_cq->util_cq.ref)) + efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); + } + } +} + +/** + * @brief Create and enable the IBV QP that backs the EP + * + * @param ep efa_base_ep + * @param create_user_recv_qp whether to create the user_recv_qp. 
This boolean + * is only true for the zero copy recv mode in the efa-rdm endpoint + * + * @return int 0 on success, negative integer on failure + */ +int efa_base_ep_create_and_enable_qp(struct efa_base_ep *ep, bool create_user_recv_qp) +{ + struct ibv_qp_init_attr_ex attr_ex = { 0 }; + struct efa_cq *scq, *rcq; + struct ibv_cq_ex *tx_ibv_cq, *rx_ibv_cq; + int err; + + scq = efa_base_ep_get_tx_cq(ep); + rcq = efa_base_ep_get_rx_cq(ep); + + if (!scq && !rcq) { + EFA_WARN(FI_LOG_EP_CTRL, + "Endpoint is not bound to a send or receive completion queue\n"); + return -FI_ENOCQ; + } + + if (!scq && ofi_needs_tx(ep->info->caps)) { + EFA_WARN(FI_LOG_EP_CTRL, + "Endpoint is not bound to a send completion queue when it has transmit capabilities enabled (FI_SEND).\n"); + return -FI_ENOCQ; + } + + if (!rcq && ofi_needs_rx(ep->info->caps)) { + EFA_WARN(FI_LOG_EP_CTRL, + "Endpoint is not bound to a receive completion queue when it has receive capabilities enabled. (FI_RECV)\n"); + return -FI_ENOCQ; + } + + tx_ibv_cq = scq ? scq->ibv_cq.ibv_cq_ex : rcq->ibv_cq.ibv_cq_ex; + rx_ibv_cq = rcq ? 
rcq->ibv_cq.ibv_cq_ex : scq->ibv_cq.ibv_cq_ex; + + efa_base_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, tx_ibv_cq, rx_ibv_cq); + + err = efa_base_ep_create_qp(ep, &attr_ex); + if (err) + return err; + + if (create_user_recv_qp) { + err = efa_qp_create(&ep->user_recv_qp, &attr_ex, ep->info->tx_attr->tclass); + if (err) { + efa_base_ep_destruct_qp(ep); + return err; + } + ep->user_recv_qp->base_ep = ep; + } + + return efa_base_ep_enable(ep); +} diff --git a/prov/efa/src/efa_base_ep.h b/prov/efa/src/efa_base_ep.h index d8d205815b7..11a91c440d8 100644 --- a/prov/efa/src/efa_base_ep.h +++ b/prov/efa/src/efa_base_ep.h @@ -117,4 +117,13 @@ struct efa_cq *efa_base_ep_get_tx_cq(struct efa_base_ep *ep); struct efa_cq *efa_base_ep_get_rx_cq(struct efa_base_ep *ep); +int efa_base_ep_check_qp_in_order_aligned_128_bytes(struct efa_base_ep *base_ep, + enum ibv_wr_opcode op_code); + +int efa_base_ep_insert_cntr_ibv_cq_poll_list(struct efa_base_ep *ep); + +void efa_base_ep_remove_cntr_ibv_cq_poll_list(struct efa_base_ep *ep); + +int efa_base_ep_create_and_enable_qp(struct efa_base_ep *ep, bool create_user_recv_qp); + #endif diff --git a/prov/efa/src/efa_cntr.c b/prov/efa/src/efa_cntr.c index 8082ae76fd1..b5bf458bca3 100644 --- a/prov/efa/src/efa_cntr.c +++ b/prov/efa/src/efa_cntr.c @@ -180,18 +180,16 @@ static void efa_rdm_cntr_progress(struct util_cntr *cntr) static void efa_cntr_progress(struct util_cntr *cntr) { - struct util_ep *ep; - struct fid_list_entry *fid_entry; struct dlist_entry *item; + struct efa_ibv_cq_poll_list_entry *poll_list_entry; + struct efa_cntr *efa_cntr; + + efa_cntr = container_of(cntr, struct efa_cntr, util_cntr); ofi_genlock_lock(&cntr->ep_list_lock); - dlist_foreach(&cntr->ep_list, item) { - fid_entry = container_of(item, struct fid_list_entry, entry); - ep = container_of(fid_entry->fid, struct util_ep, ep_fid.fid); - if (ep->tx_cq) - efa_cq_progress(ep->tx_cq); - if (ep->rx_cq && ep->rx_cq != ep->tx_cq) - efa_cq_progress(ep->rx_cq); + 
dlist_foreach(&efa_cntr->ibv_cq_poll_list, item) { + poll_list_entry = container_of(item, struct efa_ibv_cq_poll_list_entry, entry); + efa_cq_poll_ibv_cq(efa_env.efa_cq_read_size, poll_list_entry->cq); + } ofi_genlock_unlock(&cntr->ep_list_lock); } diff --git a/prov/efa/src/efa_ep.c b/prov/efa/src/efa_ep.c index 3b8b9190629..582e075ebaa 100644 --- a/prov/efa/src/efa_ep.c +++ b/prov/efa/src/efa_ep.c @@ -15,24 +15,165 @@ extern struct fi_ops_rma efa_rma_ops; static int efa_ep_getopt(fid_t fid, int level, int optname, void *optval, size_t *optlen) { - switch (level) { - case FI_OPT_ENDPOINT: + struct efa_base_ep *ep; + + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); + + if (level != FI_OPT_ENDPOINT) return -FI_ENOPROTOOPT; + + switch (optname) { + case FI_OPT_EFA_RNR_RETRY: + if (*optlen < sizeof(size_t)) + return -FI_ETOOSMALL; + *(size_t *)optval = ep->rnr_retry; + *optlen = sizeof(size_t); + break; + /* p2p is required for efa direct ep */ + case FI_OPT_FI_HMEM_P2P: + if (*optlen < sizeof(int)) + return -FI_ETOOSMALL; + *(int *)optval = FI_HMEM_P2P_REQUIRED; + *optlen = sizeof(int); + break; + case FI_OPT_MAX_MSG_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->max_msg_size; + *optlen = sizeof (size_t); + break; + case FI_OPT_MAX_RMA_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->max_rma_size; + *optlen = sizeof (size_t); + break; + case FI_OPT_INJECT_MSG_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->inject_msg_size; + *optlen = sizeof (size_t); + break; + case FI_OPT_INJECT_RMA_SIZE: + if (*optlen < sizeof (size_t)) + return -FI_ETOOSMALL; + *(size_t *) optval = ep->inject_rma_size; + *optlen = sizeof (size_t); + break; + /* Emulated read/write is NOT used for efa direct ep */ + case FI_OPT_EFA_EMULATED_READ: /* fall through */ + case FI_OPT_EFA_EMULATED_WRITE: + if (*optlen < sizeof(bool)) + return -FI_ETOOSMALL; + 
*(bool *)optval = false; + *optlen = sizeof(bool); + break; default: + EFA_INFO(FI_LOG_EP_CTRL, "Unknown / unsupported endpoint option\n"); return -FI_ENOPROTOOPT; } - return 0; + + return FI_SUCCESS; } static int efa_ep_setopt(fid_t fid, int level, int optname, const void *optval, size_t optlen) { - switch (level) { - case FI_OPT_ENDPOINT: + int ret, intval; + struct efa_base_ep *ep; + + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); + + if (level != FI_OPT_ENDPOINT) return -FI_ENOPROTOOPT; + + switch (optname) { + case FI_OPT_EFA_RNR_RETRY: + if (optlen != sizeof(size_t)) + return -FI_EINVAL; + + /* + * Application is required to call fi_setopt before the EP is + * enabled. If fi_setopt is called after the EP is enabled, + * fail the call. + * + * The ep->efa_qp_enabled flag is used to check whether the + * call to fi_setopt is before or after EP enable, for + * convenience, instead of calling ibv_query_qp + */ + if (ep->efa_qp_enabled) { + EFA_WARN(FI_LOG_EP_CTRL, + "The option FI_OPT_EFA_RNR_RETRY is required " + "to be set before EP enabled\n"); + return -FI_EINVAL; + } + + if (!efa_domain_support_rnr_retry_modify(ep->domain)) { + EFA_WARN(FI_LOG_EP_CTRL, + "RNR capability is not supported\n"); + return -FI_ENOSYS; + } + ep->rnr_retry = *(size_t *)optval; + break; + case FI_OPT_FI_HMEM_P2P: + if (optlen != sizeof(int)) + return -FI_EINVAL; + + intval = *(int *)optval; + + if (intval == FI_HMEM_P2P_DISABLED) { + EFA_WARN(FI_LOG_EP_CTRL, "p2p is required by implementation\n"); + return -FI_EOPNOTSUPP; + } + break; + case FI_OPT_MAX_MSG_SIZE: + EFA_EP_SETOPT_THRESHOLD(MAX_MSG_SIZE, ep->max_msg_size, (size_t) ep->domain->device->ibv_port_attr.max_msg_sz) + break; + case FI_OPT_MAX_RMA_SIZE: + EFA_EP_SETOPT_THRESHOLD(MAX_RMA_SIZE, ep->max_rma_size, (size_t) ep->domain->device->max_rdma_size) + break; + case FI_OPT_INJECT_MSG_SIZE: + EFA_EP_SETOPT_THRESHOLD(INJECT_MSG_SIZE, ep->inject_msg_size, (size_t) 
ep->domain->device->efa_attr.inline_buf_size) + break; + case FI_OPT_INJECT_RMA_SIZE: + EFA_EP_SETOPT_THRESHOLD(INJECT_RMA_SIZE, ep->inject_rma_size, (size_t) 0) + break; + /* no op as efa direct ep will not use cuda api and shm in data transfer */ + case FI_OPT_CUDA_API_PERMITTED: /* fall through */ + case FI_OPT_SHARED_MEMORY_PERMITTED: + break; + /* no op as efa direct ep will always use rdma for rma operations in data transfer */ + case FI_OPT_EFA_USE_DEVICE_RDMA: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + if (!(*(bool *)optval) && (ep->info->caps & FI_RMA)) { + EFA_WARN(FI_LOG_EP_CTRL, "Device rdma is required for rma operations\n"); + return -FI_EOPNOTSUPP; + } + break; + case FI_OPT_EFA_SENDRECV_IN_ORDER_ALIGNED_128_BYTES: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + if (*(bool *)optval) { + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(ep, IBV_WR_SEND); + if (ret) + return ret; + } + break; + case FI_OPT_EFA_WRITE_IN_ORDER_ALIGNED_128_BYTES: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + if (*(bool *)optval) { + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(ep, IBV_WR_RDMA_WRITE); + if (ret) + return ret; + } + break; default: + EFA_INFO(FI_LOG_EP_CTRL, "Unknown / unsupported endpoint option\n"); return -FI_ENOPROTOOPT; } - return 0; + + return FI_SUCCESS; } static struct fi_ops_ep efa_ep_base_ops = { @@ -46,25 +187,25 @@ static struct fi_ops_ep efa_ep_base_ops = { .tx_size_left = fi_no_tx_size_left, }; -static void efa_ep_destroy(struct efa_base_ep *ep) +static int efa_ep_close(fid_t fid) { + struct efa_base_ep *ep; int ret; + ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); + + /* We need to free the util_ep first to avoid race conditions + * with other threads progressing the cntr. 
*/ + efa_base_ep_close_util_ep(ep); + + efa_base_ep_remove_cntr_ibv_cq_poll_list(ep); + ret = efa_base_ep_destruct(ep); if (ret) { EFA_WARN(FI_LOG_EP_CTRL, "Unable to close base endpoint\n"); } free(ep); -} - -static int efa_ep_close(fid_t fid) -{ - struct efa_base_ep *ep; - - ep = container_of(fid, struct efa_base_ep, util_ep.ep_fid.fid); - - efa_ep_destroy(ep); return 0; } @@ -108,6 +249,11 @@ static int efa_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) break; case FI_CLASS_AV: av = container_of(bfid, struct efa_av, util_av.av_fid.fid); + /* Bind util provider endpoint and av */ + ret = ofi_ep_bind_av(&ep->util_ep, &av->util_av); + if (ret) + return ret; + ret = efa_base_ep_bind_av(ep, av); if (ret) return ret; @@ -127,6 +273,7 @@ static int efa_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) return ret; break; default: + EFA_WARN(FI_LOG_EP_CTRL, "invalid fid class\n"); return -EINVAL; } @@ -178,66 +325,20 @@ static int efa_ep_setflags(struct fid_ep *ep_fid, uint64_t flags) static int efa_ep_enable(struct fid_ep *ep_fid) { - struct ibv_qp_init_attr_ex attr_ex = { 0 }; struct efa_base_ep *ep; - struct efa_cq *scq, *rcq; int err; ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); - scq = ep->util_ep.tx_cq ? container_of(ep->util_ep.tx_cq, struct efa_cq, util_cq) : NULL; - rcq = ep->util_ep.rx_cq ? container_of(ep->util_ep.rx_cq, struct efa_cq, util_cq) : NULL; - - if (!scq && !rcq) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send or receive completion queue\n"); - return -FI_ENOCQ; - } - - if (!scq && ofi_needs_tx(ep->info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send completion queue when it has transmit capabilities enabled (FI_SEND).\n"); - return -FI_ENOCQ; - } - - if (!rcq && ofi_needs_rx(ep->info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a receive completion queue when it has receive capabilities enabled. 
(FI_RECV)\n"); - return -FI_ENOCQ; - } - - if (scq) { - attr_ex.cap.max_send_wr = ep->info->tx_attr->size; - attr_ex.cap.max_send_sge = ep->info->tx_attr->iov_limit; - attr_ex.send_cq = ibv_cq_ex_to_cq(scq->ibv_cq.ibv_cq_ex); - } else { - attr_ex.send_cq = ibv_cq_ex_to_cq(rcq->ibv_cq.ibv_cq_ex); - } - - if (rcq) { - attr_ex.cap.max_recv_wr = ep->info->rx_attr->size; - attr_ex.cap.max_recv_sge = ep->info->rx_attr->iov_limit; - attr_ex.recv_cq = ibv_cq_ex_to_cq(rcq->ibv_cq.ibv_cq_ex); - } else { - attr_ex.recv_cq = ibv_cq_ex_to_cq(scq->ibv_cq.ibv_cq_ex); - } - - attr_ex.cap.max_inline_data = - ep->domain->device->efa_attr.inline_buf_size; - - assert(EFA_EP_TYPE_IS_DGRAM(ep->domain->info)); - attr_ex.qp_type = IBV_QPT_UD; - attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD; - attr_ex.pd = container_of(ep->util_ep.domain, struct efa_domain, util_domain)->ibv_pd; - - attr_ex.qp_context = ep; - attr_ex.sq_sig_all = 1; - - err = efa_base_ep_create_qp(ep, &attr_ex); + err = efa_base_ep_create_and_enable_qp(ep, false); if (err) return err; - return efa_base_ep_enable(ep); + err = efa_base_ep_insert_cntr_ibv_cq_poll_list(ep); + if (err) + efa_base_ep_destruct_qp(ep); + + return err; } static int efa_ep_control(struct fid *fid, int command, void *arg) @@ -317,42 +418,9 @@ struct fi_ops_cm efa_ep_cm_ops = { int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, struct fid_ep **ep_fid, void *context) { - struct efa_domain *domain; - const struct fi_info *prov_info; struct efa_base_ep *ep; int ret; - domain = container_of(domain_fid, struct efa_domain, - util_domain.domain_fid); - - if (!user_info || !user_info->ep_attr || !user_info->domain_attr || - strncmp(domain->device->ibv_ctx->device->name, user_info->domain_attr->name, - strlen(domain->device->ibv_ctx->device->name))) { - EFA_INFO(FI_LOG_DOMAIN, "Invalid info->domain_attr->name\n"); - return -FI_EINVAL; - } - - prov_info = efa_domain_get_prov_info(domain, user_info->ep_attr->type); - assert(prov_info); - - 
assert(user_info->ep_attr); - ret = ofi_check_ep_attr(&efa_util_prov, user_info->fabric_attr->api_version, prov_info, user_info); - if (ret) - return ret; - - if (user_info->tx_attr) { - ret = ofi_check_tx_attr(&efa_prov, prov_info->tx_attr, - user_info->tx_attr, user_info->mode); - if (ret) - return ret; - } - - if (user_info->rx_attr) { - ret = ofi_check_rx_attr(&efa_prov, prov_info, user_info->rx_attr, user_info->mode); - if (ret) - return ret; - } - ep = calloc(1, sizeof(*ep)); if (!ep) return -FI_ENOMEM; @@ -361,13 +429,6 @@ int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, if (ret) goto err_ep_destroy; - /* struct efa_send_wr and efa_recv_wr allocates memory for 2 IOV - * So check with an assert statement that iov_limit is 2 or less - */ - assert(user_info->tx_attr->iov_limit <= 2); - - ep->domain = domain; - *ep_fid = &ep->util_ep.ep_fid; (*ep_fid)->fid.fclass = FI_CLASS_EP; (*ep_fid)->fid.context = context; @@ -381,6 +442,8 @@ int efa_ep_open(struct fid_domain *domain_fid, struct fi_info *user_info, return 0; err_ep_destroy: - efa_ep_destroy(ep); + efa_base_ep_destruct(ep); + if (ep) + free(ep); return ret; } diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 1981ed9825f..86579d06112 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -14,25 +14,6 @@ #include "efa_rdm_pke_req.h" #include "efa_cntr.h" -static -void efa_rdm_ep_construct_ibv_qp_init_attr_ex(struct efa_rdm_ep *ep, - struct ibv_qp_init_attr_ex *attr_ex, - struct ibv_cq_ex *tx_cq, - struct ibv_cq_ex *rx_cq) -{ - attr_ex->cap.max_send_wr = ep->base_ep.domain->device->rdm_info->tx_attr->size; - attr_ex->cap.max_send_sge = ep->base_ep.domain->device->rdm_info->tx_attr->iov_limit; - attr_ex->cap.max_recv_wr = ep->base_ep.domain->device->rdm_info->rx_attr->size; - attr_ex->cap.max_recv_sge = ep->base_ep.domain->device->rdm_info->rx_attr->iov_limit; - attr_ex->cap.max_inline_data = 
ep->base_ep.domain->device->efa_attr.inline_buf_size; - attr_ex->qp_type = IBV_QPT_DRIVER; - attr_ex->pd = efa_rdm_ep_domain(ep)->ibv_pd; - attr_ex->qp_context = ep; - attr_ex->sq_sig_all = 1; - - attr_ex->send_cq = ibv_cq_ex_to_cq(tx_cq); - attr_ex->recv_cq = ibv_cq_ex_to_cq(rx_cq); -} static inline struct efa_rdm_cq *efa_rdm_ep_get_tx_rdm_cq(struct efa_rdm_ep *ep) @@ -46,68 +27,6 @@ struct efa_rdm_cq *efa_rdm_ep_get_rx_rdm_cq(struct efa_rdm_ep *ep) return ep->base_ep.util_ep.rx_cq ? container_of(ep->base_ep.util_ep.rx_cq, struct efa_rdm_cq, efa_cq.util_cq) : NULL; } -/** - * @brief set the "efa_qp" field in the efa_rdm_ep->efa_base_ep - * called by efa_rdm_ep_open() - * - * @param[in,out] ep The EFA RDM endpoint to set the qp in - * @return int 0 on success, negative libfabric error code otherwise - * @todo merge this function with #efa_base_ep_construct - */ -static -int efa_rdm_ep_create_base_ep_ibv_qp(struct efa_rdm_ep *ep) -{ - struct ibv_qp_init_attr_ex attr_ex = { 0 }; - struct efa_cq *tx_cq, *rx_cq; - struct ibv_cq_ex *tx_ibv_cq, *rx_ibv_cq; - int ret; - - tx_cq = efa_base_ep_get_tx_cq(&ep->base_ep); - rx_cq = efa_base_ep_get_rx_cq(&ep->base_ep); - - if (!tx_cq && !rx_cq) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send or receive completion queue\n"); - return -FI_ENOCQ; - } - - if (!tx_cq && ofi_needs_tx(ep->base_ep.info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a send completion queue when it has transmit capabilities enabled (FI_SEND).\n"); - return -FI_ENOCQ; - } - - if (!rx_cq && ofi_needs_rx(ep->base_ep.info->caps)) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint is not bound to a receive completion queue when it has receive capabilities enabled (FI_RECV).\n"); - return -FI_ENOCQ; - } - - tx_ibv_cq = tx_cq ? tx_cq->ibv_cq.ibv_cq_ex : rx_cq->ibv_cq.ibv_cq_ex; - rx_ibv_cq = rx_cq ? 
rx_cq->ibv_cq.ibv_cq_ex : tx_cq->ibv_cq.ibv_cq_ex; - - efa_rdm_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, tx_ibv_cq, rx_ibv_cq); - - ret = efa_base_ep_create_qp(&ep->base_ep, &attr_ex); - if (ret) - return ret; - - /** - * Create separate user_recv_qp to receive pkts that carries user data - * without any headers. - */ - if (ep->use_zcpy_rx) { - ret = efa_qp_create(&ep->base_ep.user_recv_qp, &attr_ex, ep->base_ep.info->tx_attr->tclass); - if (ret) { - efa_base_ep_destruct_qp(&ep->base_ep); - return ret; - } - ep->base_ep.user_recv_qp->base_ep = &ep->base_ep; - } - - return FI_SUCCESS; -} - static int efa_rdm_pke_pool_mr_reg_handler(struct ofi_bufpool_region *region) { @@ -554,11 +473,26 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, EFA_INFO(FI_LOG_EP_CTRL, "efa_rdm_ep->host_id: i-%017lx\n", efa_rdm_ep->host_id); } + /** + * These fields are set to the efa device's default limits in base_ep. + * Override them with the values that are supported by efa-rdm. + * The info->ep_attr->max_msg_size is UINT64_MAX for efa-rdm because + * it supports segmentation of a large message into small pieces that + * fit into the device limit. The info->tx_attr->inject_size is currently + * the MIN(efa_mtu_size - max_hdr_size, shm_inject_size) + * as it supports emulated injection by copying user tx buffer into + * internal bounce buffer. 
+ */ + efa_rdm_ep->base_ep.max_msg_size = info->ep_attr->max_msg_size; + efa_rdm_ep->base_ep.max_rma_size = info->ep_attr->max_msg_size; + efa_rdm_ep->base_ep.inject_msg_size = info->tx_attr->inject_size; + efa_rdm_ep->base_ep.inject_rma_size = info->tx_attr->inject_size; + + /* efa_rdm_ep's own fields */ efa_rdm_ep->max_tagged_size = info->ep_attr->max_msg_size; efa_rdm_ep->max_atomic_size = info->ep_attr->max_msg_size; efa_rdm_ep->inject_tagged_size = info->tx_attr->inject_size; efa_rdm_ep->inject_atomic_size = info->tx_attr->inject_size; - efa_rdm_ep->base_ep.inject_rma_size = info->tx_attr->inject_size; efa_rdm_ep->efa_max_outstanding_tx_ops = efa_domain->device->rdm_info->tx_attr->size; efa_rdm_ep->efa_max_outstanding_rx_ops = efa_domain->device->rdm_info->rx_attr->size; efa_rdm_ep->use_device_rdma = efa_rdm_get_use_device_rdma(info->fabric_attr->api_version); @@ -892,30 +826,6 @@ void efa_rdm_ep_wait_send(struct efa_rdm_ep *efa_rdm_ep) ofi_genlock_unlock(&efa_rdm_ep_domain(efa_rdm_ep)->srx_lock); } -static inline -void efa_rdm_ep_remove_cntr_ibv_cq_poll_list(struct efa_rdm_ep *ep) -{ - int i; - struct efa_cntr *efa_cntr; - struct util_cntr *util_cntr; - struct efa_cq *tx_cq, *rx_cq; - - tx_cq = efa_base_ep_get_tx_cq(&ep->base_ep); - rx_cq = efa_base_ep_get_rx_cq(&ep->base_ep); - - for (i = 0; i< CNTR_CNT; i++) { - util_cntr = ep->base_ep.util_ep.cntrs[i]; - if (util_cntr) { - efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); - if (tx_cq && !ofi_atomic_get32(&tx_cq->util_cq.ref)) - efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); - - if (rx_cq && !ofi_atomic_get32(&rx_cq->util_cq.ref)) - efa_ibv_cq_poll_list_remove(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); - } - } -} - static inline void efa_rdm_ep_remove_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) { @@ -1007,7 +917,7 @@ static int efa_rdm_ep_close(struct fid *fid) * with other threads progressing 
the cq. */ efa_base_ep_close_util_ep(&efa_rdm_ep->base_ep); - efa_rdm_ep_remove_cntr_ibv_cq_poll_list(efa_rdm_ep); + efa_base_ep_remove_cntr_ibv_cq_poll_list(&efa_rdm_ep->base_ep); efa_rdm_ep_remove_cq_ibv_cq_poll_list(efa_rdm_ep); @@ -1181,39 +1091,6 @@ void efa_rdm_ep_update_shm(struct efa_rdm_ep *ep) efa_rdm_ep_close_shm_resources(ep); } -static inline -int efa_rdm_ep_insert_cntr_ibv_cq_poll_list(struct efa_rdm_ep *ep) -{ - int i, ret; - struct efa_cntr *efa_cntr; - struct util_cntr *util_cntr; - struct efa_cq *tx_cq, *rx_cq; - tx_cq = efa_base_ep_get_tx_cq(&ep->base_ep); - rx_cq = efa_base_ep_get_rx_cq(&ep->base_ep); - - for (i = 0; i < CNTR_CNT; i++) { - util_cntr = ep->base_ep.util_ep.cntrs[i]; - if (util_cntr) { - efa_cntr = container_of(util_cntr, struct efa_cntr, util_cntr); - if (tx_cq) { - ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &tx_cq->ibv_cq); - if (ret) - return ret; - } - if (rx_cq) { - ret = efa_ibv_cq_poll_list_insert(&efa_cntr->ibv_cq_poll_list, &efa_cntr->util_cntr.ep_list_lock, &rx_cq->ibv_cq); - if (ret) - return ret; - } - ofi_genlock_lock(&efa_cntr->util_cntr.ep_list_lock); - efa_cntr->need_to_scan_ep_list = true; - ofi_genlock_unlock(&efa_cntr->util_cntr.ep_list_lock); - } - } - - return FI_SUCCESS; -} - static inline int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) { @@ -1271,6 +1148,7 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) struct fi_peer_srx_context peer_srx_context = {0}; struct fi_rx_attr peer_srx_attr = {0}; struct util_srx_ctx *srx_ctx; + bool create_user_recv_qp = false; switch (command) { case FI_ENABLE: @@ -1301,14 +1179,10 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) ep->base_ep.inject_rma_size = MIN(ep->base_ep.inject_rma_size, efa_rdm_ep_domain(ep)->device->efa_attr.inline_buf_size); + create_user_recv_qp = true; } - ret = efa_rdm_ep_create_base_ep_ibv_qp(ep); - if (ret) - return ret; - - /* 
efa_base_ep_enable destroys qp in the error path */ - ret = efa_base_ep_enable(&ep->base_ep); + ret = efa_base_ep_create_and_enable_qp(&ep->base_ep, create_user_recv_qp); if (ret) return ret; @@ -1316,7 +1190,7 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) if (ret) goto err_destroy_qp; - ret = efa_rdm_ep_insert_cntr_ibv_cq_poll_list(ep); + ret = efa_base_ep_insert_cntr_ibv_cq_poll_list(&ep->base_ep); if (ret) goto err_destroy_qp; @@ -1572,72 +1446,6 @@ static int efa_rdm_ep_set_use_device_rdma(struct efa_rdm_ep *ep, bool use_device return 0; } -/** - * @brief check the in order aligned 128 bytes support for a given ibv_wr_op code - * - * @param ep efa_rdm_ep - * @param op_code ibv wr op code - * @return int 0 if in order aligned 128 bytes is supported, -FI_EOPNOTSUPP if - * it is not supported. Other negative integer for other errors. - */ -static -int efa_rdm_ep_check_qp_in_order_aligned_128_bytes(struct efa_rdm_ep *ep, - enum ibv_wr_opcode op_code) -{ - struct efa_qp *qp = NULL; - struct ibv_qp_init_attr_ex attr_ex = {0}; - int ret, retv; - struct ibv_cq_ex *ibv_cq_ex = NULL; - enum ibv_cq_ex_type ibv_cq_ex_type; - struct fi_cq_attr cq_attr = {0}; - - ret = efa_cq_ibv_cq_ex_open(&cq_attr, efa_rdm_ep_domain(ep)->device->ibv_ctx, &ibv_cq_ex, &ibv_cq_ex_type); - if (ret) { - EFA_WARN(FI_LOG_CQ, "Unable to create extended CQ: %d\n", ret); - ret = -FI_EINVAL; - goto out; - } - - /* Create a dummy qp for query only */ - efa_rdm_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, ibv_cq_ex, ibv_cq_ex); - - ret = efa_qp_create(&qp, &attr_ex, FI_TC_UNSPEC); - if (ret) - goto out; - - if (!efa_qp_support_op_in_order_aligned_128_bytes(qp, op_code)) - ret = -FI_EOPNOTSUPP; - -out: - if (qp) - efa_qp_destruct(qp); - - if (ibv_cq_ex) { - retv = -ibv_destroy_cq(ibv_cq_ex_to_cq(ibv_cq_ex)); - if (retv) - EFA_WARN(FI_LOG_EP_CTRL, "Unable to close ibv cq: %s\n", - fi_strerror(-retv)); - } - return ret; -} - -/** - * Convenience macro for setopt with an enforced 
threshold - */ -#define EFA_RDM_EP_SETOPT_THRESHOLD(opt, field, threshold) { \ - size_t _val = *(size_t *) optval; \ - if (optlen != sizeof field) \ - return -FI_EINVAL; \ - if (_val > threshold) { \ - EFA_WARN(FI_LOG_EP_CTRL, \ - "Requested size of %zu for FI_OPT_" #opt " " \ - "exceeds the maximum (%zu)\n", \ - _val, threshold); \ - return -FI_EINVAL; \ - } \ - field = _val; \ -} - /** * @brief implement the fi_setopt() API for EFA RDM endpoint * @param[in] fid fid to endpoint @@ -1718,28 +1526,28 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, return ret; break; case FI_OPT_MAX_MSG_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_MSG_SIZE, efa_rdm_ep->base_ep.max_msg_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_MSG_SIZE, efa_rdm_ep->base_ep.max_msg_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_MAX_TAGGED_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_TAGGED_SIZE, efa_rdm_ep->max_tagged_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_TAGGED_SIZE, efa_rdm_ep->max_tagged_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_MAX_RMA_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_RMA_SIZE, efa_rdm_ep->base_ep.max_rma_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_RMA_SIZE, efa_rdm_ep->base_ep.max_rma_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_MAX_ATOMIC_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(MAX_ATOMIC_SIZE, efa_rdm_ep->max_atomic_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) + EFA_EP_SETOPT_THRESHOLD(MAX_ATOMIC_SIZE, efa_rdm_ep->max_atomic_size, efa_rdm_ep->base_ep.info->ep_attr->max_msg_size) break; case FI_OPT_INJECT_MSG_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_MSG_SIZE, efa_rdm_ep->base_ep.inject_msg_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_MSG_SIZE, efa_rdm_ep->base_ep.inject_msg_size, 
efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_INJECT_TAGGED_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_TAGGED_SIZE, efa_rdm_ep->inject_tagged_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_TAGGED_SIZE, efa_rdm_ep->inject_tagged_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_INJECT_RMA_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_RMA_SIZE, efa_rdm_ep->base_ep.inject_rma_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_RMA_SIZE, efa_rdm_ep->base_ep.inject_rma_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_INJECT_ATOMIC_SIZE: - EFA_RDM_EP_SETOPT_THRESHOLD(INJECT_ATOMIC_SIZE, efa_rdm_ep->inject_atomic_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) + EFA_EP_SETOPT_THRESHOLD(INJECT_ATOMIC_SIZE, efa_rdm_ep->inject_atomic_size, efa_rdm_ep->base_ep.info->tx_attr->inject_size) break; case FI_OPT_EFA_USE_DEVICE_RDMA: if (optlen != sizeof(bool)) @@ -1756,7 +1564,7 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, * application buffer on device */ if (*(bool *)optval) { - ret = efa_rdm_ep_check_qp_in_order_aligned_128_bytes(efa_rdm_ep, IBV_WR_RDMA_READ); + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(&efa_rdm_ep->base_ep, IBV_WR_RDMA_READ); if (ret) return ret; } @@ -1766,7 +1574,7 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, if (optlen != sizeof(bool)) return -FI_EINVAL; if (*(bool *)optval) { - ret = efa_rdm_ep_check_qp_in_order_aligned_128_bytes(efa_rdm_ep, IBV_WR_RDMA_WRITE); + ret = efa_base_ep_check_qp_in_order_aligned_128_bytes(&efa_rdm_ep->base_ep, IBV_WR_RDMA_WRITE); if (ret) return ret; } diff --git a/prov/efa/test/efa_unit_test_cntr.c b/prov/efa/test/efa_unit_test_cntr.c index 2aa2ea60927..d5447294c3f 100644 --- a/prov/efa/test/efa_unit_test_cntr.c +++ b/prov/efa/test/efa_unit_test_cntr.c @@ -10,7 +10,7 @@ * @return int the length of the ibv_cq_poll_list */ static 
-int test_efa_rdm_cntr_get_ibv_cq_poll_list_length(struct fid_cntr *cntr_fid) +int test_efa_cntr_get_ibv_cq_poll_list_length(struct fid_cntr *cntr_fid) { int i = 0; struct dlist_entry *item; @@ -30,14 +30,12 @@ int test_efa_rdm_cntr_get_ibv_cq_poll_list_length(struct fid_cntr *cntr_fid) * * @param state struct efa_resource that is managed by the framework */ -void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resource **state) +static +void test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(struct efa_resource *resource) { - struct efa_resource *resource = *state; struct fid_cntr *cntr; struct fi_cntr_attr cntr_attr = {0}; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); - assert_int_equal(fi_cntr_open(resource->domain, &cntr_attr, &cntr, NULL), 0); /* TODO: expand this test to all flags */ @@ -46,7 +44,7 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resou assert_int_equal(fi_enable(resource->ep), 0); /* efa_unit_test_resource_construct binds single OFI CQ as both tx/rx cq of ep */ - assert_int_equal(test_efa_rdm_cntr_get_ibv_cq_poll_list_length(cntr), 1); + assert_int_equal(test_efa_cntr_get_ibv_cq_poll_list_length(cntr), 1); /* ep must be closed before cq/av/eq... 
*/ fi_close(&resource->ep->fid); @@ -55,21 +53,35 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resou fi_close(&cntr->fid); } +void test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(resource); +} + +void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); + test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(resource); +} + /** * @brief Check the length of ibv_cq_poll_list in cntr when separate tx/rx cq is bind to 1 ep. * * @param state struct efa_resource that is managed by the framework */ -void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_resource **state) +static +void test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(struct efa_resource *resource) { - struct efa_resource *resource = *state; struct fid_cq *txcq, *rxcq; struct fi_cq_attr cq_attr = {0}; struct fid_cntr *cntr; struct fi_cntr_attr cntr_attr = {0}; - efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM); - assert_int_equal(fi_cq_open(resource->domain, &cq_attr, &txcq, NULL), 0); assert_int_equal(fi_ep_bind(resource->ep, &txcq->fid, FI_SEND), 0); @@ -85,7 +97,7 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_r assert_int_equal(fi_enable(resource->ep), 0); - assert_int_equal(test_efa_rdm_cntr_get_ibv_cq_poll_list_length(cntr), 2); + assert_int_equal(test_efa_cntr_get_ibv_cq_poll_list_length(cntr), 2); /* ep must be closed before cq/av/eq... 
*/ fi_close(&resource->ep->fid); @@ -95,7 +107,23 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_r fi_close(&cntr->fid); } -void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state) +void test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_for_efa_direct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM); + test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(resource); +} + +void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM); + test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(resource); +} + +void test_efa_rdm_cntr_post_initial_rx_pkts(struct efa_resource **state) { struct efa_resource *resource = *state; struct efa_rdm_ep *efa_rdm_ep; diff --git a/prov/efa/test/efa_unit_test_common.c b/prov/efa/test/efa_unit_test_common.c index 47cae69f20b..3799cd72860 100644 --- a/prov/efa/test/efa_unit_test_common.c +++ b/prov/efa/test/efa_unit_test_common.c @@ -2,6 +2,7 @@ /* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_unit_tests.h" +#include "efa_cq.h" #include "efa_rdm_pke_utils.h" #include "efa_rdm_pke_nonreq.h" #include "efa_rdm_pke_req.h" @@ -70,15 +71,17 @@ struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type) return hints; } +/* TODO: remove use_efa_direct after we have efa_direct implemented in fi_info */ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, enum fi_ep_type ep_type, uint32_t fi_version, struct fi_info *hints, - bool enable_ep, bool open_cq) + bool enable_ep, bool open_cq, bool use_efa_direct) { int ret = 0; struct fi_av_attr av_attr = {0}; struct fi_cq_attr cq_attr = {0}; struct fi_eq_attr eq_attr = {0}; + struct efa_domain *efa_domain; ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints, &resource->info); if (ret) @@ -92,6 +95,17 @@ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, if (ret) goto err; + /* + * TODO: Remove this function pointer override when we have it assigned + * for efa-direct correctly. 
+ */ + if (use_efa_direct) { + efa_domain = container_of(resource->domain, struct efa_domain, util_domain.domain_fid); + + efa_domain->util_domain.domain_fid.ops->endpoint = efa_ep_open; + efa_domain->util_domain.domain_fid.ops->cq_open = efa_cq_open; + } + ret = fi_endpoint(resource->domain, resource->info, &resource->ep, NULL); if (ret) goto err; @@ -137,7 +151,74 @@ void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_ if (!resource->hints) goto err; efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, false); + return; + +err: + efa_unit_test_resource_destruct(resource); + + /* Fail test early if the resource struct fails to initialize */ + assert_int_equal(1, 0); +} + +struct fi_info* efa_unit_test_alloc_hints_for_efa_direct(enum fi_ep_type ep_type) +{ + struct fi_info *hints = efa_unit_test_alloc_hints(ep_type); + + if (!hints) + return NULL; + + /* Start from the minimal caps that efa ep can support */ + /* TODO: Add required mode bits for efa-direct when we implement it in fi_info */ + /* TODO: Use efa_direct as prov_name when we have it implemented */ + hints->caps |= FI_MSG; + hints->domain_attr->mr_mode = MR_MODE_BITS; + + return hints; +} + +void efa_unit_test_resource_construct_for_efa_direct(struct efa_resource *resource, enum fi_ep_type ep_type) +{ + resource->hints = efa_unit_test_alloc_hints_for_efa_direct(ep_type); + if (!resource->hints) + goto err; + + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), + resource->hints, true, true, true); + return; + +err: + efa_unit_test_resource_destruct(resource); + + /* Fail test early if the resource struct fails to initialize */ + assert_int_equal(1, 0); +} + +void efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(struct efa_resource *resource, enum fi_ep_type ep_type) +{ + resource->hints = efa_unit_test_alloc_hints_for_efa_direct(ep_type); + if 
(!resource->hints) + goto err; + + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), + resource->hints, false, true, true); + return; + +err: + efa_unit_test_resource_destruct(resource); + + /* Fail test early if the resource struct fails to initialize */ + assert_int_equal(1, 0); +} + +void efa_unit_test_resource_construct_for_efa_direct_no_cq_and_ep_not_enabled(struct efa_resource *resource, enum fi_ep_type ep_type) +{ + resource->hints = efa_unit_test_alloc_hints_for_efa_direct(ep_type); + if (!resource->hints) + goto err; + + efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), + resource->hints, false, false, true); return; err: @@ -154,7 +235,7 @@ void efa_unit_test_resource_construct_ep_not_enabled(struct efa_resource *resour if (!resource->hints) goto err; efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, false, true); + resource->hints, false, true, false); return; err: @@ -171,7 +252,7 @@ void efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(struct efa_resour if (!resource->hints) goto err; efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, false, false); + resource->hints, false, false, false); return; err: @@ -194,7 +275,7 @@ void efa_unit_test_resource_construct_rdm_shm_disabled(struct efa_resource *reso goto err; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, false, true); + resource->hints, false, true, false); ret = fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_SHARED_MEMORY_PERMITTED, &shm_permitted, diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index e939d182b60..46a1c71a123 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -820,7 +820,7 @@ static void test_efa_cq_read(struct efa_resource *resource, fi_addr_t *addr, struct ibv_qp_ex *ibv_qpx; 
struct efa_base_ep *base_ep; - efa_unit_test_resource_construct(resource, FI_EP_DGRAM); + efa_unit_test_resource_construct_for_efa_direct(resource, FI_EP_RDM); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qpx = base_ep->qp->ibv_qp_ex; diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index f2d1d1f0e7a..9918645aa21 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -597,7 +597,7 @@ void test_efa_rdm_ep_rma_queue_before_handshake(struct efa_resource **state, int resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, false); /* ensure we don't have RMA capability. */ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -685,7 +685,7 @@ void test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv(struct efa_resource resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 22), - resource->hints, true, true); + resource->hints, true, true, false); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -794,7 +794,7 @@ void test_efa_rdm_ep_rma_without_caps(struct efa_resource **state) resource->hints->caps &= ~FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, false); /* ensure we don't have RMA capability. 
*/ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -845,7 +845,7 @@ void test_efa_rdm_ep_atomic_without_caps(struct efa_resource **state) resource->hints->caps &= ~FI_ATOMIC; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, false); /* ensure we don't have ATOMIC capability. */ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -1004,7 +1004,7 @@ static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, ofi_hmem_disable_p2p = cuda_p2p_disabled; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, false, true); + resource->hints, false, true, false); /* System memory P2P should always be enabled */ assert_true(g_efa_hmem_info[FI_HMEM_SYSTEM].initialized); @@ -1317,7 +1317,7 @@ void test_efa_rdm_ep_rx_refill_impl(struct efa_resource **state, int threshold, assert_non_null(resource->hints); resource->hints->rx_attr->size = rx_size; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true); + resource->hints, true, true, false); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); assert_int_equal(efa_rdm_ep_get_rx_pool_size(efa_rdm_ep), rx_size); @@ -1395,3 +1395,261 @@ void test_efa_rdm_ep_support_unsolicited_write_recv(struct efa_resource **state) assert_int_equal(efa_use_unsolicited_write_recv(), efa_rdm_ep_support_unsolicited_write_recv(efa_rdm_ep)); } + +/** + * @brief Test the default operational sizes for efa_rdm_ep + * + * @param state + */ +void test_efa_rdm_ep_default_sizes(struct efa_resource **state) +{ + struct efa_rdm_ep *efa_rdm_ep; + struct efa_resource *resource = *state; + + efa_unit_test_resource_construct(resource, FI_EP_RDM); + + efa_rdm_ep = container_of(resource->ep, 
struct efa_rdm_ep, base_ep.util_ep.ep_fid); + + /* sizes shared with base_ep */ + assert_int_equal(efa_rdm_ep->base_ep.max_msg_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->base_ep.max_rma_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->base_ep.inject_msg_size, resource->info->tx_attr->inject_size); + assert_int_equal(efa_rdm_ep->base_ep.inject_rma_size, resource->info->tx_attr->inject_size); + + /* efa_rdm_ep's own fields */ + assert_int_equal(efa_rdm_ep->max_tagged_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->max_atomic_size, resource->info->ep_attr->max_msg_size); + assert_int_equal(efa_rdm_ep->inject_tagged_size, resource->info->tx_attr->inject_size); + assert_int_equal(efa_rdm_ep->inject_atomic_size, resource->info->tx_attr->inject_size); +} + +/** + * @brief Test the fi_endpoint API for efa_ep + * for rdm ep type (because the dgram ep type should + * have the same logic) + * @param state + */ +void test_efa_ep_open(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_base_ep *efa_ep; + struct efa_domain *efa_domain; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + efa_domain = container_of(resource->domain, struct efa_domain, + util_domain.domain_fid); + + /* Check various size limits defaults */ + assert_true(efa_ep->max_msg_size == efa_domain->device->ibv_port_attr.max_msg_sz); + assert_true(efa_ep->max_rma_size == efa_domain->device->max_rdma_size); + assert_true(efa_ep->inject_msg_size == efa_domain->device->efa_attr.inline_buf_size); + /* TODO: update inject_rma_size to inline size after firmware + * supports inline rdma write */ + assert_true(efa_ep->inject_rma_size == 0); +} + +/** + * @brief Test the fi_cancel API for efa_ep + * (for rdm ep type because dgram logic should be the same) + * It should return 
-FI_ENOSYS as device doesn't support it; + * @param state + */ +void test_efa_ep_cancel(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + int ret; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + + ret = fi_cancel((struct fid *)resource->ep, NULL); + assert_int_equal(ret, -FI_ENOSYS); +} + +/** + * @brief Test the fi_getopt API fo efa_ep + * + * @param state + */ +void test_efa_ep_getopt(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + int optval_int; + bool optval_bool; + size_t optval_size_t; + size_t optlen; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + optlen = sizeof(optval_int); + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_FI_HMEM_P2P, &optval_int, &optlen), 0); + assert_int_equal(optval_int, FI_HMEM_P2P_REQUIRED); + + optlen = sizeof(optval_bool); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_EMULATED_READ, &optval_bool, &optlen), 0); + assert_false(optval_bool); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_EMULATED_WRITE, &optval_bool, &optlen), 0); + assert_false(optval_bool); + + optlen = sizeof(optval_size_t); + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_RNR_RETRY, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->rnr_retry); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MAX_MSG_SIZE, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->max_msg_size); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MAX_RMA_SIZE, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->max_rma_size); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_INJECT_MSG_SIZE, 
&optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->inject_msg_size); + + assert_int_equal(fi_getopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_INJECT_RMA_SIZE, &optval_size_t, &optlen), 0); + assert_int_equal(optval_size_t, efa_ep->inject_rma_size); +} + +/** + * @brief Test the fi_setopt API for efa_ep + * When RMA is requested, FI_OPT_EFA_USE_DEVICE_RDMA + * cannot be set as false + * @param state + */ +void test_efa_ep_setopt_use_device_rdma(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + bool optval; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + /* Hard code RMA caps in ep->info for local testing purpose */ + efa_ep->info->caps |= FI_RMA; + + /* Disable rdma is not allowed when user requests FI_RMA */ + optval = false; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_USE_DEVICE_RDMA, &optval, sizeof(optval)), -FI_EOPNOTSUPP); +} + +/** + * @brief Test the fi_setopt API for efa_ep + * FI_OPT_FI_HMEM_P2P cannot be set as FI_HMEM_P2P_DISABLED + * @param state + */ +void test_efa_ep_setopt_hmem_p2p(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + int optval; + int optvals[] = { + FI_HMEM_P2P_DISABLED, + FI_HMEM_P2P_ENABLED, + FI_HMEM_P2P_PREFERRED, + FI_HMEM_P2P_REQUIRED, + }; + size_t num_optvals = sizeof(optvals) / sizeof(int); + int i, expected_return; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + + /* FI_HMEM_P2P_DISABLED is not allowed */ + for (i = 0; i < num_optvals; i++) { + optval = optvals[i]; + expected_return = (optval == FI_HMEM_P2P_DISABLED) ? 
-FI_EOPNOTSUPP : FI_SUCCESS; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_FI_HMEM_P2P, &optval, sizeof(optval)), expected_return); + } +} + +/** + * @brief Test the fi_setopt API for efa_ep with FI_OPT_EFA_RNR_RETRY + * @param state + */ +void test_efa_ep_setopt_rnr_retry(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + size_t optval; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + assert_false(efa_ep->efa_qp_enabled); + + optval = 7; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_RNR_RETRY, &optval, sizeof(optval)), FI_SUCCESS); + assert_int_equal(efa_ep->rnr_retry, optval); + + /* hack qp enabled status to allow local test */ + efa_ep->efa_qp_enabled = true; + /* fi_setopt should fail when it's called after ep enable */ + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_RNR_RETRY, &optval, sizeof(optval)), -FI_EINVAL); + /* recover */ + efa_ep->efa_qp_enabled = false; +} + +/** + * @brief Test the fi_setopt API for efa_ep with FI_OPT_*_SIZE + * @param state + */ +void test_efa_ep_setopt_sizes(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + size_t optval; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled(resource, FI_EP_RDM); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + size_t size_thresholds[] = { + [FI_OPT_MAX_MSG_SIZE] = (size_t) efa_ep->domain->device->ibv_port_attr.max_msg_sz, + [FI_OPT_MAX_RMA_SIZE] = (size_t) efa_ep->domain->device->max_rdma_size, + [FI_OPT_INJECT_MSG_SIZE] = (size_t) efa_ep->domain->device->efa_attr.inline_buf_size, + [FI_OPT_INJECT_RMA_SIZE] = (size_t) 0, + }; + int optnames[] = { + FI_OPT_MAX_MSG_SIZE, + FI_OPT_MAX_RMA_SIZE, + FI_OPT_INJECT_MSG_SIZE, + 
FI_OPT_INJECT_RMA_SIZE, + }; + size_t num_optnames = sizeof(optnames) / sizeof(int); + int i, optname; + + for (i = 0; i < num_optnames; i++) { + optname = optnames[i]; + + /* set optval <= threshold is allowed */ + optval = 0.5 * size_thresholds[optname]; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, optname, &optval, sizeof(optval)), FI_SUCCESS); + + /* set optval > threshold is NOT allowed */ + optval = size_thresholds[optname] + 10; + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, optname, &optval, sizeof(optval)), -FI_EINVAL); + } +} + +/** + * @brief Test fi_ep_bind and fi_enable API for efa_ep + * + * @param state + */ +void test_efa_ep_bind_and_enable(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_base_ep *efa_ep; + + efa_unit_test_resource_construct_for_efa_direct(resource, FI_EP_RDM); + + efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + + assert_true(efa_ep->efa_qp_enabled); + /* we shouldn't have user recv qp for efa-direct */ + assert_true(efa_ep->user_recv_qp == NULL); +} \ No newline at end of file diff --git a/prov/efa/test/efa_unit_test_msg.c b/prov/efa/test/efa_unit_test_msg.c index 81781aeb6d6..e48aa5d509f 100644 --- a/prov/efa/test/efa_unit_test_msg.c +++ b/prov/efa/test/efa_unit_test_msg.c @@ -5,7 +5,6 @@ #include "efa_unit_tests.h" #include "ofi_util.h" -extern struct fi_ops_msg efa_msg_ops; static void test_efa_msg_recv_prep(struct efa_resource *resource, fi_addr_t *addr) @@ -16,8 +15,7 @@ static void test_efa_msg_recv_prep(struct efa_resource *resource, size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); - resource->ep->msg = &efa_msg_ops; + efa_unit_test_resource_construct_for_efa_direct(resource, FI_EP_RDM); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qp = base_ep->qp->ibv_qp; @@ -108,8 +106,7 @@ static void test_efa_msg_send_prep(struct efa_resource 
*resource, size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); - resource->ep->msg = &efa_msg_ops; + efa_unit_test_resource_construct_for_efa_direct(resource, FI_EP_RDM); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qpx = base_ep->qp->ibv_qp_ex; diff --git a/prov/efa/test/efa_unit_test_rma.c b/prov/efa/test/efa_unit_test_rma.c index cb42a8528fd..162496a456d 100644 --- a/prov/efa/test/efa_unit_test_rma.c +++ b/prov/efa/test/efa_unit_test_rma.c @@ -15,10 +15,11 @@ static void test_efa_rma_prep(struct efa_resource *resource, fi_addr_t *addr) size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM); - resource->ep->rma = &efa_rma_ops; + efa_unit_test_resource_construct_for_efa_direct(resource, FI_EP_RDM); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); + /* Add rma caps explicitly to ep->info to allow local test */ + base_ep->info->caps |= FI_RMA; ibv_qpx = base_ep->qp->ibv_qp_ex; ibv_qpx->wr_start = &efa_mock_ibv_wr_start_no_op; /* this mock will save the send work request (wr) in a global list */ diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 63316838a21..93991120fd4 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -118,6 +118,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rx_refill_threshold_larger_than_rx_size, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_support_unsolicited_write_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_default_sizes, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), 
cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_failed_poll, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), @@ -201,7 +202,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_cq_post_initial_rx_pkts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_cntr_post_initial_rx_pkts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_cntr_post_initial_rx_pkts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_peer_reorder_expected_msg_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_peer_reorder_smaller_msg_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_peer_reorder_larger_msg_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), @@ -233,6 +234,16 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_cq_read_recv_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_send_failure, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_recv_failure, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_open, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + 
cmocka_unit_test_setup_teardown(test_efa_ep_cancel, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_getopt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_use_device_rdma, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_hmem_p2p, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_rnr_retry, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_setopt_sizes, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_ep_bind_and_enable, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), }; cmocka_set_message_output(CM_OUTPUT_XML); diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index a13033e6f8b..30ed823c8c8 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -40,10 +40,15 @@ void efa_unit_test_resource_construct_ep_not_enabled( struct efa_resource *resource, enum fi_ep_type ep_type); void efa_unit_test_resource_construct_no_cq_and_ep_not_enabled( struct efa_resource *resource, enum fi_ep_type ep_type); +void efa_unit_test_resource_construct_for_efa_direct(struct efa_resource *resource, enum fi_ep_type ep_type); +void efa_unit_test_resource_construct_for_efa_direct_ep_not_enabled( + struct efa_resource *resource, enum fi_ep_type ep_type); +void efa_unit_test_resource_construct_for_efa_direct_no_cq_and_ep_not_enabled( + struct efa_resource *resource, enum fi_ep_type ep_type); 
void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, enum fi_ep_type ep_type, uint32_t fi_version, struct fi_info *hints, - bool enable_ep, bool open_cq); + bool enable_ep, bool open_cq, bool use_efa_direct); void efa_unit_test_resource_construct_rdm_shm_disabled(struct efa_resource *resource); @@ -138,6 +143,7 @@ void test_efa_rdm_ep_rx_refill_threshold_smaller_than_rx_size(); void test_efa_rdm_ep_rx_refill_threshold_larger_than_rx_size(); void test_efa_rdm_ep_support_unsolicited_write_recv(); void test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv(); +void test_efa_rdm_ep_default_sizes(); void test_dgram_cq_read_empty_cq(); void test_ibv_cq_ex_read_empty_cq(); void test_ibv_cq_ex_read_failed_poll(); @@ -221,7 +227,7 @@ void test_efa_rdm_cq_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(); void test_efa_rdm_cq_post_initial_rx_pkts(); void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(); void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(); -void test_efa_cntr_post_initial_rx_pkts(); +void test_efa_rdm_cntr_post_initial_rx_pkts(); void test_efa_rdm_peer_reorder_expected_msg_id(); void test_efa_rdm_peer_reorder_smaller_msg_id(); void test_efa_rdm_peer_reorder_larger_msg_id(); @@ -253,6 +259,16 @@ void test_efa_cq_read_send_success(); void test_efa_cq_read_recv_success(); void test_efa_cq_read_send_failure(); void test_efa_cq_read_recv_failure(); +void test_efa_ep_open(); +void test_efa_ep_cancel(); +void test_efa_ep_getopt(); +void test_efa_ep_setopt_use_device_rdma(); +void test_efa_ep_setopt_hmem_p2p(); +void test_efa_ep_setopt_rnr_retry(); +void test_efa_ep_setopt_sizes(); +void test_efa_ep_bind_and_enable(); +void test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(); +void test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(); static inline int efa_unit_test_get_dlist_length(struct dlist_entry *head)