Skip to content

Commit

Permalink
prov/efa: Modify how EFA selects new intranode provider
Browse files Browse the repository at this point in the history
Change the env variable that controls EFA's intranode provider from
FI_EFA_USE_SM2 to FI_EFA_INTRANODE_PROVIDER, which makes it more generic.
This switches the variable from a bool to a string, and allows it to
also take the place of FI_EFA_ENABLE_SHM_TRANSFER, which we deprecate in
this commit. When someone sets FI_EFA_INTRANODE_PROVIDER=efa, we turn
EFA's intranode optimization off. FI_EFA_INTRANODE_PROVIDER will
override the value of FI_EFA_ENABLE_SHM_TRANSFER.

Signed-off-by: Seth Zegelstein <[email protected]>
  • Loading branch information
a-szegel committed Jul 10, 2023
1 parent 98e1aff commit 331c616
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 23 deletions.
5 changes: 4 additions & 1 deletion prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@ static int efa_domain_init_rdm(struct efa_domain *efa_domain, struct fi_info *in
{
int err;

efa_shm_info_create(info, &efa_domain->shm_info);
if (strcmp(efa_env.intranode_provider, "efa"))
efa_shm_info_create(info, &efa_domain->shm_info);
else
efa_domain->shm_info = NULL;

if (efa_domain->shm_info) {
err = fi_fabric(efa_domain->shm_info->fabric_attr,
Expand Down
29 changes: 23 additions & 6 deletions prov/efa/src/efa_env.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ struct efa_env efa_env = {
.efa_write_segment_size = 1073741824, /* need to confirm this constant. */
.rnr_retry = 3, /* Setting this value to EFA_RNR_INFINITE_RETRY makes the firmware retry indefinitely */
.host_id_file = "/sys/devices/virtual/dmi/id/board_asset_tag", /* Available on EC2 instances and containers */
.use_sm2 = false,
.huge_page_setting = EFA_ENV_HUGE_PAGE_UNSPEC,
.intranode_provider = "shm",
};

/**
Expand Down Expand Up @@ -127,7 +127,6 @@ void efa_env_param_get(void)
}

fi_param_get_int(&efa_prov, "tx_queue_size", &efa_env.tx_queue_size);
fi_param_get_int(&efa_prov, "enable_shm_transfer", &efa_env.enable_shm_transfer);
fi_param_get_int(&efa_prov, "use_zcpy_rx", &efa_env.use_zcpy_rx);
fi_param_get_int(&efa_prov, "set_cuda_sync_memops", &efa_env.set_cuda_sync_memops);
fi_param_get_int(&efa_prov, "zcpy_rx_seed", &efa_env.zcpy_rx_seed);
Expand Down Expand Up @@ -164,13 +163,30 @@ void efa_env_param_get(void)
&efa_env.efa_read_segment_size);
fi_param_get_size_t(&efa_prov, "inter_max_gdrcopy_message_size",
&efa_env.efa_max_gdrcopy_msg_size);
fi_param_get_bool(&efa_prov, "use_sm2", &efa_env.use_sm2);

int use_huge_page;
if (fi_param_get_bool(&efa_prov, "use_huge_page", &use_huge_page) ==0) {
efa_env.huge_page_setting = use_huge_page ? EFA_ENV_HUGE_PAGE_ENABLED : EFA_ENV_HUGE_PAGE_DISABLED;
}

fi_param_get_int(&efa_prov, "enable_shm_transfer", &efa_env.enable_shm_transfer);
if (efa_env.enable_shm_transfer == 0) {
efa_env.intranode_provider = "efa";
EFA_WARN(FI_LOG_CORE, "FI_EFA_ENABLE_SHM_TRANSFER is deprecated (and will be removed in a future release), "
"use FI_EFA_INTRANODE_PROVIDER=efa to specify the EFA provider for intra-node communication.\n");
}

/* Setting FI_EFA_INTRANODE_PROVIDER will override FI_EFA_ENABLE_SHM_TRANSFER=0 */
fi_param_get_str(&efa_prov, "intranode_provider", &efa_env.intranode_provider);
if (strcmp(efa_env.intranode_provider, "efa") &&
strcmp(efa_env.intranode_provider, "shm") &&
strcmp(efa_env.intranode_provider, "sm2")) {
EFA_WARN(FI_LOG_CORE, "FI_EFA_INTRANODE_PROVIDER=%s, EFA supports 'shm', 'sm2' and 'efa'"
" for intra-node communication. Unsupported provider name. Aborting...\n",
efa_env.intranode_provider);
abort();
}

efa_fork_support_request_initialize();
}

Expand All @@ -181,8 +197,9 @@ void efa_env_define()
"Defines the minimum number of credits a sender requests from a receiver (Default: 32).");
fi_param_define(&efa_prov, "tx_queue_size", FI_PARAM_INT,
"Defines the maximum number of unacknowledged sends with the NIC.");
/* TODO Remove enable_shm_transfer on future release */
fi_param_define(&efa_prov, "enable_shm_transfer", FI_PARAM_INT,
"Enable using SHM provider to perform TX operations between processes on the same system. (Default: 1)");
"(Deprecated, use FI_EFA_INTRANODE_PROVIDER=efa to turn off SHM. Will remove in future release.) Enable using SHM provider to perform TX operations between processes on the same system. (Default: 1)");
fi_param_define(&efa_prov, "use_zcpy_rx", FI_PARAM_INT,
"Enables the use of application's receive buffers in place of bounce-buffers when feasible. (Default: 1)");
fi_param_define(&efa_prov, "set_cuda_sync_memops", FI_PARAM_INT,
Expand Down Expand Up @@ -235,13 +252,13 @@ void efa_env_define()
"Enables fork support and disables internal usage of huge pages. Has no effect on kernels which set copy-on-fork for registered pages, generally 5.13 and later. (Default: false)");
fi_param_define(&efa_prov, "runt_size", FI_PARAM_INT,
"The maximum number of bytes that will be eagerly sent by inflight messages uses runting read message protocol (Default 307200).");
fi_param_define(&efa_prov, "use_sm2", FI_PARAM_BOOL,
"Use the experimental shared memory provider SM2 for intra node communication.");
fi_param_define(&efa_prov, "use_huge_page", FI_PARAM_BOOL,
"Whether EFA provider can use huge page memory for internal buffer. "
"Using huge page memory has a small performance advantage, but can "
"cause system to run out of huge page memory. By default, EFA provider "
"will use huge page unless FI_EFA_FORK_SAFE is set to 1/on/true.");
fi_param_define(&efa_prov, "intranode_provider", FI_PARAM_STRING,
"The name of the provider that EFA should offload intra-node communications to (Default shm).");
}


Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/efa_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ struct efa_env {
* is malformatted, the program should proceed with a default host id, e.g. 0.
*/
char *host_id_file;
int use_sm2;
enum efa_env_huge_page_setting huge_page_setting;
char *intranode_provider;
};

/**
Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/efa_prov_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm_ptr,
* then send the packet entry. Therefore the maximum inject size is
* pkt_entry_size - maximum_header_size.
*/
if (efa_env.enable_shm_transfer)
if (strcmp(efa_env.intranode_provider, "efa"))
min_pkt_size = MIN(device->rdm_info->ep_attr->max_msg_size, efa_env.shm_max_medium_size);
else
min_pkt_size = device->rdm_info->ep_attr->max_msg_size;
Expand Down
18 changes: 6 additions & 12 deletions prov/efa/src/efa_shm.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,6 @@ void efa_shm_info_create(const struct fi_info *app_info, struct fi_info **shm_in
int ret;
struct fi_info *shm_hints;

char *shm_provider;
if (efa_env.use_sm2) {
shm_provider = "sm2";
} else {
shm_provider = "shm";
}

shm_hints = fi_allocinfo();
shm_hints->caps = app_info->caps;
shm_hints->caps &= ~FI_REMOTE_COMM;
Expand Down Expand Up @@ -133,18 +126,19 @@ void efa_shm_info_create(const struct fi_info *app_info, struct fi_info **shm_in
*/
shm_hints->tx_attr->op_flags = FI_COMPLETION;
shm_hints->rx_attr->op_flags = FI_COMPLETION;
shm_hints->fabric_attr->name = strdup(shm_provider);
shm_hints->fabric_attr->prov_name = strdup(shm_provider);
shm_hints->fabric_attr->name = strdup(efa_env.intranode_provider);
shm_hints->fabric_attr->prov_name = strdup(efa_env.intranode_provider);
shm_hints->ep_attr->type = FI_EP_RDM;

ret = fi_getinfo(FI_VERSION(1, 19), NULL, NULL,
OFI_GETINFO_HIDDEN, shm_hints, shm_info);
fi_freeinfo(shm_hints);
if (ret) {
EFA_WARN(FI_LOG_CORE, "Disabling EFA shared memory support; failed to get shm provider's info: %s\n",
fi_strerror(-ret));
EFA_WARN(FI_LOG_CORE, "Disabling EFA's shared memory support; "
"Failed to get info struct for provider %s: %s\n",
efa_env.intranode_provider, fi_strerror(-ret));
*shm_info = NULL;
} else {
assert(!strcmp((*shm_info)->fabric_attr->name, shm_provider));
assert(!strcmp((*shm_info)->fabric_attr->name, efa_env.intranode_provider));
}
}
8 changes: 6 additions & 2 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void efa_rdm_pke_pool_mr_dereg_handler(struct ofi_bufpool_region *region)

/**
* @brief creates a packet entry pool.
*
*
* The pool is allowed to grow if
* max_cnt is 0 and is fixed size otherwise.
*
Expand Down Expand Up @@ -936,7 +936,11 @@ void efa_rdm_ep_set_use_shm_for_tx(struct efa_rdm_ep *ep)
return;
}

ep->use_shm_for_tx = efa_env.enable_shm_transfer;
if (strcmp(efa_env.intranode_provider, "efa"))
ep->use_shm_for_tx = true;
else
ep->use_shm_for_tx = false;

return;
}

Expand Down

0 comments on commit 331c616

Please sign in to comment.