Skip to content

Commit

Permalink
DAOS-17111 cart: Use only swim ctx for outage
Browse files Browse the repository at this point in the history
The "network outage" detection for swim (see the existing
crt_swim_update_last_unpack_hlc) uses all crt contexts. In an engine, if
the swim context can't receive or send any message, while at least one
other context can and does receive messages constantly, then swim will
not detect any "network outage", leading to more false positive DEAD
events. The purpose of that detection is to identify swim-specific
"network outages", in which swim may be unable to receive any swim messages.

This patch changes the detection algorithm to use only the swim crt
context:

  - Remove crt_context.cc_last_unpack_hlc.

  - Update crt_swim_membs.csm_last_unpack_hlc when receiving swim
    requests and replies.

Signed-off-by: Li Wei <[email protected]>
  • Loading branch information
liw committed Feb 18, 2025
1 parent b0a0c17 commit 362f288
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 26 deletions.
4 changes: 2 additions & 2 deletions src/cart/crt_hg_proc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -450,8 +451,7 @@ crt_hg_unpack_header(hg_handle_t handle, struct crt_rpc_priv *rpc_priv,

/* Sync the HLC. Clients never decode requests. */
D_ASSERT(crt_is_service());
rc = d_hlc_get_msg(rpc_priv->crp_req_hdr.cch_hlc,
&ctx->cc_last_unpack_hlc, &clock_offset);
rc = d_hlc_get_msg(rpc_priv->crp_req_hdr.cch_hlc, NULL /* hlc_out */, &clock_offset);
if (rc != 0) {
REPORT_HLC_SYNC_ERR("failed to sync HLC for request: opc=%x ts="
DF_U64" offset="DF_U64" from=%u\n",
Expand Down
2 changes: 0 additions & 2 deletions src/cart/crt_internal_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,6 @@ struct crt_context {

/** timeout per-context */
uint32_t cc_timeout_sec;
/** HLC time of last received RPC */
uint64_t cc_last_unpack_hlc;

/** Per-context statistics (server-side only) */
/** Total number of timed out requests, of type counter */
Expand Down
43 changes: 21 additions & 22 deletions src/cart/crt_swim.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -376,6 +377,14 @@ crt_swim_lookup_id(swim_id_t id)
return grp_ver;
}

/**
 * Record \a hlc as the most recent unpack HLC kept in \a csm.
 *
 * The stored value, csm->csm_last_unpack_hlc, only ever moves forward: it is
 * overwritten only when \a hlc is strictly greater than what is already
 * there. The compare-and-store is done between crt_swim_csm_lock() and
 * crt_swim_csm_unlock().
 *
 * \param[in,out] csm	SWIM membership state whose timestamp is updated
 * \param[in]     hlc	candidate HLC timestamp
 */
static void
crt_swim_update_last_unpack_hlc(struct crt_swim_membs *csm, uint64_t hlc)
{
	crt_swim_csm_lock(csm);
	if (hlc > csm->csm_last_unpack_hlc)
		csm->csm_last_unpack_hlc = hlc;
	crt_swim_csm_unlock(csm);
}

static void crt_swim_srv_cb(crt_rpc_t *rpc)
{
struct crt_rpc_priv *rpc_priv = container_of(rpc, struct crt_rpc_priv, crp_pub);
Expand All @@ -396,6 +405,8 @@ static void crt_swim_srv_cb(crt_rpc_t *rpc)

D_ASSERT(crt_is_service());

crt_swim_update_last_unpack_hlc(csm, hlc);

from_id = rpc_priv->crp_req_hdr.cch_src_rank;

/* Initialize empty array in case of error in reply */
Expand Down Expand Up @@ -605,6 +616,14 @@ static void crt_swim_cli_cb(const struct crt_cb_info *cb_info)
if (to_id == ctx->sc_target)
ctx->sc_deadline = 0;
swim_ctx_unlock(ctx);
} else {
struct crt_swim_membs *csm = &grp_priv->gp_membs_swim;

/*
* Although some errors also suggest incoming messages, we keep
* it simple for now.
*/
crt_swim_update_last_unpack_hlc(csm, hlc);
}

reply_rc = cb_info->cci_rc ? cb_info->cci_rc : rpc_out->rc;
Expand Down Expand Up @@ -987,24 +1006,6 @@ static void crt_swim_new_incarnation(struct swim_context *ctx,
state->sms_incarnation = incarnation;
}

/**
 * Raise csm->csm_last_unpack_hlc to the largest cc_last_unpack_hlc found
 * among the contexts on the list returned by
 * crt_provider_get_ctx_list(true, crt_gdata.cg_primary_prov).
 *
 * The stored value is never lowered. crt_gdata.cg_rwlock is held for reading
 * for the duration of the list walk.
 *
 * \param[in,out] csm	SWIM membership state whose timestamp is updated
 */
static void crt_swim_update_last_unpack_hlc(struct crt_swim_membs *csm)
{
	d_list_t           *contexts;
	struct crt_context *cur = NULL;

	D_RWLOCK_RDLOCK(&crt_gdata.cg_rwlock);

	contexts = crt_provider_get_ctx_list(true, crt_gdata.cg_primary_prov);
	d_list_for_each_entry(cur, contexts, cc_link) {
		/* Read the per-context value once before comparing and storing. */
		uint64_t seen = cur->cc_last_unpack_hlc;

		if (seen > csm->csm_last_unpack_hlc)
			csm->csm_last_unpack_hlc = seen;
	}

	D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock);
}

static void
crt_metrics_sample_delay(crt_context_t crt_ctx, uint64_t delay, bool glitch)
{
Expand Down Expand Up @@ -1048,10 +1049,8 @@ static int64_t crt_swim_progress_cb(crt_context_t crt_ctx, int64_t timeout_us, v
} else if (rc == -DER_TIMEDOUT || rc == -DER_CANCELED) {
uint64_t now = swim_now_ms();

crt_swim_update_last_unpack_hlc(csm);

/*
* Check for network idle in all contexts.
* Check for network idle in swim context.
* If the time elapsed since the last received RPC is more
* than 2/3 of the suspicion timeout, suspend eviction.
* The max_delay should be less suspicion timeout to guarantee
Expand All @@ -1064,7 +1063,7 @@ static int64_t crt_swim_progress_cb(crt_context_t crt_ctx, int64_t timeout_us, v
uint64_t max_delay = swim_suspect_timeout_get() * 2 / 3;

if (delay > max_delay) {
D_ERROR("Network outage detected (idle during "
D_ERROR("SWIM network outage detected (idle during "
"%lu.%lu sec > expected %lu.%lu sec).\n",
delay / 1000, delay % 1000,
max_delay / 1000, max_delay % 1000);
Expand Down

0 comments on commit 362f288

Please sign in to comment.