Skip to content

Commit

Permalink
Propagate raft_recover errors and improve tracing
Browse files Browse the repository at this point in the history
At the moment, the raft_recover error messages are not propagated,
so the caller gets a cryptic error code (1), without further
details.

This change ensures that the error messages are propagated and adds
a few extra trace messages.

Furthermore, tracing is only enabled when the node is started, which is
not the case when we are merely trying to recover it. For now, we're adding a
"dqliteTracingMaybeEnable" call to "dqlite_node_recover_ext" so that recovery is traced as well.
  • Loading branch information
petrutlucian94 committed Sep 5, 2024
1 parent eb777c2 commit 4d39111
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/raft/raft.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ int raft_recover(struct raft *r, const struct raft_configuration *conf)

rv = r->io->recover(r->io, conf);
if (rv != 0) {
ErrMsgTransfer(r->io->errmsg, r->errmsg, "io");
return rv;
}

Expand Down
9 changes: 9 additions & 0 deletions src/raft/uv.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ static int uvFilterSegments(struct uv *uv,
ErrMsgPrintf(uv->io->errmsg,
"closed segment %s is past last snapshot %s",
segment->filename, snapshot_filename);
tracef("corrupted raft state, error: %s", uv->io->errmsg);
return RAFT_CORRUPT;
}

Expand Down Expand Up @@ -369,6 +370,7 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
rv = UvList(uv, &snapshots, &n_snapshots, &segments, &n_segments,
uv->io->errmsg);
if (rv != 0) {
tracef("failed to list snapshots and segments, error: %d", rv);
goto err;
}

Expand All @@ -377,12 +379,14 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
char snapshot_filename[UV__FILENAME_LEN];
*snapshot = RaftHeapMalloc(sizeof **snapshot);
if (*snapshot == NULL) {
tracef("malloc failed");
rv = RAFT_NOMEM;
goto err;
}
rv = UvSnapshotLoad(uv, &snapshots[n_snapshots - 1], *snapshot,
uv->io->errmsg);
if (rv != 0) {
tracef("snapshot load failed: %d", rv);
RaftHeapFree(*snapshot);
*snapshot = NULL;
goto err;
Expand All @@ -401,6 +405,7 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
rv = uvFilterSegments(uv, (*snapshot)->index, snapshot_filename,
&segments, &n_segments);
if (rv != 0) {
tracef("failed to filter segments: %d", rv);
goto err;
}
if (segments != NULL) {
Expand All @@ -420,6 +425,7 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
rv = uvSegmentLoadAll(uv, *start_index, segments, n_segments,
entries, n);
if (rv != 0) {
tracef("failed to load all segments: %d", rv);
goto err;
}

Expand All @@ -435,6 +441,7 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
"is behind "
"last snapshot's index %llu",
last_index, (*snapshot)->index);
tracef("corrupted raft state, error: %s", uv->io->errmsg);
rv = RAFT_CORRUPT;
goto err;
}
Expand Down Expand Up @@ -583,6 +590,7 @@ static int uvRecover(struct raft_io *io, const struct raft_configuration *conf)
rv = uvLoadSnapshotAndEntries(uv, &snapshot, &start_index, &entries,
&n_entries, 0);
if (rv != 0) {
tracef("failed to load raft snapshot and entries, error: %d", rv);
return rv;
}

Expand All @@ -599,6 +607,7 @@ static int uvRecover(struct raft_io *io, const struct raft_configuration *conf)

rv = uvSegmentCreateClosedWithConfiguration(uv, next_index, conf);
if (rv != 0) {
tracef("failed to create segment, error: %d", rv);
return rv;
}

Expand Down
7 changes: 7 additions & 0 deletions src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,7 @@ int dqlite_node_recover_ext(dqlite_node *n,
struct dqlite_node_info_ext infos[],
int n_info)
{
dqliteTracingMaybeEnable(true);
tracef("dqlite node recover ext");
struct raft_configuration configuration;
int i;
Expand All @@ -1031,6 +1032,7 @@ int dqlite_node_recover_ext(dqlite_node *n,
for (i = 0; i < n_info; i++) {
struct dqlite_node_info_ext *info = &infos[i];
if (!node_info_valid(info)) {
tracef("invalid node info");
rv = DQLITE_MISUSE;
goto out;
}
Expand All @@ -1040,6 +1042,7 @@ int dqlite_node_recover_ext(dqlite_node *n,
rv = raft_configuration_add(&configuration, info->id, address,
raft_role);
if (rv != 0) {
tracef("unable to add server to raft configuration, error: %d", rv);
assert(rv == RAFT_NOMEM);
rv = DQLITE_NOMEM;
goto out;
Expand All @@ -1049,11 +1052,15 @@ int dqlite_node_recover_ext(dqlite_node *n,
int lock_fd;
rv = acquire_dir(n->config.raft_dir, &lock_fd);
if (rv != 0) {
tracef("couldn't acquire lock, error: %d", rv);
goto out;
}

rv = raft_recover(&n->raft, &configuration);
if (rv != 0) {
tracef("raft recovery failed, error: %d", rv);
snprintf(n->errmsg, DQLITE_ERRMSG_BUF_SIZE, "raft_recover(): %s",
raft_errmsg(&n->raft));
rv = DQLITE_ERROR;
goto out;
}
Expand Down

0 comments on commit 4d39111

Please sign in to comment.