Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Propagate raft_recover errors and improve tracing #702

Merged: 1 commit merged on Sep 10, 2024.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/raft/raft.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ int raft_recover(struct raft *r, const struct raft_configuration *conf)

rv = r->io->recover(r->io, conf);
if (rv != 0) {
ErrMsgTransfer(r->io->errmsg, r->errmsg, "io");
return rv;
}

Expand Down
11 changes: 11 additions & 0 deletions src/raft/uv.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ static int uvFilterSegments(struct uv *uv,
ErrMsgPrintf(uv->io->errmsg,
"closed segment %s is past last snapshot %s",
segment->filename, snapshot_filename);
tracef("corrupted raft state, error: %s", uv->io->errmsg);
return RAFT_CORRUPT;
}

Expand Down Expand Up @@ -369,6 +370,7 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
rv = UvList(uv, &snapshots, &n_snapshots, &segments, &n_segments,
uv->io->errmsg);
if (rv != 0) {
tracef("failed to list snapshots and segments, error: %d", rv);
goto err;
}

Expand All @@ -377,12 +379,14 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
char snapshot_filename[UV__FILENAME_LEN];
*snapshot = RaftHeapMalloc(sizeof **snapshot);
if (*snapshot == NULL) {
tracef("malloc failed");
rv = RAFT_NOMEM;
goto err;
}
rv = UvSnapshotLoad(uv, &snapshots[n_snapshots - 1], *snapshot,
uv->io->errmsg);
if (rv != 0) {
tracef("snapshot load failed: %d", rv);
RaftHeapFree(*snapshot);
*snapshot = NULL;
goto err;
Expand All @@ -401,6 +405,7 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
rv = uvFilterSegments(uv, (*snapshot)->index, snapshot_filename,
&segments, &n_segments);
if (rv != 0) {
tracef("failed to filter segments: %d", rv);
goto err;
}
if (segments != NULL) {
Expand All @@ -420,6 +425,7 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,
rv = uvSegmentLoadAll(uv, *start_index, segments, n_segments,
entries, n);
if (rv != 0) {
tracef("failed to load all segments: %d", rv);
goto err;
}

Expand Down Expand Up @@ -447,6 +453,9 @@ static int uvLoadSnapshotAndEntries(struct uv *uv,

err:
assert(rv != 0);
tracef("auto-recovery: %d, load depth: %d, error: %s",
uv->auto_recovery, depth, uv->io->errmsg);

if (*snapshot != NULL) {
snapshotDestroy(*snapshot);
*snapshot = NULL;
Expand Down Expand Up @@ -583,6 +592,7 @@ static int uvRecover(struct raft_io *io, const struct raft_configuration *conf)
rv = uvLoadSnapshotAndEntries(uv, &snapshot, &start_index, &entries,
&n_entries, 0);
if (rv != 0) {
tracef("failed to load raft snapshot and entries, error: %d", rv);
return rv;
}

Expand All @@ -599,6 +609,7 @@ static int uvRecover(struct raft_io *io, const struct raft_configuration *conf)

rv = uvSegmentCreateClosedWithConfiguration(uv, next_index, conf);
if (rv != 0) {
tracef("failed to create segment, error: %d", rv);
return rv;
}

Expand Down
7 changes: 7 additions & 0 deletions src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,7 @@
struct dqlite_node_info_ext infos[],
int n_info)
{
dqliteTracingMaybeEnable(true);
tracef("dqlite node recover ext");
struct raft_configuration configuration;
int i;
Expand All @@ -1031,6 +1032,7 @@
for (i = 0; i < n_info; i++) {
struct dqlite_node_info_ext *info = &infos[i];
if (!node_info_valid(info)) {
tracef("invalid node info");
rv = DQLITE_MISUSE;
goto out;
}
Expand All @@ -1040,6 +1042,7 @@
rv = raft_configuration_add(&configuration, info->id, address,
raft_role);
if (rv != 0) {
tracef("unable to add server to raft configuration, error: %d", rv);
assert(rv == RAFT_NOMEM);
rv = DQLITE_NOMEM;
goto out;
Expand All @@ -1049,11 +1052,15 @@
int lock_fd;
rv = acquire_dir(n->config.raft_dir, &lock_fd);
if (rv != 0) {
tracef("couldn't acquire lock, error: %d", rv);
goto out;
}

rv = raft_recover(&n->raft, &configuration);
if (rv != 0) {
tracef("raft recovery failed, error: %d", rv);
snprintf(n->errmsg, DQLITE_ERRMSG_BUF_SIZE, "raft_recover(): %s",

Codecov (codecov/patch) check warning on src/server.c#L1062: added line #L1062 was not covered by tests.
raft_errmsg(&n->raft));
rv = DQLITE_ERROR;
goto out;
}
Expand Down
Loading