Skip to content

Commit

Permalink
Properly handle slow track acceptance
Browse files Browse the repository at this point in the history
In case the Pathfinder server was very slow (>idle timeout) to respond
to libpaf client's track request, libpaf attempted to generate an
inform message in a not-yet-accepted protocol transaction, triggering
a libpaf Pathfinder protocol module-level assertion.

With this change, libpaf restarts the link in such a case.

This bug could likely also have been triggered by an application
failing to call paf_process(), even though the context's fd had been
active for a long time. While a violation of the API contract, it
still shouldn't trigger an assertion. At least in theory, the cause of
such a failure could be scheduling latency (i.e., the application's
thread is being starved).

This bug only affected Pathfinder protocol v3 connections.

Resolves #34.

Signed-off-by: Mattias Rönnblom <[email protected]>
  • Loading branch information
m-ronnblom committed Dec 14, 2024
1 parent ff85a5f commit caa4755
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 7 deletions.
37 changes: 30 additions & 7 deletions src/link.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,22 +201,38 @@ static void track_notify_cb(int64_t ta_id, bool is_query, void *cb_data)
server_active(link);
}

static void track_complete_cb(int64_t ta_id UT_UNUSED, void *cb_data)
static void track_complete_cb(int64_t ta_id, void *cb_data)
{
struct link *link = cb_data;

log_link_track_completed(link);

assert(link->track_ta_id == ta_id);

link->track_ta_id = -1;
link->track_accepted = false;

handle_error(link);
}

static void track_accept_cb(int64_t ta_id, void *cb_data)
{
struct link *link = cb_data;

log_link_track_accepted(link);

assert(link->track_ta_id == ta_id);

link->track_accepted = true;
}

static void configure_idle_tracking(struct link *link)
{
if (conn_is_track_supported(link->conn)) {
link->track_ta_id =
conn_track_nb(link->conn, fail_cb, NULL, track_notify_cb,
track_complete_cb, link);
conn_track_nb(link->conn, fail_cb, track_accept_cb,
track_notify_cb, track_complete_cb, link);
link->track_accepted = false;

schedule_idle_query_tmo(link);
}
Expand Down Expand Up @@ -925,11 +941,18 @@ static void check_idle(struct link *link)
if (is_track_querying(link)) {
log_link_query_timeout(link);
handle_error(link);
} else {
conn_track_inform(link->conn, link->track_ta_id, true);
link->track_query_ts = ut_ftime(CLOCK_MONOTONIC);
schedule_idle_reply_tmo(link);
return;
}

if (!link->track_accepted) {
log_link_track_accept_timeout(link);
handle_error(link);
return;
}

conn_track_inform(link->conn, link->track_ta_id, true);
link->track_query_ts = ut_ftime(CLOCK_MONOTONIC);
schedule_idle_reply_tmo(link);
}

static void try_sync_services(struct link *link)
Expand Down
1 change: 1 addition & 0 deletions src/link.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ struct link

int64_t idle_tmo;
int64_t track_ta_id;
bool track_accepted;
double max_idle_time;
double track_query_ts;

Expand Down
6 changes: 6 additions & 0 deletions src/log_link.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#define log_link_query_timeout(link) \
log_link_debug(link, "Server query timed out.")

#define log_link_track_accept_timeout(link) \
log_link_debug(link, "Server failed to accept track request in time.")

#define log_link_idle_time_changed(link, max_idle_time) \
log_link_debug(link, "Maximum idle time was adjusted to %.0f s.", \
max_idle_time)
Expand All @@ -34,6 +37,9 @@
log_link_debug(link, "Received server track reply in %d ms.", \
(latency) * 1e3)

#define log_link_track_accepted(link) \
log_link_debug(link, "Track transaction accepted.")

#define log_link_track_unsolicited_reply(link) \
log_link_debug(link, "Received unsolicited server track reply.")

Expand Down

0 comments on commit caa4755

Please sign in to comment.