diff --git a/Makefile b/Makefile
index ff1810a87..06e70e853 100644
--- a/Makefile
+++ b/Makefile
@@ -651,7 +651,8 @@ OPTIONS_OBJS += src/quic_rx.o src/mux_quic.o src/h3.o src/quic_tx.o \
                 src/quic_cc_nocc.o src/qpack-dec.o src/quic_cc.o \
                 src/cfgparse-quic.o src/qmux_trace.o src/qpack-enc.o \
                 src/qpack-tbl.o src/h3_stats.o src/quic_stats.o \
-                src/quic_fctl.o src/cbuf.o src/quic_rules.o
+                src/quic_fctl.o src/cbuf.o src/quic_rules.o \
+                src/quic_drs.o src/quic_cc_bbr.o
 endif
 
 ifneq ($(USE_QUIC_OPENSSL_COMPAT:0=),)
diff --git a/include/haproxy/quic_cc-t.h b/include/haproxy/quic_cc-t.h
index 3e9573910..5660f057e 100644
--- a/include/haproxy/quic_cc-t.h
+++ b/include/haproxy/quic_cc-t.h
@@ -36,6 +36,7 @@
 
 extern struct quic_cc_algo quic_cc_algo_nr;
 extern struct quic_cc_algo quic_cc_algo_cubic;
+extern struct quic_cc_algo quic_cc_algo_bbr;
 extern struct quic_cc_algo *default_quic_cc_algo;
 
 /* Fake algorithm with its fixed window */
@@ -80,6 +81,7 @@ struct quic_cc_event {
 enum quic_cc_algo_type {
 	QUIC_CC_ALGO_TP_NEWRENO,
 	QUIC_CC_ALGO_TP_CUBIC,
+	QUIC_CC_ALGO_TP_BBR,
 	QUIC_CC_ALGO_TP_NOCC,
 };
 
@@ -87,7 +89,7 @@ struct quic_cc {
 	/* is there only for debugging purpose. */
 	struct quic_conn *qc;
 	struct quic_cc_algo *algo;
-	uint32_t priv[18];
+	uint32_t priv[248];
 };
 
 struct quic_cc_path {
diff --git a/include/haproxy/quic_cc_drs.h b/include/haproxy/quic_cc_drs.h
new file mode 100644
index 000000000..968b97d0e
--- /dev/null
+++ b/include/haproxy/quic_cc_drs.h
@@ -0,0 +1,43 @@
+#ifndef HAPROXY_QUIC_CC_DRS_H
+#define HAPROXY_QUIC_CC_DRS_H
+
+#include
+
+#include
+
+#define false 0
+#define true 1
+
+/* Rate sample */
+struct quic_rs {
+	uint64_t delivered;
+	uint64_t prior_delivered;
+	uint64_t tx_in_flight;
+	uint64_t lost;
+	uint64_t prior_lost;
+	uint32_t interval;
+	uint32_t prior_time;
+	uint32_t send_elapsed;
+	uint32_t ack_elapsed;
+	uint32_t is_app_limited;
+};
+
+/* Delivery rate sampling */
+struct quic_drs {
+	struct quic_rs rs;
+	struct wf wf;
+	uint64_t round_count;
+	uint64_t next_round_delivered;
+	uint64_t delivered;
+	uint64_t lost;
+	uint32_t delivered_ts;
+	uint32_t first_sent_ts;
+	int is_cwnd_limited; /* boolean */
+	int app_limited; /* boolean */
+};
+
+extern struct pool_head *pool_head_quic_drs;
+
+void quic_cc_drs_init(struct quic_drs *drs);
+
+#endif /* HAPROXY_QUIC_CC_DRS_H */
diff --git a/src/quic_cc.c b/src/quic_cc.c
index 8fd99d3c1..87d95f0d3 100644
--- a/src/quic_cc.c
+++ b/src/quic_cc.c
@@ -22,7 +22,7 @@
 
 #include
 
-struct quic_cc_algo *default_quic_cc_algo = &quic_cc_algo_cubic;
+struct quic_cc_algo *default_quic_cc_algo = &quic_cc_algo_bbr;
 
 /*
  * Initialize congestion control with as algorithm depending on
diff --git a/src/quic_cc_bbr.c b/src/quic_cc_bbr.c
new file mode 100644
index 000000000..0a4351999
--- /dev/null
+++ b/src/quic_cc_bbr.c
@@ -0,0 +1,987 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* TODO: check ->delivery_rate usage here vs RFC */
+/* XXX TO BE REMOVED */
+#define true 1
+#define false 0
+
+/* Gains are expressed in percent (see bbr_bdp_multiple()) */
+#define BBR_STARTUP_PACING_GAIN 277 /* 4 * ln(2)=2.77 */
+#define BBR_STARTUP_CWND_GAIN 200 /* percent */
+#define BBR_DRAIN_PACING_GAIN 50
+
+/* Default cwnd gain: a cwnd gain, not the startup pacing gain */
+#define BBR_DEFAULT_CWND_GAIN BBR_STARTUP_CWND_GAIN
+
+/* BBRLossThresh (2%) */
+#define BBR_LOSS_THRESH_MULT 2
+#define BBR_LOSS_THRESH_DIVI 100
+/* BBRBeta (0.7) */
+#define BBR_BETA_MULT 7
+#define BBR_BETA_DIVI 10
+/* BBRHeadroom (0.15) */
+#define BBR_HEADROOM_MULT 15
+#define BBR_HEADROOM_DIVI 100
+
+#define BBR_MAX_BW_FILTERLEN 2
+#define BBR_EXTRA_ACKED_FILTERLEN 10
+
+#define BBR_MIN_RTT_FILTERLEN 10000 /* ms */
+#define BBR_PROBE_RTT_CWND_GAIN 50 /* 50% */
+#define BBR_PROBE_RTT_DURATION 200 /* ms */
+#define BBR_PROBE_RTT_INTERVAL 5000 /* ms */
+
+/* 4.1.1: State Transition Diagram */
+/* BBR state */
+enum bbr_state {
+	BBR_ST_STARTUP,
+	BBR_ST_DRAIN,
+	BBR_ST_PROBE_BW_DOWN,
+	BBR_ST_PROBE_BW_CRUISE,
+	BBR_ST_PROBE_BW_REFILL,
+	BBR_ST_PROBE_BW_UP,
+	BBR_ST_PROBE_RTT,
+};
+
+enum bbr_ack_phase {
+	BBR_ACK_PHASE_ACKS_PROBE_STARTING,
+	BBR_ACK_PHASE_ACKS_PROBE_STOPPING,
+	BBR_ACK_PHASE_ACKS_PROBE_FEEDBACK,
+	BBR_ACK_PHASE_ACKS_REFILLING,
+};
+
+/* BBR state; its size is checked against quic_cc.priv by quic_cc_bbr_check() */
+struct bbr {
+	struct quic_drs *drs;
+	/* 2.4 Output Control Parameters */
+	uint64_t pacing_rate;
+	uint64_t send_quantum;
+	/* 2.5 Pacing State and Parameters */
+	uint64_t pacing_gain;
+	//uint32_t next_departure_time; /* XXX check this XXX */
+	/* 2.6. cwnd State and Parameters */
+	uint64_t cwnd_gain;
+	/* 2.7 General Algorithm State */
+	enum bbr_state state;
+	uint64_t round_count;
+	int round_start; /* boolean */
+	uint64_t next_round_delivered;
+	int idle_restart; /* boolean */
+	/* 2.9.1 Data Rate Network Path Model Parameters */
+	uint64_t max_bw;
+	uint64_t bw_lo;
+	uint64_t bw;
+	uint64_t prior_cwnd;
+	/* 2.9.2 Data Volume Network Path Model Parameters */
+	uint32_t min_rtt;
+	uint64_t extra_acked;
+	uint64_t bytes_lost_in_round;
+	uint64_t loss_events_in_round;
+	uint64_t offload_budget;
+	uint64_t probe_up_cnt;
+	uint32_t cycle_stamp;
+	enum bbr_ack_phase ack_phase;
+	int bw_probe_wait; /* XXX TODO: check this type. */
+	int bw_probe_samples;
+	int bw_probe_up_rounds;
+	uint64_t bw_probe_up_acks;
+	uint64_t max_inflight;
+	uint64_t inflight_hi;
+	uint64_t bw_hi;
+	uint64_t inflight_lo;
+	/* 2.10 State for Responding to Congestion */
+	int loss_round_start; /* boolean */
+	uint64_t bw_latest;
+	int loss_in_round; /* boolean */
+	uint64_t loss_round_delivered;
+	uint64_t rounds_since_bw_probe;
+	uint64_t inflight_latest;
+	/* 2.11 Estimating BBR.max_bw */
+	struct wf max_bw_filter;
+	uint64_t cycle_count;
+	/* 2.12 Estimating BBR.extra_acked */
+	uint32_t extra_acked_interval_start;
+	uint64_t extra_acked_delivered;
+	struct wf extra_acked_filter;
+	/* 2.13 Startup Parameters and State */
+	int full_bw_reached; /* boolean */
+	int full_bw_now; /* boolean */
+	uint64_t full_bw;
+	int full_bw_count;
+	/* 2.14 ProbeRTT and min_rtt Parameters and State */
+	/* 2.14.1 Parameters for Estimating BBR.min_rtt */
+	uint32_t min_rtt_stamp;
+	/* 2.14.2 Parameters for Scheduling ProbeRTT */
+	uint32_t probe_rtt_min_delay; /* ms */
+	uint32_t probe_rtt_min_stamp; /* ms */
+	uint32_t probe_rtt_done_stamp;
+	int probe_rtt_round_done; /* boolean */
+	int probe_rtt_expired; /* boolean */
+};
+
+/* Return the minimal cwnd BBR works with: four MTUs of the path */
+static inline uint64_t bbr_min_pipe_cwnd(struct quic_cc_path *path)
+{
+	return 4 * path->mtu;
+}
+
+/* Return 1 if bbr is in one of the four ProbeBW states, 0 if not */
+static inline int bbr_is_probing_bw(struct bbr *bbr)
+{
+	switch (bbr->state) {
+	case BBR_ST_PROBE_BW_DOWN:
+	case BBR_ST_PROBE_BW_CRUISE:
+	case BBR_ST_PROBE_BW_REFILL:
+	case BBR_ST_PROBE_BW_UP:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Clear the loss flag and the latest bw/inflight delivery signals */
+static void bbr_reset_congestion_signals(struct bbr *bbr)
+{
+	bbr->loss_in_round = false;
+	bbr->bw_latest = 0;
+	bbr->inflight_latest = 0;
+}
+
+/* Mark both short-term lower bounds as unset (UINT64_MAX) */
+static void bbr_reset_lower_bounds(struct bbr *bbr)
+{
+	bbr->bw_lo = UINT64_MAX;
+	bbr->inflight_lo = UINT64_MAX;
+}
+
+/* Reset the round trip counting state */
+static void bbr_init_round_counting(struct bbr *bbr)
+{
+	bbr->next_round_delivered = 0;
+	bbr->round_start = false;
+	bbr->round_count = 0;
+}
+
+/* Reset the full bandwidth detection state */
+static void bbr_reset_full_bw(struct bbr *bbr)
+{
+	bbr->full_bw = 0;
+	bbr->full_bw_count = 0;
+	bbr->full_bw_now = false;
+}
+
+/* Placeholder: the initial pacing rate is not computed yet */
+static void bbr_init_pacing_rate(struct bbr *bbr)
+{
+	/* XXX Not clear at this time XXX */
+}
+
+/* Switch to the Startup state with its pacing and cwnd gains */
+static void bbr_enter_startup(struct bbr *bbr)
+{
+	bbr->state = BBR_ST_STARTUP;
+	bbr->pacing_gain = BBR_STARTUP_PACING_GAIN;
+	bbr->cwnd_gain = BBR_DEFAULT_CWND_GAIN;
+}
+
+/* Switch to the Drain state: pacing gain below 100%, default cwnd gain */
+static void bbr_enter_drain(struct bbr *bbr)
+{
+	bbr->state = BBR_ST_DRAIN;
+	bbr->pacing_gain = BBR_DRAIN_PACING_GAIN; /* pace slowly */
+	bbr->cwnd_gain = BBR_DEFAULT_CWND_GAIN;
+}
+
+/* Return gain percent of the bandwidth-delay product computed from bw
+ * (per second) and bbr->min_rtt (ms), or the path initial window as long
+ * as no RTT sample is available (min_rtt == UINT32_MAX).
+ */
+static uint64_t bbr_bdp_multiple(struct bbr *bbr, struct quic_cc_path *path,
+                                 uint64_t bw, uint64_t gain)
+{
+	uint64_t bdp;
+
+	if (bbr->min_rtt == UINT32_MAX)
+		return path->initial_wnd; /* no valid RTT samples yet */
+
+	bdp = bw * bbr->min_rtt / 1000;
+
+	return gain * bdp / 100;
+}
+
+/* Set the offload budget to three times the send quantum of the path */
+static void bbr_update_offload_budget(struct bbr *bbr, struct quic_cc_path *path)
+{
+	bbr->offload_budget = 3 * path->send_quantum;
+}
+
+/* Return inflight raised to at least the offload budget and the minimal
+ * pipe cwnd, plus two MTUs when in ProbeBW_UP state.
+ */
+static uint64_t bbr_quantization_budget(struct bbr *bbr, struct quic_cc_path *path,
+                                        uint64_t inflight)
+{
+	bbr_update_offload_budget(bbr, path);
+	inflight = MAX(inflight, bbr->offload_budget);
+	inflight = MAX(inflight, bbr_min_pipe_cwnd(path));
+	if (bbr->state == BBR_ST_PROBE_BW_UP)
+		inflight += 2 * path->mtu;
+
+	return inflight;
+}
+
+/* Return the quantized inflight volume for bw scaled by gain percent */
+static uint64_t bbr_inflight(struct bbr *bbr, struct quic_cc_path *path,
+                             uint64_t bw, uint64_t gain)
+{
+	uint64_t inflight = bbr_bdp_multiple(bbr, path, bw, gain);
+	return bbr_quantization_budget(bbr, path, inflight);
+}
+
+/* Recompute bbr->max_inflight from the current bw, cwnd gain and extra_acked */
+static void bbr_update_max_inflight(struct bbr *bbr, struct quic_cc_path *path)
+{
+	uint64_t inflight;
+
+	/* Not defined by RFC */
+	//BBRUpdateAggregationBudget();
+	inflight = bbr_bdp_multiple(bbr, path, bbr->bw, bbr->cwnd_gain);
+	inflight += bbr->extra_acked;
+	bbr->max_inflight = bbr_quantization_budget(bbr, path, inflight);
+}
+
+/* Initialize the BBR state attached to cc: allocate and reset the delivery
+ * rate sampler, then give every model variable its initial value.
+ * Return 1 on success, 0 if the sampler could not be allocated.
+ */
+static int quic_cc_bbr_init(struct quic_cc *cc)
+{
+	struct bbr *bbr = quic_cc_priv(cc);
+
+	bbr->drs = pool_alloc(pool_head_quic_drs);
+	if (!bbr->drs)
+		return 0;
+
+	quic_cc_drs_init(bbr->drs);
+	wf_init(&bbr->max_bw_filter, BBR_MAX_BW_FILTERLEN);
+	wf_init(&bbr->extra_acked_filter, BBR_EXTRA_ACKED_FILTERLEN);
+	/* InitWindowedMaxFilter() */
+	bbr->min_rtt = UINT32_MAX; /* ms */ /* XXX check this XXX */
+	bbr->min_rtt_stamp = now_ms;
+	bbr->probe_rtt_done_stamp = TICK_ETERNITY; /* XXX check this XXX */
+	bbr->probe_rtt_round_done = false;
+	bbr->prior_cwnd = 0;
+	bbr->idle_restart = false;
+	bbr->extra_acked_interval_start = now_ms;
+	bbr->extra_acked_delivered = 0;
+	bbr->full_bw_reached = false;
+
+	bbr_reset_congestion_signals(bbr);
+	bbr_reset_lower_bounds(bbr);
+	bbr_init_round_counting(bbr);
+	bbr_reset_full_bw(bbr);
+	bbr_init_pacing_rate(bbr);
+	bbr_enter_startup(bbr);
+
+	/* Not in RFC */
+	bbr->loss_round_start = false;
+	bbr->loss_round_delivered = UINT64_MAX;
+	bbr->send_quantum = 0; /* XXX check this */
+	bbr->max_bw = 0;
+	bbr->bw = 0;
+	bbr->extra_acked = 0;
+	bbr->bytes_lost_in_round = 0;
+	bbr->loss_events_in_round = 0;
+	bbr->offload_budget = 0;
+	bbr->probe_up_cnt = UINT64_MAX;
+	bbr->cycle_stamp = TICK_ETERNITY;
+	bbr->ack_phase = BBR_ACK_PHASE_ACKS_PROBE_STARTING;
+	bbr->bw_probe_wait = 0;
+	bbr->bw_probe_samples = 0;
+	bbr->bw_probe_up_rounds = 0;
+	bbr->bw_probe_up_acks = 0;
+	bbr->max_inflight = 0;
+	bbr->inflight_hi = UINT64_MAX;
+	bbr->bw_hi = UINT64_MAX;
+	bbr->cycle_count = 0;
+	bbr->probe_rtt_min_delay = TICK_ETERNITY;
+	bbr->probe_rtt_min_stamp = now_ms;
+	bbr->probe_rtt_expired = false;
+	bbr->pacing_rate = 0; /* was left uninitialized */
+	bbr->rounds_since_bw_probe = 0; /* incremented by bbr_update_round() */
+
+	return 1;
+}
+
+/* Placeholder: restart from idle handling is not implemented yet */
+static void bbr_handle_restart_from_idle(struct bbr *bbr)
+{
+}
+
+/* Transmission hook (no caller in this file yet): handles restart from idle */
+static void bbr_on_transmit(struct bbr *bbr)
+{
+	bbr_handle_restart_from_idle(bbr);
+}
+
+/* Placeholder: leaving Startup on high loss is not implemented yet */
+static void bbr_check_startup_high_loss(void)
+{
+}
+
+/* Leave Startup for Drain once the full bandwidth has been reached */
+static void bbr_check_startup_done(struct bbr *bbr)
+{
+	bbr_check_startup_high_loss();
+	if (bbr->state == BBR_ST_STARTUP && bbr->full_bw_reached)
+		bbr_enter_drain(bbr);
+}
+
+/* Record the delivered volume which marks the end of the round being started */
+static void bbr_start_round(struct bbr *bbr)
+{
+	bbr->next_round_delivered = bbr->drs->delivered;
+}
+
+/* Start a new round if ack_packet_delivered reached the round boundary,
+ * updating the round counters and bbr->round_start accordingly.
+ */
+static void bbr_update_round(struct bbr *bbr, uint64_t ack_packet_delivered)
+{
+	if (ack_packet_delivered >= bbr->next_round_delivered) {
+		bbr_start_round(bbr);
+		bbr->round_count++;
+		bbr->rounds_since_bw_probe++;
+		bbr->round_start = 1;
+		bbr->bytes_lost_in_round = 0;
+	}
+	else {
+		bbr->round_start = 0;
+	}
+}
+
+/* Placeholder: the randomized probe wait is not implemented yet */
+static void bbr_pick_probe_wait(struct bbr *bbr)
+{
+	/* TODO */
+#if 0
+	BBR.rounds_since_bw_probe =
+	  random_int_between(0, 1); /* 0 or 1 */
+	/* Decide the random wall clock bound for wait: */
+	BBR.bw_probe_wait =
+	  2 + random_float_between(0.0, 1.0); /* 0..1 sec */
+#endif
+}
+
+/* Double the inflight_hi growth rate at each call (the shift is capped at
+ * 30) by lowering the acked volume needed to grow inflight_hi by one MTU.
+ */
+static void bbr_raise_inflight_hi_slope(struct bbr *bbr, struct quic_cc_path *path)
+{
+	uint64_t growth_this_round = (uint64_t)path->mtu << bbr->bw_probe_up_rounds;
+
+	bbr->bw_probe_up_rounds = MIN(bbr->bw_probe_up_rounds + 1, 30);
+	bbr->probe_up_cnt = MAX(path->cwnd / growth_this_round, 1) * path->mtu;
+}
+
+/* Enter the ProbeBW_DOWN state at time ts (pacing gain: 90%) */
+static void bbr_start_probe_bw_down(struct bbr *bbr, uint32_t ts)
+{
+	bbr_reset_congestion_signals(bbr);
+	bbr->probe_up_cnt = UINT64_MAX;
+	bbr_pick_probe_wait(bbr);
+	bbr->cycle_stamp = ts;
+	bbr->ack_phase = BBR_ACK_PHASE_ACKS_PROBE_STOPPING;
+	bbr_start_round(bbr);
+	bbr->state = BBR_ST_PROBE_BW_DOWN;
+	bbr->pacing_gain = 90;
+	bbr->cwnd_gain = 200;
+}
+
+/* Enter the ProbeBW_CRUISE state (pacing gain: 100%) */
+static void bbr_start_probe_bw_cruise(struct bbr *bbr)
+{
+	bbr->state = BBR_ST_PROBE_BW_CRUISE;
+	bbr->pacing_gain = 100;
+	bbr->cwnd_gain = 200;
+}
+
+/* Enter the ProbeBW_REFILL state, dropping the short-term lower bounds */
+static void bbr_start_probe_bw_refill(struct bbr *bbr)
+{
+	bbr_reset_lower_bounds(bbr);
+	bbr->bw_probe_up_rounds = 0;
+	bbr->bw_probe_up_acks = 0;
+	bbr->ack_phase = BBR_ACK_PHASE_ACKS_REFILLING;
+	bbr_start_round(bbr);
+	bbr->state = BBR_ST_PROBE_BW_REFILL;
+	bbr->pacing_gain = 100;
+	bbr->cwnd_gain = 200;
+}
+
+/* Enter the ProbeBW_UP state at time ts (pacing gain: 125%) */
+static void bbr_start_probe_bw_up(struct bbr *bbr, struct quic_cc_path *path,
+                                  uint32_t ts)
+{
+	bbr->ack_phase = BBR_ACK_PHASE_ACKS_PROBE_STARTING;
+	bbr_start_round(bbr);
+	bbr->cycle_stamp = ts;
+	bbr->state = BBR_ST_PROBE_BW_UP;
+	bbr->pacing_gain = 125;
+	bbr->cwnd_gain = 225;
+	bbr_raise_inflight_hi_slope(bbr, path);
+}
+
+/* Leave Drain for ProbeBW_DOWN once in_flight fits the estimated BDP */
+static void bbr_check_drain_done(struct bbr *bbr, struct quic_cc_path *path, uint32_t ts)
+{
+	if (bbr->state == BBR_ST_DRAIN &&
+	    path->in_flight <= bbr_inflight(bbr, path, bbr->bw, 100))
+		bbr_start_probe_bw_down(bbr, ts);
+}
+
+/* Track the highest delivery rate and delivered volume seen in the current
+ * loss round, and detect the start of a new loss round.
+ */
+static void bbr_update_latest_delivery_signals(struct quic_cc *cc)
+{
+	struct bbr *bbr = quic_cc_priv(cc);
+	struct quic_drs *drs = bbr->drs;
+	struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+
+	bbr->loss_round_start = 0;
+	bbr->bw_latest = MAX(bbr->bw_latest, path->delivery_rate);
+	bbr->inflight_latest = MAX(bbr->inflight_latest, drs->rs.delivered);
+	if (drs->rs.prior_delivered >= bbr->loss_round_delivered) {
+		bbr->loss_round_delivered = drs->delivered;
+		bbr->loss_round_start = 1;
+	}
+}
+
+/* Advance the cycle counter used as time base by the max_bw filter */
+static void bbr_advance_max_bw_filter(struct bbr *bbr)
+{
+	bbr->cycle_count++;
+}
+
+/* Return the estimated (quantized) BDP, capped by the current cwnd */
+static uint64_t bbr_target_inflight(struct bbr *bbr, struct quic_cc_path *path)
+{
+	uint64_t bdp = bbr_inflight(bbr, path, bbr->bw, 100);
+	return MIN(bdp, path->cwnd);
+}
+
+/* React to a too high loss rate: stop sampling, update inflight_hi unless
+ * rs is application limited, and leave ProbeBW_UP for ProbeBW_DOWN.
+ */
+static void bbr_handle_inflight_too_high(struct bbr *bbr, struct quic_cc_path *path,
+                                         struct quic_rs *rs, uint32_t ts)
+{
+	bbr->bw_probe_samples = 0;
+	if (!rs->is_app_limited)
+		bbr->inflight_hi =
+			MAX(rs->tx_in_flight, bbr_target_inflight(bbr, path) * BBR_BETA_MULT / BBR_BETA_DIVI);
+
+	if (bbr->state == BBR_ST_PROBE_BW_UP)
+		bbr_start_probe_bw_down(bbr, ts);
+}
+
+/* Return true if the loss rate of rs exceeds BBRLossThresh (2%) */
+static inline int bbr_rs_is_inflight_too_high(struct quic_rs *rs)
+{
+	return rs->lost * BBR_LOSS_THRESH_DIVI > rs->tx_in_flight * BBR_LOSS_THRESH_MULT;
+}
+
+/* Return 1 if the last rate sample shows a too high loss rate, 0 if not.
+ * The reaction is only triggered while bandwidth probe samples are expected.
+ */
+static int bbr_is_inflight_too_high(struct bbr *bbr, struct quic_cc_path *path,
+                                    uint32_t ts)
+{
+	if (!bbr_rs_is_inflight_too_high(&bbr->drs->rs))
+		return 0;
+
+	if (bbr->bw_probe_samples)
+		bbr_handle_inflight_too_high(bbr, path, &bbr->drs->rs, ts);
+
+	return 1;
+}
+
+/* Raise inflight_hi by one MTU each time probe_up_cnt of acked data has
+ * been accumulated, but only when the cwnd is the limiting factor.
+ */
+static void bbr_probe_inflight_hi_upward(struct bbr *bbr, struct quic_cc_path *path, uint32_t acked)
+{
+	if (!bbr->drs->is_cwnd_limited || path->cwnd < bbr->inflight_hi)
+		return; /* not fully using inflight_hi, so don't grow it */
+
+	bbr->bw_probe_up_acks += acked;
+	if (bbr->bw_probe_up_acks >= bbr->probe_up_cnt) {
+		uint64_t delta;
+
+		delta = bbr->bw_probe_up_acks / bbr->probe_up_cnt;
+		bbr->bw_probe_up_acks -= delta * bbr->probe_up_cnt;
+		bbr->inflight_hi += delta * path->mtu;
+	}
+
+	if (bbr->round_start)
+		bbr_raise_inflight_hi_slope(bbr, path);
+}
+
+/* Track ACK state and update BBR.max_bw window and
+ * BBR.inflight_hi.
+ */
+static void bbr_adapt_upper_bounds(struct bbr *bbr, struct quic_cc_path *path,
+                                   uint32_t acked, uint32_t ts)
+{
+	if (bbr->ack_phase == BBR_ACK_PHASE_ACKS_PROBE_STARTING && bbr->round_start)
+		/* starting to get bw probing samples */
+		bbr->ack_phase = BBR_ACK_PHASE_ACKS_PROBE_FEEDBACK;
+
+	if (bbr->ack_phase == BBR_ACK_PHASE_ACKS_PROBE_STOPPING && bbr->round_start) {
+		/* end of samples from bw probing phase */
+		if (bbr_is_probing_bw(bbr) && !bbr->drs->rs.is_app_limited)
+			bbr_advance_max_bw_filter(bbr);
+	}
+
+	if (bbr_is_inflight_too_high(bbr, path, ts))
+		return;
+
+	/* bw_hi is not tested: it starts at UINT64_MAX and is only set below */
+	if (bbr->inflight_hi == UINT64_MAX /* || bbr->bw_hi == UINT64_MAX */)
+		return;
+
+	if (bbr->drs->rs.tx_in_flight > bbr->inflight_hi)
+		bbr->inflight_hi = bbr->drs->rs.tx_in_flight;
+
+	if (path->delivery_rate > bbr->bw_hi)
+		bbr->bw_hi = path->delivery_rate;
+
+	if (bbr->state == BBR_ST_PROBE_BW_UP)
+		bbr_probe_inflight_hi_upward(bbr, path, acked);
+}
+
+
+/* Return true if more than interval elapsed since bbr->cycle_stamp at ts */
+static inline int bbr_has_elapsed_in_phase(struct bbr *bbr,
+                                           uint32_t interval, uint32_t ts)
+{
+	/* tick-based form of "ts > bbr->cycle_stamp + interval"
+	 * (presumably to cope with wrapping timestamps; TODO confirm) */
+	return tick_is_lt(tick_add(bbr->cycle_stamp, interval), ts);
+}
+
+/* Return true if enough rounds elapsed since the last bandwidth probe: the
+ * target inflight counted in MTUs, capped at 63 rounds.
+ */
+static int bbr_is_reno_coexistence_probe_time(struct bbr *bbr, struct quic_cc_path *path)
+{
+	uint64_t reno_rounds;
+
+	reno_rounds = bbr_target_inflight(bbr, path) / path->mtu;
+	return bbr->rounds_since_bw_probe >= MIN(reno_rounds, 63);
+}
+
+/* Return a volume of data that tries to leave free headroom in the bottleneck
+ * buffer or link for other flows, for fairness convergence and lower RTTs and
+ * loss.
+ */
+static uint64_t bbr_inflight_with_headroom(struct bbr *bbr, struct quic_cc_path *path)
+{
+	uint64_t headroom;
+
+	if (bbr->inflight_hi == UINT64_MAX)
+		return UINT64_MAX;
+
+	headroom = MAX(path->mtu,
+	               bbr->inflight_hi * BBR_HEADROOM_MULT / BBR_HEADROOM_DIVI);
+	/* unsigned arithmetic: do not let the subtraction below wrap around */
+	if (headroom >= bbr->inflight_hi)
+		return bbr_min_pipe_cwnd(path);
+
+	return MAX(bbr->inflight_hi - headroom, bbr_min_pipe_cwnd(path));
+}
+
+/* Is it time to transition from DOWN or CRUISE to REFILL? If so, the
+ * transition is done here before returning 1. Return 0 otherwise.
+ */
+static int bbr_is_time_to_probe_bw(struct bbr *bbr, struct quic_cc_path *path,
+                                   uint32_t ts)
+{
+	if (bbr_has_elapsed_in_phase(bbr, bbr->bw_probe_wait, ts) ||
+	    bbr_is_reno_coexistence_probe_time(bbr, path)) {
+		bbr_start_probe_bw_refill(bbr);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Time to transition from DOWN to CRUISE? */
+static int bbr_is_time_to_cruise(struct bbr *bbr, struct quic_cc_path *path)
+{
+	if (path->in_flight > bbr_inflight_with_headroom(bbr, path))
+		return false; /* not enough headroom */
+
+	/* gains are percentages: 100 selects exactly one estimated BDP */
+	if (path->in_flight <= bbr_inflight(bbr, path, bbr->max_bw, 100))
+		return true; /* inflight <= estimated BDP */
+
+	return false;
+}
+
+/* Time to transition from UP to DOWN? */
+static int bbr_is_time_to_go_down(struct bbr *bbr, struct quic_cc_path *path)
+{
+	if (bbr->drs->is_cwnd_limited && path->cwnd >= bbr->inflight_hi) {
+		bbr_reset_full_bw(bbr); /* bw is limited by inflight_hi */
+		bbr->full_bw = path->delivery_rate;
+	}
+	else if (bbr->full_bw_now) {
+		return 1; /* we estimate we've fully used path bw */
+	}
+
+	return 0;
+}
+
+/* The core state machine logic for ProbeBW: */
+static void bbr_update_probe_bw_cycle_phase(struct bbr *bbr, struct quic_cc_path *path,
+                                            uint32_t acked, uint32_t ts)
+{
+	if (!bbr->full_bw_reached)
+		return; /* only handling steady-state behavior here */
+
+	bbr_adapt_upper_bounds(bbr, path, acked, ts);
+	if (!bbr_is_probing_bw(bbr))
+		return; /* only handling ProbeBW states here: */
+
+	switch (bbr->state) {
+	case BBR_ST_PROBE_BW_DOWN:
+		if (bbr_is_time_to_probe_bw(bbr, path, ts))
+			return;/* already decided state transition */
+
+		if (bbr_is_time_to_cruise(bbr, path))
+			bbr_start_probe_bw_cruise(bbr);
+		break;
+
+	case BBR_ST_PROBE_BW_CRUISE:
+		if (bbr_is_time_to_probe_bw(bbr, path, ts))
+			return; /* already decided state transition */
+
+		break;
+
+	case BBR_ST_PROBE_BW_REFILL:
+		/* After one round of REFILL, start UP */
+		if (bbr->round_start) {
+			bbr->bw_probe_samples = 1;
+			bbr_start_probe_bw_up(bbr, path, ts);
+		}
+		break;
+
+	case BBR_ST_PROBE_BW_UP:
+		if (bbr_is_time_to_go_down(bbr, path))
+			bbr_start_probe_bw_down(bbr, ts);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/* Update the min_rtt estimate and the ProbeRTT scheduling state at time ts.
+ * NOTE(review): the RTT sample is still read from the draft's "rs.rtt", but
+ * no rs variable exists here and struct quic_rs has no rtt member: the
+ * sample must be provided by the caller before this function can build.
+ */
+static void bbr_update_min_rtt(struct bbr *bbr, uint32_t ts)
+{
+	int min_rtt_expired;
+
+	bbr->probe_rtt_expired =
+		ts > bbr->probe_rtt_min_stamp + BBR_PROBE_RTT_INTERVAL;
+	if (rs.rtt >= 0 &&
+	    (rs.rtt < bbr->probe_rtt_min_delay ||
+	     bbr->probe_rtt_expired)) {
+		bbr->probe_rtt_min_delay = rs.rtt;
+		bbr->probe_rtt_min_stamp = ts;
+	}
+
+	min_rtt_expired =
+		ts > bbr->min_rtt_stamp + BBR_MIN_RTT_FILTERLEN;
+	if (bbr->probe_rtt_min_delay < bbr->min_rtt ||
+	    min_rtt_expired) {
+		bbr->min_rtt = bbr->probe_rtt_min_delay;
+		bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp;
+	}
+}
+
+/* Update the round counting, then feed the max_bw filter with the delivery
+ * rate unless the sample is application limited and below max_bw.
+ */
+static void bbr_update_max_bw(struct bbr *bbr, struct quic_cc_path *path,
+                              uint64_t ack_packet_delivered)
+{
+	struct quic_rs *rs = &bbr->drs->rs;
+
+	bbr_update_round(bbr, ack_packet_delivered);
+	if (path->delivery_rate >= bbr->max_bw || !rs->is_app_limited) {
+		wf_update(&bbr->max_bw_filter, path->delivery_rate, bbr->cycle_count);
+		bbr->max_bw = wf_get_best(&bbr->max_bw_filter);
+	}
+}
+
+/* Set the lower bounds which are still unset from max_bw and the cwnd */
+static void bbr_init_lower_bounds(struct bbr *bbr, struct quic_cc_path *path)
+{
+	if (bbr->bw_lo == UINT64_MAX)
+		bbr->bw_lo = bbr->max_bw;
+	if (bbr->inflight_lo == UINT64_MAX)
+		bbr->inflight_lo = path->cwnd;
+}
+
+/* Cut the lower bounds by BBRBeta, without going below the latest signals */
+static void bbr_loss_lower_bounds(struct bbr *bbr)
+{
+	bbr->bw_lo = MAX(bbr->bw_latest, bbr->bw_lo * BBR_BETA_MULT / BBR_BETA_DIVI);
+	bbr->inflight_lo = MAX(bbr->inflight_latest,
+	                       bbr->inflight_lo * BBR_BETA_MULT / BBR_BETA_DIVI);
+}
+
+/* Lower the short-term bounds after a round with loss, outside of ProbeBW */
+static void bbr_adapt_lower_bounds_from_congestion(struct bbr *bbr, struct quic_cc_path *path)
+{
+	if (bbr_is_probing_bw(bbr))
+		return;
+
+	if (bbr->loss_in_round) {
+		bbr_init_lower_bounds(bbr, path);
+		bbr_loss_lower_bounds(bbr);
+	}
+}
+
+/* Update max_bw and the per-round loss accounting with bytes_lost, then
+ * adapt the lower bounds once per round, when a loss round starts.
+ */
+static void bbr_update_congestion_signals(struct bbr *bbr, struct quic_cc_path *path,
+                                          uint64_t bytes_lost, uint64_t ack_packet_delivered)
+{
+	bbr_update_max_bw(bbr, path, ack_packet_delivered);
+	if (bytes_lost) {
+		bbr->bytes_lost_in_round += bytes_lost;
+		++bbr->loss_events_in_round;
+
+		if (!bbr->loss_in_round) {
+			bbr->loss_in_round = 1;
+			bbr->loss_round_delivered = bbr->drs->delivered;
+		}
+	}
+
+	if (!bbr->loss_round_start)
+		return; /* wait until end of round trip */
+
+	bbr_adapt_lower_bounds_from_congestion(bbr, path); /* once per round, adapt */
+	bbr->loss_in_round = 0;
+}
+
+/* Estimate the volume of data acked in excess of what bbr->bw allows over
+ * the current interval, and feed the extra_acked filter with it.
+ */
+static void bbr_update_ack_aggregation(struct bbr *bbr,
+                                       struct quic_cc_path *path,
+                                       uint32_t acked)
+{
+	uint32_t interval = now_ms - bbr->extra_acked_interval_start;
+	/* bw is per second, interval in ms (same scaling as bbr_bdp_multiple()) */
+	uint64_t expected_delivered = bbr->bw * interval / 1000;
+	uint64_t extra;
+
+	if (bbr->extra_acked_delivered <= expected_delivered) {
+		bbr->extra_acked_delivered = 0;
+		bbr->extra_acked_interval_start = now_ms;
+		expected_delivered = 0;
+	}
+
+	bbr->extra_acked_delivered += acked;
+	extra = bbr->extra_acked_delivered - expected_delivered;
+	extra = MIN(extra, path->cwnd);
+
+	/* XXX CHECK THIS XXX: the RFC make use of a parameter
+	 * to be passed to wf_update().
+	 */
+	wf_update(&bbr->extra_acked_filter, extra, bbr->round_count);
+	bbr->extra_acked = wf_get_best(&bbr->extra_acked_filter);
+}
+
+/* When a loss round starts, restart the latest signals from this sample */
+static void bbr_advance_latest_delivery_signals(struct bbr *bbr,
+                                                struct quic_cc_path *path)
+{
+	if (bbr->loss_round_start) {
+		bbr->bw_latest = path->delivery_rate;
+		bbr->inflight_latest = bbr->drs->rs.delivered;
+	}
+}
+
+/* Declare the full bandwidth reached after three round starts without a
+ * 25% growth of the delivery rate (application limited samples are skipped).
+ */
+static void bbr_check_full_bw_reached(struct bbr *bbr, struct quic_cc_path *path)
+{
+	struct quic_rs *rs = &bbr->drs->rs;
+
+	if (bbr->full_bw_now || rs->is_app_limited)
+		return; /* no need to check for a full pipe now */
+
+	if ((uint64_t)path->delivery_rate * 100 >= bbr->full_bw * 125) {
+		bbr_reset_full_bw(bbr); /* bw is still growing, so reset */
+		bbr->full_bw = path->delivery_rate; /* record new baseline bw */
+		return;
+	}
+
+	if (!bbr->round_start)
+		return;
+
+	bbr->full_bw_count++; /* another round w/o much growth */
+	bbr->full_bw_now = bbr->full_bw_count >= 3;
+	if (bbr->full_bw_now)
+		bbr->full_bw_reached = true;
+}
+
+/* Set the bw used by the model: max_bw capped by the short-term lower bound */
+static void bbr_bound_bw_for_model(struct bbr *bbr)
+{
+	bbr->bw = MIN(bbr->max_bw, bbr->bw_lo);
+}
+
+/* Run all the model update steps, in order, upon ACK reception.
+ * NOTE(review): bbr_check_probe_rtt() is not defined in this file, and
+ * acked is passed as the ack_packet_delivered argument of
+ * bbr_update_congestion_signals() -- both need to be confirmed.
+ */
+static void bbr_update_model_and_state(struct quic_cc *cc,
+                                       uint32_t acked,
+                                       uint32_t bytes_lost,
+                                       uint32_t ts)
+{
+	struct bbr *bbr = quic_cc_priv(cc);
+	struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+
+	bbr_update_latest_delivery_signals(cc);
+	bbr_update_congestion_signals(bbr, path, bytes_lost, acked);
+	bbr_update_ack_aggregation(bbr, path, acked);
+	bbr_check_full_bw_reached(bbr, path);
+	bbr_check_startup_done(bbr);
+	bbr_check_drain_done(bbr, path, ts);
+	bbr_update_probe_bw_cycle_phase(bbr, path, acked, ts);
+	bbr_update_min_rtt(bbr, ts);
+	bbr_check_probe_rtt();
+	bbr_advance_latest_delivery_signals(bbr, path);
+	bbr_bound_bw_for_model(bbr);
+}
+
+/* Refresh the pacing rate, the send quantum and the cwnd.
+ * NOTE(review): none of the three bbr_set_*() functions is defined yet.
+ */
+static void bbr_update_control_parameters(void)
+{
+	bbr_set_pacing_rate();
+	bbr_set_send_quantum();
+	bbr_set_cwnd();
+}
+
+/* ACK processing: update the model first, then the control parameters */
+static void bbr_update_on_ack(struct quic_cc *cc,
+                              uint32_t acked, uint32_t bytes_lost, uint32_t ts)
+{
+	bbr_update_model_and_state(cc, acked, bytes_lost, ts);
+	bbr_update_control_parameters();
+}
+
+/* Flag the current round as lossy, recording C_delivered on its first loss */
+static void bbr_note_loss(struct bbr *bbr, uint64_t C_delivered)
+{
+	if (!bbr->loss_in_round) /* first loss in this round trip? */
+		bbr->loss_round_delivered = C_delivered;
+	bbr->loss_in_round = 1;
+}
+
+/* At what prefix of packet did losses exceed BBRLossThresh? */
+static uint64_t bbr_inflight_hi_from_lost_packet(struct quic_rs *rs, struct quic_tx_packet *pkt)
+{
+	uint64_t size, inflight_prev, lost_prev, lost_prefix;
+
+	/* NOTE(review): assumes quic_tx_packet has a size member -- confirm */
+	size = pkt->size;
+	/* What was in flight before this packet? */
+	inflight_prev = rs->tx_in_flight > size ? rs->tx_in_flight - size : 0;
+	/* What was lost before this packet? */
+	lost_prev = rs->lost > size ? rs->lost - size : 0;
+	/* Integer form of (thresh * inflight_prev - lost_prev) / (1 - thresh);
+	 * the threshold was already crossed if the numerator is not positive.
+	 */
+	if (BBR_LOSS_THRESH_MULT * inflight_prev <= BBR_LOSS_THRESH_DIVI * lost_prev)
+		return inflight_prev;
+
+	lost_prefix = (BBR_LOSS_THRESH_MULT * inflight_prev - BBR_LOSS_THRESH_DIVI * lost_prev) /
+	              (BBR_LOSS_THRESH_DIVI - BBR_LOSS_THRESH_MULT);
+	/* At what inflight value did losses cross BBRLossThresh? */
+	return inflight_prev + lost_prefix;
+}
+
+/* Account for the loss of pkt and, while probing for bandwidth, check
+ * whether it reveals that the inflight volume was too high.
+ * NOTE(review): the tx_in_flight, lost and is_app_limited members of
+ * quic_tx_packet are taken from the original code -- confirm they exist.
+ */
+static void bbr_handle_lost_packet(struct bbr *bbr, struct quic_cc_path *path,
+                                   struct quic_tx_packet *pkt, uint32_t ts)
+{
+	struct quic_rs *rs = &bbr->drs->rs;
+
+	bbr_note_loss(bbr, bbr->drs->delivered);
+	if (!bbr->bw_probe_samples)
+		return; /* not a packet sent while probing bandwidth */
+
+	rs->tx_in_flight = pkt->tx_in_flight; /* inflight at transmit */
+	rs->lost = bbr->drs->lost - pkt->lost; /* data lost since transmit */
+	rs->is_app_limited = pkt->is_app_limited;
+	if (bbr_rs_is_inflight_too_high(rs)) {
+		rs->tx_in_flight = bbr_inflight_hi_from_lost_packet(rs, pkt);
+		bbr_handle_inflight_too_high(bbr, path, rs, ts);
+	}
+}
+
+/* Packet loss processing for pkt on the path cc belongs to.
+ * NOTE(review): now_ms is used as timestamp, none being passed -- confirm.
+ */
+static void bbr_update_on_loss(struct quic_cc *cc, struct quic_tx_packet *pkt)
+{
+	struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+
+	bbr_handle_lost_packet(quic_cc_priv(cc), path, pkt, now_ms);
+}
+
+/* BBR algorithm descriptor: only the type and ->init are set so far */
+struct quic_cc_algo quic_cc_algo_bbr = {
+	.type = QUIC_CC_ALGO_TP_BBR,
+	.init = quic_cc_bbr_init,
+};
+
+/* Check at startup that struct bbr fits in the private area of quic_cc.
+ * NOTE(review): BUG_ON_HOT() is presumably compiled out in some builds; a
+ * build-time assertion would catch an overflow more reliably -- confirm.
+ */
+void quic_cc_bbr_check(void)
+{
+	struct quic_cc *cc;
+	BUG_ON_HOT(sizeof(struct bbr) > sizeof(cc->priv));
+}
+
+INITCALL0(STG_REGISTER, quic_cc_bbr_check);
diff --git a/src/quic_drs.c b/src/quic_drs.c
new file mode 100644
index 000000000..718a51c3e
--- /dev/null
+++ b/src/quic_drs.c
@@ -0,0 +1,38 @@
+/* Direct Rate Sampling */
+
+#include
+#include
+#include
+#include
+
+DECLARE_POOL(pool_head_quic_drs, "quic_drs", sizeof(struct quic_drs));
+
+/* Reset the rate sample rs to its initial state */
+static void quic_cc_rs_init(struct quic_rs *rs)
+{
+	rs->interval = UINT32_MAX;
+	rs->delivered = 0;
+	rs->prior_delivered = 0;
+	rs->prior_time = TICK_ETERNITY;
+	rs->tx_in_flight = 0;
+	rs->lost = 0;
+	rs->prior_lost = 0;
+	rs->send_elapsed = 0;
+	rs->ack_elapsed = 0;
+	rs->is_app_limited = 0;
+}
+
+/* Reset the delivery rate sampler drs, its rate sample and its filter */
+void quic_cc_drs_init(struct quic_drs *drs)
+{
+	quic_cc_rs_init(&drs->rs);
+	wf_init(&drs->wf, 12);
+	drs->round_count = 0;
+	drs->next_round_delivered = 0;
+	drs->delivered = 0;
+	drs->lost = 0;
+	drs->delivered_ts = TICK_ETERNITY;
+	drs->first_sent_ts = TICK_ETERNITY;
+	drs->app_limited = false;
+	drs->is_cwnd_limited = false;
+}