Skip to content

Commit

Permalink
fix(enginenetx): compute zero time when we start dialing (#1594)
Browse files Browse the repository at this point in the history
Previously, the code was computing the zero time when we started
resolving. However, I have observed in the wild that, if the DNS lookup
time is high, we're going to have several ready tactics. We did not
previously see this bug because we gave priority to bridges and stats
tactics, hence we always had some ready tactics from the get go.

This PR is part of settling the dust after the changes requested in the
#1552 review.

The related tracking issue is ooni/probe#2704.

---------

Co-authored-by: Arturo Filastò <[email protected]>
  • Loading branch information
bassosimone and hellais committed May 10, 2024
1 parent 9f77da7 commit b124b69
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 11 deletions.
3 changes: 3 additions & 0 deletions internal/enginenetx/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ func filterAssignInitialDelays(input <-chan *httpsDialerTactic) <-chan *httpsDia

index := 0
for tx := range input {
// TODO(bassosimone): what do we do now about the user configured
// initial delays? Should we declare them as deprecated?

// rewrite the delays
tx.InitialDelay = happyEyeballsDelay(index)
index++
Expand Down
51 changes: 40 additions & 11 deletions internal/enginenetx/httpsdialer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"net"
"sync"
"sync/atomic"
"time"

Expand Down Expand Up @@ -206,24 +207,22 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo
// pattern used by `./internal/httpclientx` to perform attempts faster
// in case there is an initial early failure.

// TODO(bassosimone): the algorithm to filter and assign initial
// delays is broken because, if the DNS runs for more than one
// second, then several policies will immediately be due. We should
// probably use a better strategy that takes as the zero the time
// when the first dialing policy becomes available.

// We need a cancellable context to interrupt the tactics emitter early when we
// immediately get a valid response and we don't need to use other tactics.
ctx, cancel := context.WithCancel(ctx)
defer cancel()

// Create structure for computing the zero dialing time once during
// the first dial, so that subsequent attempts use happy eyeballs based
// on the moment in which we tried the first dial.
t0 := &httpsDialerWorkerZeroTime{}

// The emitter will emit tactics and then close the channel when done. We spawn 16 workers
// that handle tactics in parallel and post results on the collector channel.
emitter := httpsDialerFilterTactics(hd.policy.LookupTactics(ctx, hostname, port))
collector := make(chan *httpsDialerErrorOrConn)
joiner := make(chan any)
const parallelism = 16
t0 := time.Now()
for idx := 0; idx < parallelism; idx++ {
go hd.worker(ctx, joiner, emitter, t0, collector)
}
Expand Down Expand Up @@ -257,6 +256,36 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo
return httpsDialerReduceResult(connv, errorv)
}

// httpsDialerWorkerZeroTime contains the zero time used when dialing. We set this
// zero time when we start the first dialing attempt, such that subsequent attempts
// are correctly spaced starting from such a zero time.
//
// A previous approach was that we were taking the zero time when we started
// getting tactics, but this approach was wrong, because it caused several tactics
// to be ready, when the DNS lookup was slow.
//
// The zero value of this structure is ready to use.
type httpsDialerWorkerZeroTime struct {
// mu provides mutual exclusion.
mu sync.Mutex

// t is the zero time.
t time.Time
}

// Get returns the zero dialing time. The first invocation of this method
// saves the zero dialing time and subsquent invocations just return it.
//
// This method is safe to be called concurrently by goroutines.
func (t0 *httpsDialerWorkerZeroTime) Get() time.Time {
defer t0.mu.Unlock()
t0.mu.Lock()
if t0.t.IsZero() {
t0.t = time.Now()
}
return t0.t
}

// httpsDialerFilterTactics filters the tactics to:
//
// 1. be paranoid and filter out nil tactics if any;
Expand Down Expand Up @@ -297,7 +326,7 @@ func (hd *httpsDialer) worker(
ctx context.Context,
joiner chan<- any,
reader <-chan *httpsDialerTactic,
t0 time.Time,
t0 *httpsDialerWorkerZeroTime,
writer chan<- *httpsDialerErrorOrConn,
) {
// let the parent know that we terminated
Expand All @@ -321,7 +350,7 @@ func (hd *httpsDialer) worker(
func (hd *httpsDialer) dialTLS(
ctx context.Context,
logger model.Logger,
t0 time.Time,
t0 *httpsDialerWorkerZeroTime,
tactic *httpsDialerTactic,
) (model.TLSConn, error) {
// honor happy-eyeballs delays and wait for the tactic to be ready to run
Expand Down Expand Up @@ -398,10 +427,10 @@ func (hd *httpsDialer) dialTLS(
// return the context error if the context expires.
func httpsDialerTacticWaitReady(
ctx context.Context,
t0 time.Time,
t0 *httpsDialerWorkerZeroTime,
tactic *httpsDialerTactic,
) error {
deadline := t0.Add(tactic.InitialDelay)
deadline := t0.Get().Add(tactic.InitialDelay)
delta := time.Until(deadline)
if delta <= 0 {
return nil
Expand Down

0 comments on commit b124b69

Please sign in to comment.