Skip to content

Commit

Permalink
refactor(enginenetx): make LookupTactics async (#1300)
Browse files Browse the repository at this point in the history
Rather than waiting for LookupTactics to complete, make it async and let
it stream the tactics back to the caller. This design allows us to start
connecting while DNS lookups are still in progress when we have
configured beacons for specific hosts. In turn, this means we could
perform more operations in the same unit of time, by overalapping some
DNS lookups and TCP+TLS dials. Additionally, the new design would also
work quite well with a DNS resolver that awaits for additional responses
after the first one and returns all of them as tactics.

While there, recognize that the HTTPSDialer code and the code in the
related structs was a bit more complex than it should be. We don't need
to explicitly honor the context when moving data between goroutines as
long as the writer goroutines write until completion and then close the
channel, and as long as reader goroutines read until either the channel
is closed (when there's a single writer) or all the possible writers
have completed (otherwise). Networking code and networking-like code is
the only code that MAY block and for which we really need a context.

With the new simplified design, all the goroutines will join before
`DialTLSContext` returns, hence we don't need anymore a `sync.WaitGroup`
to make sure we're not leaking any goroutine in this code.

Part of ooni/probe#2531
  • Loading branch information
bassosimone authored Sep 25, 2023
1 parent 363f4b8 commit 1af54cf
Show file tree
Hide file tree
Showing 8 changed files with 310 additions and 289 deletions.
122 changes: 78 additions & 44 deletions internal/enginenetx/httpsdialer_internal_test.go
Original file line number Diff line number Diff line change
@@ -1,59 +1,15 @@
package enginenetx

import (
"context"
"crypto/tls"
"errors"
"fmt"
"net"
"sync"
"sync/atomic"
"testing"

"github.com/ooni/probe-cli/v3/internal/mocks"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
)

func TestHTTPSDialerTacticsEmitter(t *testing.T) {
t.Run("we correctly handle the case of a canceled context", func(t *testing.T) {
hd := &HTTPSDialer{
idGenerator: &atomic.Int64{},
logger: model.DiscardLogger,
netx: &netxlite.Netx{Underlying: nil}, // nil means: use netxlite's singleton
policy: &HTTPSDialerNullPolicy{},
resolver: netxlite.NewStdlibResolver(model.DiscardLogger),
rootCAs: netxlite.NewMozillaCertPool(),
wg: &sync.WaitGroup{},
}

var tactics []*HTTPSDialerTactic
for idx := 0; idx < 255; idx++ {
tactics = append(tactics, &HTTPSDialerTactic{
Endpoint: net.JoinHostPort(fmt.Sprintf("10.0.0.%d", idx), "443"),
InitialDelay: 0,
SNI: "www.example.com",
VerifyHostname: "www.example.com",
})
}

ctx, cancel := context.WithCancel(context.Background())
cancel() // we want the tested function to run with a canceled context

out := hd.tacticsEmitter(ctx, tactics...)

for range out {
// Here we do nothing!
//
// Ideally, we would like to count and assert that we have
// got no tactic from the channel but the selection of ready
// channels is nondeterministic, so we cannot really be
// asserting that. This leaves us with asking the question
// of what we should be asserting here?
}
})
}

func TestHTTPSDialerVerifyCertificateChain(t *testing.T) {
t.Run("without any peer certificate", func(t *testing.T) {
tlsConn := &mocks.TLSConn{
Expand Down Expand Up @@ -81,3 +37,81 @@ func TestHTTPSDialerVerifyCertificateChain(t *testing.T) {
}
})
}

func TestHTTPSDialerReduceResult(t *testing.T) {
t.Run("we return the first conn in a list of conns and close the other conns", func(t *testing.T) {
var closed int
expect := &mocks.TLSConn{} // empty
connv := []model.TLSConn{
expect,
&mocks.TLSConn{
Conn: mocks.Conn{
MockClose: func() error {
closed++
return nil
},
},
},
&mocks.TLSConn{
Conn: mocks.Conn{
MockClose: func() error {
closed++
return nil
},
},
},
}

conn, err := httpsDialerReduceResult(connv, nil)
if err != nil {
t.Fatal(err)
}

if conn != expect {
t.Fatal("unexpected conn")
}

if closed != 2 {
t.Fatal("did not call close")
}
})

t.Run("we join together a list of errors", func(t *testing.T) {
expectErr := "connection_refused\ninterrupted"
errorv := []error{errors.New("connection_refused"), errors.New("interrupted")}

conn, err := httpsDialerReduceResult(nil, errorv)
if err == nil || err.Error() != expectErr {
t.Fatal("unexpected err", err)
}

if conn != nil {
t.Fatal("expected nil conn")
}
})

t.Run("with a single error we return such an error", func(t *testing.T) {
expected := errors.New("connection_refused")
errorv := []error{expected}

conn, err := httpsDialerReduceResult(nil, errorv)
if !errors.Is(err, expected) {
t.Fatal("unexpected err", err)
}

if conn != nil {
t.Fatal("expected nil conn")
}
})

t.Run("we return errDNSNoAnswer if we don't have any conns or errors to return", func(t *testing.T) {
conn, err := httpsDialerReduceResult(nil, nil)
if !errors.Is(err, errDNSNoAnswer) {
t.Fatal("unexpected error", err)
}

if conn != nil {
t.Fatal("expected nil conn")
}
})
}
52 changes: 23 additions & 29 deletions internal/enginenetx/httpsdialer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
// short indicates whether this is a short test
short bool

// policy is the dialer policy
policy enginenetx.HTTPSDialerPolicy

// stats is the stats tracker to use.
stats enginenetx.HTTPSDialerStatsTracker

Expand All @@ -101,7 +98,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "net.SplitHostPort failure",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "www.example.com", // note: here the port is missing
scenario: netemx.InternetScenario,
Expand All @@ -112,25 +108,26 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
},

// This test case ensures that we handle the case of a nonexistent domain
// where we get a dns_no_answer error. The original DNS error is lost in
// background goroutines and what we report to the caller is just that there
// is no available IP address and tactic to attempt using.
{
name: "hd.policy.LookupTactics failure",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "www.example.nonexistent:443", // note: the domain does not exist
scenario: netemx.InternetScenario,
configureDPI: func(dpi *netem.DPIEngine) {
// nothing
},
expectErr: "dns_nxdomain_error",
expectErr: "dns_no_answer",
},

// This test case is the common case: all is good with multiple addresses to dial (I am
// not testing the case of a single address because it's a subcase of this one)
{
name: "successful dial with multiple addresses",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "www.example.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Expand All @@ -157,7 +154,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with TCP connect errors",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "www.example.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Expand Down Expand Up @@ -192,7 +188,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with TLS handshake errors",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "www.example.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Expand Down Expand Up @@ -223,7 +218,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with a TLS certificate valid for ANOTHER domain",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "wrong.host.badssl.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Expand All @@ -249,7 +243,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with TLS certificate signed by an unknown authority",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "untrusted-root.badssl.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Expand All @@ -275,7 +268,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with expired TLS certificate",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
stats: &enginenetx.HTTPSDialerNullStatsTracker{},
endpoint: "expired.badssl.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Expand All @@ -299,9 +291,8 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
// This is a corner case: what if the context is canceled after the DNS lookup
// but before we start dialing? Are we closing all goroutines and returning correctly?
{
name: "with context being canceled in OnStarting",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
name: "with context being canceled in OnStarting",
short: true,
stats: &httpsDialerCancelingContextStatsTracker{
cancel: nil,
flags: httpsDialerCancelingContextStatsTrackerOnStarting,
Expand All @@ -322,15 +313,15 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
configureDPI: func(dpi *netem.DPIEngine) {
// nothing
},
expectErr: "context canceled",
expectErr: "interrupted\ninterrupted",
},

// This is another corner case: what happens if the context is canceled after we
// have a good connection but before we're able to report it to the caller?
// This is another corner case: what happens if the context is canceled
// right after we eastablish a connection? Because of how the current code
// is written, the easiest thing to do is to just return the conn.
{
name: "with context being canceled in OnSuccess for the first success",
short: true,
policy: &enginenetx.HTTPSDialerNullPolicy{},
name: "with context being canceled in OnSuccess for the first success",
short: true,
stats: &httpsDialerCancelingContextStatsTracker{
cancel: nil,
flags: httpsDialerCancelingContextStatsTrackerOnSuccess,
Expand All @@ -351,7 +342,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
configureDPI: func(dpi *netem.DPIEngine) {
// nothing
},
expectErr: "context canceled",
expectErr: "",
}}

for _, tc := range allTestCases {
Expand Down Expand Up @@ -382,12 +373,16 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
// create the getaddrinfo resolver
resolver := netx.NewStdlibResolver(log.Log)

policy := &enginenetx.HTTPSDialerNullPolicy{
Logger: log.Log,
Resolver: resolver,
}

// create the TLS dialer
dialer := enginenetx.NewHTTPSDialer(
log.Log,
netx,
tc.policy,
resolver,
policy,
tc.stats,
)
defer dialer.CloseIdleConnections()
Expand Down Expand Up @@ -428,9 +423,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
if tlsConn != nil {
defer tlsConn.Close()
}

// wait for background connections to join
dialer.WaitGroup().Wait()
}()

// now verify that we have closed all the connections
Expand Down Expand Up @@ -510,8 +502,10 @@ func TestHTTPSDialerHostNetworkQA(t *testing.T) {
MockGetaddrinfoLookupANY: tproxy.GetaddrinfoLookupANY,
MockGetaddrinfoResolverNetwork: tproxy.GetaddrinfoResolverNetwork,
}},
&enginenetx.HTTPSDialerNullPolicy{},
resolver,
&enginenetx.HTTPSDialerNullPolicy{
Logger: log.Log,
Resolver: resolver,
},
&enginenetx.HTTPSDialerNullStatsTracker{},
)

Expand Down
Loading

0 comments on commit 1af54cf

Please sign in to comment.