Skip to content

Commit

Permalink
fix(minipipeline): change DNSDiff algorithm (#1407)
Browse files Browse the repository at this point in the history
We were running the algorithm on single IP addresses resolved by the
probe against the set resolved by the control.

This is incorrect, because we should compare to the whole set resolved
by the probe. This fact is relevant because v0.4 says there's
consistency if a single IP address or ASN resolved by the probe
intersects with the corresponding control set.

Conversely, our comparison required that each IP address resolved by the
probe belonged to the control set of addresses or ASNs.

This diff solves the problem.

While there, remove DNSPossiblyInvalidAddrs. I have determined we need
to run DNSDiff independently of bogons checks.

Incidentally, the new DNSDiff code I am adding is straight from Web
Connectivity v0.4, so we should be good.

Part of ooni/probe#2634
  • Loading branch information
bassosimone authored Nov 30, 2023
1 parent 689c243 commit 00a37ce
Show file tree
Hide file tree
Showing 140 changed files with 80 additions and 1,149 deletions.
1 change: 0 additions & 1 deletion internal/cmd/minipipeline/testdata/analysis.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrs": {},
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": {},
"HTTPDiffBodyProportionFactor": 1,
Expand Down
1 change: 0 additions & 1 deletion internal/cmd/minipipeline/testdata/analysis_classic.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrs": {},
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": null,
"HTTPDiffBodyProportionFactor": 1,
Expand Down
12 changes: 0 additions & 12 deletions internal/cmd/minipipeline/testdata/observations.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": null,
"MatchWithControlIPAddress": null,
"MatchWithControlIPAddressASN": null,
"ControlTLSHandshakeFailure": null,
"ControlHTTPFailure": null,
"ControlHTTPResponseStatusCode": null,
Expand Down Expand Up @@ -77,8 +75,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": null,
"MatchWithControlIPAddress": null,
"MatchWithControlIPAddressASN": null,
"ControlTLSHandshakeFailure": null,
"ControlHTTPFailure": null,
"ControlHTTPResponseStatusCode": null,
Expand Down Expand Up @@ -124,8 +120,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": null,
"MatchWithControlIPAddress": null,
"MatchWithControlIPAddressASN": null,
"ControlTLSHandshakeFailure": null,
"ControlHTTPFailure": null,
"ControlHTTPResponseStatusCode": null,
Expand Down Expand Up @@ -169,8 +163,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": null,
"MatchWithControlIPAddress": null,
"MatchWithControlIPAddressASN": null,
"ControlTLSHandshakeFailure": null,
"ControlHTTPFailure": null,
"ControlHTTPResponseStatusCode": null,
Expand Down Expand Up @@ -214,8 +206,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": null,
"MatchWithControlIPAddress": null,
"MatchWithControlIPAddressASN": null,
"ControlTLSHandshakeFailure": null,
"ControlHTTPFailure": null,
"ControlHTTPResponseStatusCode": null,
Expand Down Expand Up @@ -276,8 +266,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": "",
"MatchWithControlIPAddress": true,
"MatchWithControlIPAddressASN": true,
"ControlTLSHandshakeFailure": "",
"ControlHTTPFailure": "",
"ControlHTTPResponseStatusCode": 200,
Expand Down
4 changes: 0 additions & 4 deletions internal/cmd/minipipeline/testdata/observations_classic.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": null,
"MatchWithControlIPAddress": null,
"MatchWithControlIPAddressASN": null,
"ControlTLSHandshakeFailure": null,
"ControlHTTPFailure": null,
"ControlHTTPResponseStatusCode": null,
Expand Down Expand Up @@ -99,8 +97,6 @@
"130.192.16.171"
],
"ControlTCPConnectFailure": "",
"MatchWithControlIPAddress": true,
"MatchWithControlIPAddressASN": true,
"ControlTLSHandshakeFailure": "",
"ControlHTTPFailure": "",
"ControlHTTPResponseStatusCode": 200,
Expand Down
84 changes: 11 additions & 73 deletions internal/minipipeline/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ func AnalyzeWebObservations(container *WebObservationsContainer) *WebAnalysis {
analysis.ComputeDNSExperimentFailure(container)
analysis.ComputeDNSTransactionsWithBogons(container)
analysis.ComputeDNSTransactionsWithUnexpectedFailures(container)
analysis.ComputeDNSPossiblyInvalidAddrs(container)
analysis.ComputeDNSPossiblyInvalidAddrsClassic(container)
analysis.ComputeDNSPossiblyNonexistingDomains(container)

Expand Down Expand Up @@ -57,19 +56,13 @@ type WebAnalysis struct {
// not related to the domain we're querying for.
DNSTransactionsWithUnexpectedFailures optional.Value[map[int64]bool]

// DNSPossiblyInvalidAddrs contains the addresses that are not valid for the
// DNSPossiblyInvalidAddrsClassic contains the addresses that are not valid for the
// domain. An addres is valid for the domain if:
//
// 1. we can TLS handshake with the expected SNI using this address; or
// 1. the address was resolved by the TH; or
//
// 2. the address was resolved by the TH; or
//
// 3. the address ASN belongs to the set of ASNs obtained by mapping
// 2. the address ASN belongs to the set of ASNs obtained by mapping
// addresses resolved by the TH to their corresponding ASN.
DNSPossiblyInvalidAddrs optional.Value[map[string]bool]

// DNSPossiblyInvalidAddrsClassic is like DNSPossiblyInvalidAddrs but does
// not use TLS to validate the IP addresses.
DNSPossiblyInvalidAddrsClassic optional.Value[map[string]bool]

// DNSPossiblyNonexistingDomains lists all the domains for which both
Expand Down Expand Up @@ -255,67 +248,6 @@ func (wa *WebAnalysis) ComputeDNSTransactionsWithUnexpectedFailures(c *WebObserv
wa.DNSTransactionsWithUnexpectedFailures = optional.Some(state)
}

// ComputeDNSPossiblyInvalidAddrs computes the DNSPossiblyInvalidAddrs field.
func (wa *WebAnalysis) ComputeDNSPossiblyInvalidAddrs(c *WebObservationsContainer) {
// Implementation note: in the case in which DoH returned answers, here
// it still feels okay to consider them. We should avoid flagging DoH
// failures as measurement failures but if DoH returns us some unexpected
// even-non-bogon addr, it seems worth flagging for now.
//
// See https://github.com/ooni/probe/issues/2274

var state map[string]bool

// pass 1: insert candidates into the state map
for _, obs := range c.KnownTCPEndpoints {
addr := obs.IPAddress.Unwrap()

// skip the comparison if we don't have info about matching
if obs.MatchWithControlIPAddress.IsNone() || obs.MatchWithControlIPAddressASN.IsNone() {
continue
}

// flip state from None to empty when we see the first couple of
// (probe, th) failures allowing us to perform a comparison
if state == nil {
state = make(map[string]bool)
}

// an address is suspicious if we have information regarding its potential
// matching with TH info and we know it does not match
if !obs.MatchWithControlIPAddress.Unwrap() && !obs.MatchWithControlIPAddressASN.Unwrap() {
state[addr] = true
continue
}
}

// pass 2: remove IP addresses we could validate using TLS handshakes
//
// we need to perform this second step because the order with which we walk
// through c.KnownTCPEndpoints is not fixed _and_ in any case, there is no
// guarantee that we'll observe 80/tcp entries _before_ 443/tcp ones. So, by
// applying this algorithm as a second step, we ensure that we're always
// able to remove TLS-validate addresses from the "bad" set.
for _, obs := range c.KnownTCPEndpoints {
addr := obs.IPAddress.Unwrap()

// we cannot do much if we don't have TLS handshake information
if obs.TLSHandshakeFailure.IsNone() {
continue
}

// we should not modify the state if the TLS handshake failed
if obs.TLSHandshakeFailure.Unwrap() != "" {
continue
}

delete(state, addr)
}

// note that optional.Some constructs None if state is nil
wa.DNSPossiblyInvalidAddrs = optional.Some(state)
}

// ComputeDNSPossiblyInvalidAddrsClassic computes the DNSPossiblyInvalidAddrsClassic field.
func (wa *WebAnalysis) ComputeDNSPossiblyInvalidAddrsClassic(c *WebObservationsContainer) {
// Implementation note: in the case in which DoH returned answers, here
Expand All @@ -331,7 +263,7 @@ func (wa *WebAnalysis) ComputeDNSPossiblyInvalidAddrsClassic(c *WebObservationsC
addr := obs.IPAddress.Unwrap()

// skip the comparison if we don't have info about matching
if obs.MatchWithControlIPAddress.IsNone() || obs.MatchWithControlIPAddressASN.IsNone() {
if obs.DNSResolvedAddrs.IsNone() || obs.ControlDNSResolvedAddrs.IsNone() {
continue
}

Expand All @@ -341,9 +273,15 @@ func (wa *WebAnalysis) ComputeDNSPossiblyInvalidAddrsClassic(c *WebObservationsC
state = make(map[string]bool)
}

measurement := obs.DNSResolvedAddrs.Unwrap()
control := obs.ControlDNSResolvedAddrs.Unwrap()

ipAddrIntersection := DNSDiffFindCommonIPAddressIntersection(measurement, control)
asnIntersection := DNSDiffFindCommonASNsIntersection(measurement, control)

// an address is suspicious if we have information regarding its potential
// matching with TH info and we know it does not match
if !obs.MatchWithControlIPAddress.Unwrap() && !obs.MatchWithControlIPAddressASN.Unwrap() {
if ipAddrIntersection.Len() <= 0 && asnIntersection.Len() <= 0 {
state[addr] = true
continue
}
Expand Down
63 changes: 63 additions & 0 deletions internal/minipipeline/dnsdiff.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package minipipeline

import "github.com/ooni/probe-cli/v3/internal/geoipx"

// DNSDiffFindCommonIPAddressIntersection returns the set of IP addresses that
// belong to both the measurement and the control sets.
func DNSDiffFindCommonIPAddressIntersection(measurement, control Set[string]) Set[string] {
const (
inMeasurement = 1 << 0
inControl = 1 << 1
inBoth = inMeasurement | inControl
)

ipmap := make(map[string]int)
for _, ipAddr := range measurement.Keys() {
ipmap[ipAddr] |= inMeasurement
}
for _, ipAddr := range control.Keys() {
ipmap[ipAddr] |= inControl
}

state := NewSet[string]()
for key, value := range ipmap {
// just in case an empty string slipped through
if key != "" && (value&inBoth) == inBoth {
state.Add(key)
}
}

return state
}

// DNSDiffFindCommonIPAddressIntersection returns the set of ASNs that belong to both the set of ASNs
// obtained from the measurement and the one obtained from the control.
func DNSDiffFindCommonASNsIntersection(measurement, control Set[string]) Set[int64] {
const (
inMeasurement = 1 << 0
inControl = 1 << 1
inBoth = inMeasurement | inControl
)

asnmap := make(map[int64]int)
for _, ipAddr := range measurement.Keys() {
if asn, _, err := geoipx.LookupASN(ipAddr); err == nil && asn > 0 {
asnmap[int64(asn)] |= inMeasurement
}
}
for _, ipAddr := range control.Keys() {
if asn, _, err := geoipx.LookupASN(ipAddr); err == nil && asn > 0 {
asnmap[int64(asn)] |= inControl
}
}

state := NewSet[int64]()
for key, value := range asnmap {
// zero means that ASN lookup failed
if key != 0 && (value&inBoth) == inBoth {
state.Add(key)
}
}

return state
}
31 changes: 0 additions & 31 deletions internal/minipipeline/observation.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"net/url"
"strconv"

"github.com/ooni/probe-cli/v3/internal/geoipx"
"github.com/ooni/probe-cli/v3/internal/measurexlite"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
Expand Down Expand Up @@ -205,13 +204,6 @@ type WebObservation struct {
// ControlTCPConnectFailure is the control's TCP connect failure.
ControlTCPConnectFailure optional.Value[string]

// MatchWithControlIPAddress is true if also the control resolved this IP address.
MatchWithControlIPAddress optional.Value[bool]

// MatchWithControlIPAddressASN is true if the ASN associated to IPAddress
// is one of the ASNs obtained by mapping the TH-resolved IP addresses to ASNs.
MatchWithControlIPAddressASN optional.Value[bool]

// ControlTLSHandshakeFailure is the control's TLS handshake failure.
ControlTLSHandshakeFailure optional.Value[string]

Expand Down Expand Up @@ -465,15 +457,6 @@ func (c *WebObservationsContainer) controlMatchDNSLookupResults(inputDomain stri
thAddrMap[addr] = true
}

// (re)map out all the ASNs discovered by the TH using the same ASN
// database used to build the probe's ASN mapping
thASNMap := make(map[int64]bool)
for _, addr := range resp.DNS.Addrs {
if asn, _, err := geoipx.LookupASN(addr); err == nil && asn != 0 {
thASNMap[int64(asn)] = true
}
}

// walk through the list of known TCP observations
for _, obs := range c.KnownTCPEndpoints {
// obtain the domain from which we obtained the endpoint's address
Expand All @@ -488,8 +471,6 @@ func (c *WebObservationsContainer) controlMatchDNSLookupResults(inputDomain stri
obs.ControlDNSDomain = optional.Some(inputDomain)
obs.ControlDNSLookupFailure = optional.Some(utilsStringPointerToString(resp.DNS.Failure))
obs.ControlDNSResolvedAddrs = optional.Some(NewSet(resp.DNS.Addrs...))
obs.MatchWithControlIPAddress = optional.Some(true)
obs.MatchWithControlIPAddressASN = optional.Some(true)
continue
}

Expand All @@ -509,18 +490,6 @@ func (c *WebObservationsContainer) controlMatchDNSLookupResults(inputDomain stri

// register the resolved IP addresses
obs.ControlDNSResolvedAddrs = optional.Some(NewSet(resp.DNS.Addrs...))

// compute whether also the TH observed this addr
obs.MatchWithControlIPAddress = optional.Some(thAddrMap[addr])

// cannot continue unless we know the probe's ASN
ourASN := obs.IPAddressASN.UnwrapOr(0)
if ourASN <= 0 {
continue
}

// register whether there is matching in terms of the ASNs
obs.MatchWithControlIPAddressASN = optional.Some(thASNMap[ourASN])
}
}

Expand Down
47 changes: 0 additions & 47 deletions internal/minipipeline/observation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,53 +115,6 @@ func TestWebObservationsContainerIngestHTTPRoundTripEvents(t *testing.T) {
}

func TestWebObservationsContainerIngestControlMessages(t *testing.T) {
t.Run("we don't set MatchWithControlIPAddressASN when we don't have probe ASN info", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: []*WebObservation{},
KnownTCPEndpoints: map[int64]*WebObservation{
1: {
DNSDomain: optional.Some("dns.google"),
IPAddress: optional.Some("8.8.8.8"),
IPAddressASN: optional.None[int64](), // no ASN info!
EndpointTransactionID: optional.Some(int64(1)),
EndpointPort: optional.Some("443"),
EndpointAddress: optional.Some("8.8.8.8:443"),
},
},
knownIPAddresses: map[string]*WebObservation{},
}

thRequest := &model.THRequest{
HTTPRequest: "https://dns.google/",
}

thResponse := &model.THResponse{
DNS: model.THDNSResult{
Failure: nil,
Addrs: []string{"8.8.8.8"},
},
}

if err := container.IngestControlMessages(thRequest, thResponse); err != nil {
t.Fatal(err)
}

entry := container.KnownTCPEndpoints[1]

// we should have set MatchWithControlIPAddress
if entry.MatchWithControlIPAddress.IsNone() {
t.Fatal("MatchWithControlIPAddress is not set")
}
if entry.MatchWithControlIPAddress.Unwrap() == false {
t.Fatal("MatchWithControlIPAddress is not true")
}

// we should not have set MatchWithControlIPAddressASN
if !entry.MatchWithControlIPAddressASN.IsNone() {
t.Fatal("MatchWithControlIPAddressASN should not be set")
}
})

t.Run("we don't save TLS handshake failures when the SNI is different", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupFailures: []*WebObservation{},
Expand Down
Loading

0 comments on commit 00a37ce

Please sign in to comment.