Skip to content

Commit

Permalink
refactor(minipipeline): change DNSDiff foundations (#1408)
Browse files Browse the repository at this point in the history
Here we start using sets and we are consolidating the algorithm to say
that there are unexpected addresses.

Part of ooni/probe#2634
  • Loading branch information
bassosimone committed Nov 30, 2023
1 parent 00a37ce commit c549df9
Show file tree
Hide file tree
Showing 70 changed files with 308 additions and 290 deletions.
4 changes: 2 additions & 2 deletions internal/cmd/minipipeline/testdata/analysis.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"DNSLookupSuccessWithInvalidAddresses": [],
"DNSLookupSuccessWithInvalidAddressesClassic": [],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": {},
"HTTPDiffBodyProportionFactor": 1,
"HTTPDiffStatusCodeMatch": true,
Expand Down
4 changes: 2 additions & 2 deletions internal/cmd/minipipeline/testdata/analysis_classic.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"DNSLookupSuccessWithInvalidAddresses": [],
"DNSLookupSuccessWithInvalidAddressesClassic": [],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": null,
"HTTPDiffBodyProportionFactor": 1,
"HTTPDiffStatusCodeMatch": true,
Expand Down
216 changes: 113 additions & 103 deletions internal/minipipeline/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ import (
func AnalyzeWebObservations(container *WebObservationsContainer) *WebAnalysis {
analysis := &WebAnalysis{}

analysis.ComputeDNSLookupSuccessWithInvalidAddresses(container)
analysis.ComputeDNSLookupSuccessWithInvalidAddressesClassic(container)
analysis.ComputeDNSExperimentFailure(container)
analysis.ComputeDNSTransactionsWithBogons(container)
analysis.ComputeDNSTransactionsWithUnexpectedFailures(container)
analysis.ComputeDNSPossiblyInvalidAddrsClassic(container)
analysis.ComputeDNSPossiblyNonexistingDomains(container)

analysis.ComputeTCPTransactionsWithUnexpectedTCPConnectFailures(container)
Expand All @@ -34,37 +34,24 @@ func AnalyzeWebObservations(container *WebObservationsContainer) *WebAnalysis {
// WebAnalysis summarizes the content of [*WebObservationsContainer].
//
// The zero value of this struct is ready to use.
//
// For optional fields, they are None (i.e., `null` in JSON, `nil` in Go) when the corresponding
// algorithm either didn't run or didn't encounter enough data to determine a non-None result. When
// they are not None, they can still be empty (e.g., `{}` in JSON and in Go). In the latter case,
// them being empty means we encountered good enough data to determine whether we needed to add
// something to such a field and decided not to. For example, DNSTransactionWithBogons being None
// means that there are no suitable transactions to inspect. It being empty, instead, means we
// have transactions to inspect but none of them contains bogons. In other words, most fields are
// three state and one should take this into account when performing data analysis.
type WebAnalysis struct {
// DNSLookupSuccessWithInvalidAddresses contains DNS transactions with invalid IP addresses by
// taking into account control info, bogons, and TLS handshakes.
DNSLookupSuccessWithInvalidAddresses Set[int64]

// DNSLookupSuccessWithInvalidAddressesClassic is like DNSLookupInvalid but the algorithm is more relaxed
// to be compatible with Web Connectivity v0.4's behavior.
DNSLookupSuccessWithInvalidAddressesClassic Set[int64]

// DNSExperimentFailure is the first failure experienced by a getaddrinfo-like resolver.
DNSExperimentFailure optional.Value[string]

// DNSTransactionsWithBogons contains the list of DNS transactions containing bogons.
DNSTransactionsWithBogons optional.Value[map[int64]bool]

// DNSTransactionsWithUnexpectedFailures contains the DNS transaction IDs that
// contain failures while the control measurement succeeded. Note that we don't
// include DNS-over-HTTPS failures inside the list, because a DoH failure is
// not related to the domain we're querying for.
DNSTransactionsWithUnexpectedFailures optional.Value[map[int64]bool]

// DNSPossiblyInvalidAddrsClassic contains the addresses that are not valid for the
// domain. An addres is valid for the domain if:
//
// 1. the address was resolved by the TH; or
//
// 2. the address ASN belongs to the set of ASNs obtained by mapping
// addresses resolved by the TH to their corresponding ASN.
DNSPossiblyInvalidAddrsClassic optional.Value[map[string]bool]

// DNSPossiblyNonexistingDomains lists all the domains for which both
// the probe and the TH failed to perform DNS lookups.
DNSPossiblyNonexistingDomains optional.Value[map[string]bool]
Expand Down Expand Up @@ -119,6 +106,109 @@ type WebAnalysis struct {
TCPTransactionsWithUnexplainedUnexpectedFailures optional.Value[map[int64]bool]
}

// ComputeDNSLookupSuccessWithInvalidAddresses computes the DNSLookupInvalid field.
func (wa *WebAnalysis) ComputeDNSLookupSuccessWithInvalidAddresses(c *WebObservationsContainer) {
// fill the invalid set
var already Set[int64]
for _, obs := range c.DNSLookupSuccesses {
// avoid considering a lookup we already considered
if already.Contains(obs.DNSTransactionID.Unwrap()) {
continue
}
already.Add(obs.DNSTransactionID.Unwrap())

// lookups once we started following redirects should not be considered
if obs.TagDepth.IsNone() || obs.TagDepth.Unwrap() != 0 {
continue
}

// if there's a bogon, mark as invalid
if !obs.IPAddressBogon.IsNone() && obs.IPAddressBogon.Unwrap() {
wa.DNSLookupSuccessWithInvalidAddresses.Add(obs.DNSTransactionID.Unwrap())
continue
}

// when there is no control info, we cannot say much
if obs.ControlDNSResolvedAddrs.IsNone() {
continue
}

// obtain measurement and control
measurement := obs.DNSResolvedAddrs.Unwrap()
control := obs.ControlDNSResolvedAddrs.Unwrap()

// this lookup is good if there is IP addresses intersection
if DNSDiffFindCommonIPAddressIntersection(measurement, control).Len() > 0 {
continue
}

// this lookup is good if there is ASN intersection
if DNSDiffFindCommonASNsIntersection(measurement, control).Len() > 0 {
continue
}

// mark as invalid
wa.DNSLookupSuccessWithInvalidAddresses.Add(obs.DNSTransactionID.Unwrap())
}

// undo using TLS handshake info
for _, obs := range c.KnownTCPEndpoints {
// we must have a successuful TLS handshake
if obs.TLSHandshakeFailure.IsNone() || obs.TLSHandshakeFailure.Unwrap() != "" {
continue
}

// we must have a DNSTransactionID
txid := obs.DNSTransactionID.UnwrapOr(0)
if txid <= 0 {
continue
}

// this is actually valid
wa.DNSLookupSuccessWithInvalidAddresses.Remove(txid)
}
}

// ComputeDNSLookupSuccessWithInvalidAddressesClassic computes the DNSLookupInvalidClassic field.
func (wa *WebAnalysis) ComputeDNSLookupSuccessWithInvalidAddressesClassic(c *WebObservationsContainer) {
var already Set[int64]

for _, obs := range c.DNSLookupSuccesses {
// avoid considering a lookup we already considered
if already.Contains(obs.DNSTransactionID.Unwrap()) {
continue
}
already.Add(obs.DNSTransactionID.Unwrap())

// lookups once we started following redirects should not be considered
if obs.TagDepth.IsNone() || obs.TagDepth.Unwrap() != 0 {
continue
}

// when there is no control info, we cannot say much
if obs.ControlDNSResolvedAddrs.IsNone() {
continue
}

// obtain measurement and control
measurement := obs.DNSResolvedAddrs.Unwrap()
control := obs.ControlDNSResolvedAddrs.Unwrap()

// this lookup is good if there is IP addresses intersection
if DNSDiffFindCommonIPAddressIntersection(measurement, control).Len() > 0 {
continue
}

// this lookup is good if there is ASN intersection
if DNSDiffFindCommonASNsIntersection(measurement, control).Len() > 0 {
continue
}

// mark as invalid
wa.DNSLookupSuccessWithInvalidAddressesClassic.Add(obs.DNSTransactionID.Unwrap())
}
}

// ComputeDNSExperimentFailure computes the DNSExperimentFailure field.
func (wa *WebAnalysis) ComputeDNSExperimentFailure(c *WebObservationsContainer) {

Expand Down Expand Up @@ -162,43 +252,6 @@ func (wa *WebAnalysis) ComputeDNSExperimentFailure(c *WebObservationsContainer)
}
}

// ComputeDNSTransactionsWithBogons computes the DNSTransactionsWithBogons field.
func (wa *WebAnalysis) ComputeDNSTransactionsWithBogons(c *WebObservationsContainer) {
// Implementation note: any bogon IP address resolved by a DoH service
// is STILL suspicious since it SHOULD NOT happen. TODO(bassosimone): an
// even better algorithm could possibly check whether also the TH has
// observed bogon IP addrs and avoid flagging in such a case.
//
// See https://github.com/ooni/probe/issues/2274 for more information.

// we cannot flip the state from None to empty until we inspect at least
// a single successful DNS lookup transaction
if len(c.DNSLookupSuccesses) <= 0 {
return
}
state := make(map[int64]bool)

for _, obs := range c.DNSLookupSuccesses {
// do nothing if we don't know whether there's a bogon
if obs.IPAddressBogon.IsNone() {
continue
}

// do nothing if there is no bogon
if !obs.IPAddressBogon.Unwrap() {
continue
}

// update state
if id := obs.DNSTransactionID.UnwrapOr(0); id > 0 {
state[id] = true
}
}

// note that optional.Some constructs None if state is nil
wa.DNSTransactionsWithBogons = optional.Some(state)
}

// ComputeDNSTransactionsWithUnexpectedFailures computes the DNSTransactionsWithUnexpectedFailures field.
func (wa *WebAnalysis) ComputeDNSTransactionsWithUnexpectedFailures(c *WebObservationsContainer) {
// Implementation note: a DoH failure is not information about the URL we're
Expand Down Expand Up @@ -248,49 +301,6 @@ func (wa *WebAnalysis) ComputeDNSTransactionsWithUnexpectedFailures(c *WebObserv
wa.DNSTransactionsWithUnexpectedFailures = optional.Some(state)
}

// ComputeDNSPossiblyInvalidAddrsClassic computes the DNSPossiblyInvalidAddrsClassic field.
func (wa *WebAnalysis) ComputeDNSPossiblyInvalidAddrsClassic(c *WebObservationsContainer) {
// Implementation note: in the case in which DoH returned answers, here
// it still feels okay to consider them. We should avoid flagging DoH
// failures as measurement failures but if DoH returns us some unexpected
// even-non-bogon addr, it seems worth flagging for now.
//
// See https://github.com/ooni/probe/issues/2274

var state map[string]bool

for _, obs := range c.KnownTCPEndpoints {
addr := obs.IPAddress.Unwrap()

// skip the comparison if we don't have info about matching
if obs.DNSResolvedAddrs.IsNone() || obs.ControlDNSResolvedAddrs.IsNone() {
continue
}

// flip state from None to empty when we see the first couple of
// (probe, th) failures allowing us to perform a comparison
if state == nil {
state = make(map[string]bool)
}

measurement := obs.DNSResolvedAddrs.Unwrap()
control := obs.ControlDNSResolvedAddrs.Unwrap()

ipAddrIntersection := DNSDiffFindCommonIPAddressIntersection(measurement, control)
asnIntersection := DNSDiffFindCommonASNsIntersection(measurement, control)

// an address is suspicious if we have information regarding its potential
// matching with TH info and we know it does not match
if ipAddrIntersection.Len() <= 0 && asnIntersection.Len() <= 0 {
state[addr] = true
continue
}
}

// note that optional.Some constructs None if state is nil
wa.DNSPossiblyInvalidAddrsClassic = optional.Some(state)
}

// ComputeDNSPossiblyNonexistingDomains computes the DNSPossiblyNonexistingDomains field.
func (wa *WebAnalysis) ComputeDNSPossiblyNonexistingDomains(c *WebObservationsContainer) {
var state map[string]bool
Expand Down
19 changes: 0 additions & 19 deletions internal/minipipeline/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,6 @@ func TestWebAnalysisComputeDNSExperimentFailure(t *testing.T) {
})
}

func TestWebAnalysisComputeDNSTransactionsWithBogons(t *testing.T) {
t.Run("when there's no IPAddressBogon", func(t *testing.T) {
container := &WebObservationsContainer{
DNSLookupSuccesses: []*WebObservation{
{
/* empty */
},
},
}

wa := &WebAnalysis{}
wa.ComputeDNSTransactionsWithBogons(container)

if v := wa.DNSTransactionsWithBogons.UnwrapOr(nil); len(v) != 0 {
t.Fatal("DNSTransactionsWithBogons is not none")
}
})
}

func TestWebAnalysisComputeTCPTransactionsWithUnexpectedHTTPFailures(t *testing.T) {
t.Run("when both measurement and control fail", func(t *testing.T) {
container := &WebObservationsContainer{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"DNSLookupSuccessWithInvalidAddresses": [],
"DNSLookupSuccessWithInvalidAddressesClassic": [],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": {},
"HTTPDiffBodyProportionFactor": null,
"HTTPDiffStatusCodeMatch": null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"DNSLookupSuccessWithInvalidAddresses": [],
"DNSLookupSuccessWithInvalidAddressesClassic": [],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": null,
"HTTPDiffBodyProportionFactor": null,
"HTTPDiffStatusCodeMatch": null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"DNSLookupSuccessWithInvalidAddresses": [],
"DNSLookupSuccessWithInvalidAddressesClassic": [],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": {},
"HTTPDiffBodyProportionFactor": null,
"HTTPDiffStatusCodeMatch": null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"DNSLookupSuccessWithInvalidAddresses": [],
"DNSLookupSuccessWithInvalidAddressesClassic": [],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {},
"DNSPossiblyNonexistingDomains": null,
"HTTPDiffBodyProportionFactor": null,
"HTTPDiffStatusCodeMatch": null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
{
"DNSLookupSuccessWithInvalidAddresses": [
1,
2
],
"DNSLookupSuccessWithInvalidAddressesClassic": [
1,
2
],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {
"104.154.89.105": true
},
"DNSPossiblyNonexistingDomains": {},
"HTTPDiffBodyProportionFactor": 1,
"HTTPDiffStatusCodeMatch": true,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{
"DNSLookupSuccessWithInvalidAddresses": [
2
],
"DNSLookupSuccessWithInvalidAddressesClassic": [
2
],
"DNSExperimentFailure": null,
"DNSTransactionsWithBogons": {},
"DNSTransactionsWithUnexpectedFailures": null,
"DNSPossiblyInvalidAddrsClassic": {
"104.154.89.105": true
},
"DNSPossiblyNonexistingDomains": null,
"HTTPDiffBodyProportionFactor": null,
"HTTPDiffStatusCodeMatch": null,
Expand Down
Loading

0 comments on commit c549df9

Please sign in to comment.