Skip to content

Commit

Permalink
fix(minipipeline): handle IP-addr URLs in classic linear analysis (#1472
Browse files Browse the repository at this point in the history
)

This diff fixes
ooni/probe#1511 (comment).

This is the rationale of the diff: we need to track the origin of IP
addresses:

* "dns" if discovered using DNS;
* "th" if discovered using the test helper;
* null otherwise.

When filtering for classic analysis, we include "dns" entries if
resolved using getaddrinfo, drop "th" entries because they're not
relevant, and include null entries under the assumption that the probe
discovered them either directly from the input URL or because a redirect
led to a URL containing an IP address.

We also update the minipipeline test suite and show that the only
changes are related to the test added by
#1471.
  • Loading branch information
bassosimone committed Jan 24, 2024
1 parent 4fb9f0f commit 2cc9231
Show file tree
Hide file tree
Showing 186 changed files with 1,701 additions and 11 deletions.
6 changes: 6 additions & 0 deletions internal/cmd/minipipeline/testdata/analysis.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -146,6 +147,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -207,6 +209,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -268,6 +271,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -327,6 +331,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "doh",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down Expand Up @@ -386,6 +391,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "udp",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down
2 changes: 2 additions & 0 deletions internal/cmd/minipipeline/testdata/analysis_classic.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -141,6 +142,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down
6 changes: 6 additions & 0 deletions internal/cmd/minipipeline/testdata/observations.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "udp",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down Expand Up @@ -71,6 +72,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "doh",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down Expand Up @@ -134,6 +136,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -195,6 +198,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -256,6 +260,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -319,6 +324,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down
2 changes: 2 additions & 0 deletions internal/cmd/minipipeline/testdata/observations_classic.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down Expand Up @@ -78,6 +79,7 @@
"DNSResolvedAddrs": [
"130.192.16.171"
],
"IPAddressOrigin": "dns",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressBogon": false,
Expand Down
23 changes: 22 additions & 1 deletion internal/minipipeline/classic.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,33 @@ func ClassicFilter(input *WebObservationsContainer) (output *WebObservationsCont
for _, entry := range input.KnownTCPEndpoints {
ipAddr := entry.IPAddress.Unwrap() // it MUST be there
txid := entry.EndpointTransactionID.Unwrap()
if output.knownIPAddresses[ipAddr] == nil {

// Determine whether to keep entry depending on the IP addr origin
switch entry.IPAddressOrigin.UnwrapOr("") {

// If the address origin is the TH, then it does not belong to classic analysis
case IPAddressOriginTH:
continue

// If the address origin is the DNS, then it depends on whether it was
// resolved via getaddrinfo or via another resolver
case IPAddressOriginDNS:
if output.knownIPAddresses[ipAddr] == nil {
continue
}

// If the address origin is unknown, then we assume the probe
// already knows it, e.g., via the URL or via a subsequent redirect
// and thus we keep this specific entry
default:
// nothing
}

// Discard all the entries where we're not fetching body
if !entry.TagFetchBody.UnwrapOr(false) {
continue
}

output.KnownTCPEndpoints[txid] = entry
}

Expand Down
19 changes: 16 additions & 3 deletions internal/minipipeline/observation.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ const (
WebObservationTypeHTTPRoundTrip
)

// These are the possible origins for IP addresses. They are the values stored
// in the optional WebObservation.IPAddressOrigin field; when that field is
// unset, the origin of the IP address is not known.
const (
// IPAddressOriginDNS indicates that the IP address was discovered using DNS.
IPAddressOriginDNS = "dns"
// IPAddressOriginTH indicates that the IP address was discovered using the test helper.
IPAddressOriginTH = "th"
)

// WebObservation is an observation of the flow that starts with a DNS lookup that
// either fails or discovers an IP address and proceeds by documenting binding such an
// address to a port to obtain and use a TCP or UDP endpoint.
Expand Down Expand Up @@ -145,6 +151,9 @@ type WebObservation struct {
//
// 3. when the input URL contains an IP address.

// IPAddressOrigin is the optional origin of the IP address.
IPAddressOrigin optional.Value[string]

// IPAddress is the optional IP address that this observation is about. We typically derive
// this value from a DNS lookup, but sometimes we know it from other means (e.g., from
// the Web Connectivity test helper response). When DNSLookupFailure contains a nonempty
Expand Down Expand Up @@ -357,6 +366,7 @@ func (c *WebObservationsContainer) ingestDNSLookupSuccesses(
DNSQueryType: optional.Some(ev.QueryType),
DNSEngine: optional.Some(ev.Engine),
DNSResolvedAddrs: optional.Some(addrs),
IPAddressOrigin: optional.Some(IPAddressOriginDNS),
IPAddress: optional.Some(ipAddr),
IPAddressASN: utilsGeoipxLookupASN(lookupper, ipAddr),
IPAddressBogon: optional.Some(netxlite.IsBogon(ipAddr)),
Expand All @@ -383,9 +393,10 @@ func (c *WebObservationsContainer) IngestTCPConnectEvents(
obs, found := c.knownIPAddresses[ev.IP]
if !found {
obs = &WebObservation{
IPAddress: optional.Some(ev.IP),
IPAddressASN: utilsGeoipxLookupASN(lookupper, ev.IP),
IPAddressBogon: optional.Some(netxlite.IsBogon(ev.IP)),
IPAddressOrigin: optional.None[string](), // we don't know!
IPAddress: optional.Some(ev.IP),
IPAddressASN: utilsGeoipxLookupASN(lookupper, ev.IP),
IPAddressBogon: optional.Some(netxlite.IsBogon(ev.IP)),
}
}

Expand All @@ -403,6 +414,7 @@ func (c *WebObservationsContainer) IngestTCPConnectEvents(
DNSDomain: obs.DNSDomain,
DNSLookupFailure: obs.DNSLookupFailure,
DNSResolvedAddrs: obs.DNSResolvedAddrs,
IPAddressOrigin: obs.IPAddressOrigin,
IPAddress: obs.IPAddress,
IPAddressASN: obs.IPAddressASN,
IPAddressBogon: obs.IPAddressBogon,
Expand Down Expand Up @@ -531,6 +543,7 @@ func (c *WebObservationsContainer) controlMatchDNSLookupResults(inputDomain stri
// handle the case in which the IP address has been provided by the control, which
// is a case where the domain is empty and the IP address is in thAddrMap
if domain == "" && thAddrMap[addr] {
obs.IPAddressOrigin = optional.Some(IPAddressOriginTH)
obs.ControlDNSDomain = optional.Some(inputDomain)
obs.ControlDNSLookupFailure = optional.Some(utilsStringPointerToString(resp.DNS.Failure))
obs.ControlDNSResolvedAddrs = optional.Some(NewSet(resp.DNS.Addrs...))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -112,6 +113,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -158,6 +160,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -202,6 +205,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "udp",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -109,6 +110,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "udp",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down Expand Up @@ -60,6 +61,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -106,6 +108,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -154,6 +157,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -63,6 +64,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -115,6 +116,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -161,6 +163,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -207,6 +210,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -251,6 +255,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "udp",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down Expand Up @@ -295,6 +300,7 @@
"DNSQueryType": "AAAA",
"DNSEngine": "doh",
"DNSResolvedAddrs": null,
"IPAddressOrigin": null,
"IPAddress": null,
"IPAddressASN": null,
"IPAddressBogon": null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down Expand Up @@ -109,6 +110,7 @@
"DNSResolvedAddrs": [
"104.154.89.105"
],
"IPAddressOrigin": "dns",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressBogon": false,
Expand Down
Loading

0 comments on commit 2cc9231

Please sign in to comment.