From d4398d9086b220541d54bcbbf981838457463b98 Mon Sep 17 00:00:00 2001 From: hynnot Date: Mon, 27 Jan 2025 16:26:52 +0100 Subject: [PATCH 1/3] Add support for passing a list to filters in list_measurements --- .../routers/v1/measurements.py | 120 ++++++++---------- .../tests/test_measurements.py | 91 +++++++++++++ 2 files changed, 142 insertions(+), 69 deletions(-) create mode 100644 ooniapi/services/oonimeasurements/tests/test_measurements.py diff --git a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py index c889c200..1511a2c8 100644 --- a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py +++ b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py @@ -421,7 +421,7 @@ class MeasurementBase(BaseModel): probe_asn: Optional[str] = Field(default=None, title="ASN of the measurement probe") probe_cc: Optional[str] = Field(default=None, title="country code of the probe ASN") report_id: Optional[str] = Field(default=None, title="report id of the measurement") - scores: Optional[str] = Field( + scores: Optional[Dict[str, object]] = Field( default=None, title="blocking scores of the measurement" ) test_name: Optional[str] = Field(default=None, title="test name of the measurement") @@ -531,46 +531,46 @@ def genurl(base_url: str, path: str, **kw) -> str: async def list_measurements( request: Request, response: Response, - report_id: Annotated[ - Optional[str], - Query(description="Report_id to search measurements for", min_length=3), - ], - input: Annotated[ - Optional[str], - Query( - description="Input (for example a URL or IP address) to search measurements for", - min_length=3, - ), - ], - domain: Annotated[ - Optional[str], - Query(description="Domain to search measurements for", min_length=3), - ], - probe_cc: Annotated[Optional[str], Query(description="Two letter country code")], + report_id: Optional[str] = Query( + None, + description="Report_id to search measurements for", + min_length=3, + ), + input: Optional[str] = Query( + None, + description="Input (for example a URL or IP address) to search measurements for", + min_length=3, + ), + domain: Optional[str] = Query( + None, + description="Domain to search measurements for", + min_length=3, + ), + probe_cc: Annotated[Optional[str], Query(description="Two letter country code")] = None, probe_asn: Annotated[ Union[str, int, None], Query(description='Autonomous system number in the format "ASXXX"'), - ], + ] = None, test_name: Annotated[ Optional[str], Query(description="Name of the test"), - ], + ] = None, category_code: Annotated[ Optional[str], Query(description="Category code from the citizenlab list"), - ], + ] = None, since: Annotated[ Optional[str], Query( description='Start date of when measurements were run (ex. "2016-10-20T10:30:00")' ), - ], + ] = None, until: Annotated[ Optional[str], Query( description='End date of when measurement were run (ex. "2016-10-20T10:30:00")' ), - ], + ] = None, confirmed: Annotated[ Optional[bool], Query( @@ -579,7 +579,7 @@ async def list_measurements( "Default: no filtering (show both true and false)" ) ), - ], + ] = None, anomaly: Annotated[ Optional[bool], Query( @@ -588,7 +588,7 @@ async def list_measurements( "Default: no filtering (show both true and false)" ) ), - ], + ] = None, failure: Annotated[ Optional[bool], Query( @@ -597,22 +597,22 @@ async def list_measurements( "Default: no filtering (show both true and false)" ) ), - ], + ] = None, software_version: Annotated[ Optional[str], Query(description="Filter measurements by software version. Comma-separated."), - ], + ] = None, test_version: Annotated[ Optional[str], Query(description="Filter measurements by test version. Comma-separated."), - ], + ] = None, engine_version: Annotated[ Optional[str], Query(description="Filter measurements by engine version. Comma-separated."), - ], + ] = None, ooni_run_link_id: Annotated[ Optional[str], Query(description="Filter measurements by OONIRun ID.") - ], + ] = None, order_by: Annotated[ Optional[str], Query( @@ -626,7 +626,7 @@ async def list_measurements( "test_name", ], ), - ], + ] = None, order: Annotated[ str, Query( @@ -651,12 +651,6 @@ async def list_measurements( # - lang: 'curl' # source: | # curl "https://api.ooni.io/api/v1/measurements?probe_cc=IT&confirmed=true&since=2017-09-01" - if ( - probe_asn is not None - and isinstance(probe_asn, str) - and probe_asn.startswith("AS") - ): - probe_asn = int(probe_asn[2:]) software_versions = None if software_version: software_versions = commasplit(software_version) @@ -735,28 +729,31 @@ async def list_measurements( fpwhere.append(sql.text("report_id = :report_id")) if probe_cc: - if probe_cc == "ZZ": - log.info("Refusing list_measurements with probe_cc set to ZZ") - raise AbortMeasurementList - query_params["probe_cc"] = probe_cc - fpwhere.append(sql.text("probe_cc = :probe_cc")) + probe_cc_list = probe_cc.split(",") + query_params["probe_cc"] = probe_cc_list + fpwhere.append(sql.text("probe_cc IN :probe_cc")) else: fpwhere.append(sql.text("probe_cc != 'ZZ'")) if probe_asn is not None: - if probe_asn == 0: - log.info("Refusing list_measurements with probe_asn set to 0") - raise AbortMeasurementList - query_params["probe_asn"] = probe_asn - fpwhere.append(sql.text("probe_asn = :probe_asn")) + if isinstance(probe_asn, str): + probe_asn_list = probe_asn.split(",") + probe_asn_integer_list = [] + for probe_asn_value in probe_asn_list: + if probe_asn_value.startswith("AS"): + probe_asn_integer_list.append(int(probe_asn_value[2:])) + query_params["probe_asn"] = probe_asn_integer_list + fpwhere.append(sql.text("probe_asn IN :probe_asn")) + else: # https://ooni.org/post/2020-ooni-probe-asn-incident-report/ # https://github.com/ooni/explorer/issues/495 fpwhere.append(sql.text("probe_asn != 0")) if test_name is not None: - query_params["test_name"] = test_name - fpwhere.append(sql.text("test_name = :test_name")) + test_name_list = test_name.split(",") + query_params["test_name"] = test_name_list + fpwhere.append(sql.text("test_name IN :test_name")) if software_versions is not None: query_params["software_versions"] = software_versions @@ -809,8 +806,9 @@ async def list_measurements( elif domain or category_code: # both domain and category_code can be set at the same time if domain: - query_params["domain"] = domain - fpwhere.append(sql.text("domain = :domain")) + domain_list = domain.split(",") + query_params["domain"] = domain_list + fpwhere.append(sql.text("domain IN :domain")) if category_code: query_params["category_code"] = category_code @@ -853,7 +851,7 @@ async def list_measurements( probe_asn="AS{}".format(row["probe_asn"]), test_name=row["test_name"], measurement_start_time=row["measurement_start_time"], - input=row["input"], + input_=row["input"], anomaly=row["anomaly"] == "t", # TODO: This is wrong confirmed=row["confirmed"] == "t", failure=row["msm_failure"] == "t", @@ -861,22 +859,6 @@ async def list_measurements( ) ) - results.append( - { - "measurement_uid": msmt_uid, - "measurement_url": url, - "report_id": row["report_id"], - "probe_cc": row["probe_cc"], - "probe_asn": "AS{}".format(row["probe_asn"]), - "test_name": row["test_name"], - "measurement_start_time": row["measurement_start_time"], - "input": row["input"], - "anomaly": row["anomaly"] == "t", - "confirmed": row["confirmed"] == "t", - "failure": row["msm_failure"] == "t", - "scores": json.loads(row["scores"]), - } - ) except OperationalError as exc: log.error(exc) if isinstance(exc.orig, QueryCanceledError): @@ -889,8 +871,8 @@ async def list_measurements( # Replace the special value INULL for "input" with None for i, r in enumerate(results): - if r["input"] == INULL: - results[i]["input"] = None + if r.input_ == INULL: + results[i].input_ = None pages = -1 count = -1 diff --git a/ooniapi/services/oonimeasurements/tests/test_measurements.py b/ooniapi/services/oonimeasurements/tests/test_measurements.py new file mode 100644 index 00000000..aea48404 --- /dev/null +++ b/ooniapi/services/oonimeasurements/tests/test_measurements.py @@ -0,0 +1,91 @@ +import pytest + + +route = "api/v1/measurements" + + +def test_list_measurements(client): + response = client.get(route) + json = response.json() + + assert isinstance(json["results"], list), json + assert len(json["results"]) == 100 + + +def test_list_measurements_with_since_and_until(client): + params = { + "since": "2024-01-01", + "until": "2024-01-02", + } + + response = client.get(route, params=params) + json = response.json() + + assert isinstance(json["results"], list), json + assert len(json["results"]) == 100 + + +@pytest.mark.parametrize( + "filter_param, filter_value", + [ + ("test_name", "web_connectivity"), + ("probe_cc", "IT"), + ("probe_asn", "AS30722"), + ] +) +def test_list_measurements_with_one_value_to_filters(client, filter_param, filter_value): + params = {} + params[filter_param] = filter_value + + response = client.get(route, params=params) + + json = response.json() + assert isinstance(json["results"], list), json + assert len(json["results"]) > 0 + for result in json["results"]: + assert result[filter_param] == filter_value, result + + +def test_list_measurements_with_one_value_to_filters_not_in_the_result(client): + params = { + "domain": "cloudflare-dns.com", + } + + response = client.get(route, params=params) + + json = response.json() + assert isinstance(json["results"], list), json + assert len(json["results"]) > 0 + + +@pytest.mark.parametrize( + "filter_param, filter_value", + [ + ("test_name", "web_connectivity,dnscheck,stunreachability,tor"), + ("probe_cc", "IT,US,RU"), + ("probe_asn", "AS30722,3269,7738,55430"), + ] +) +def test_list_measurements_with_multiple_values_to_filters(client, filter_param, filter_value): + params = {} + params[filter_param] = filter_value + + response = client.get(route, params=params) + + json = response.json() + assert isinstance(json["results"], list), json + assert len(json["results"]) > 0 + for result in json["results"]: + assert result[filter_param] in filter_value, result + + +def test_list_measurements_with_multiple_values_to_filters_not_in_the_result(client): + params = { + "domain": "cloudflare-dns.com, adblock.doh.mullvad.net, 1.1.1.1", + } + + response = client.get(route, params=params) + + json = response.json() + assert isinstance(json["results"], list), json + assert len(json["results"]) > 0 From d4fc3768c19e4412993d74a27bc020d910c6ad22 Mon Sep 17 00:00:00 2001 From: hynnot Date: Mon, 3 Feb 2025 15:44:50 +0100 Subject: [PATCH 2/3] Fix bug with input response and improve test to validate it --- .../src/oonimeasurements/routers/v1/measurements.py | 2 +- .../oonimeasurements/tests/test_measurements.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py index 1511a2c8..4785a420 100644 --- a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py +++ b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/measurements.py @@ -851,7 +851,7 @@ async def list_measurements( probe_asn="AS{}".format(row["probe_asn"]), test_name=row["test_name"], measurement_start_time=row["measurement_start_time"], - input_=row["input"], + input=row["input"], anomaly=row["anomaly"] == "t", # TODO: This is wrong confirmed=row["confirmed"] == "t", failure=row["msm_failure"] == "t", diff --git a/ooniapi/services/oonimeasurements/tests/test_measurements.py b/ooniapi/services/oonimeasurements/tests/test_measurements.py index aea48404..903323e2 100644 --- a/ooniapi/services/oonimeasurements/tests/test_measurements.py +++ b/ooniapi/services/oonimeasurements/tests/test_measurements.py @@ -46,9 +46,10 @@ def test_list_measurements_with_one_value_to_filters(client, filter_param, filte assert result[filter_param] == filter_value, result -def test_list_measurements_with_one_value_to_filters_not_in_the_result(client): +def test_list_measurements_with_one_value_to_filters_not_present_in_the_result(client): + domain = "cloudflare-dns.com" params = { - "domain": "cloudflare-dns.com", + "domain": domain, } response = client.get(route, params=params) @@ -56,6 +57,8 @@ def test_list_measurements_with_one_value_to_filters_not_in_the_result(client): json = response.json() assert isinstance(json["results"], list), json assert len(json["results"]) > 0 + for result in json["results"]: + assert domain in result["input"], result @pytest.mark.parametrize( @@ -80,8 +83,9 @@ def test_list_measurements_with_multiple_values_to_filters(client, filter_param, def test_list_measurements_with_multiple_values_to_filters_not_in_the_result(client): + domainCollection = "cloudflare-dns.com, adblock.doh.mullvad.net, 1.1.1.1" params = { - "domain": "cloudflare-dns.com, adblock.doh.mullvad.net, 1.1.1.1", + "domain": domainCollection } response = client.get(route, params=params) @@ -89,3 +93,5 @@ def test_list_measurements_with_multiple_values_to_filters_not_in_the_result(cli json = response.json() assert isinstance(json["results"], list), json assert len(json["results"]) > 0 + for result in json["results"]: + assert any(domain in result["input"] for domain in domainCollection), result From 5f1d60fb0525ed665e8c8ae0880bcd196fda56f7 Mon Sep 17 00:00:00 2001 From: hynnot Date: Tue, 4 Feb 2025 09:49:50 +0100 Subject: [PATCH 3/3] Fix error testing with multiple values in domain filter --- ooniapi/services/oonimeasurements/tests/test_measurements.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ooniapi/services/oonimeasurements/tests/test_measurements.py b/ooniapi/services/oonimeasurements/tests/test_measurements.py index 903323e2..2cf72b5e 100644 --- a/ooniapi/services/oonimeasurements/tests/test_measurements.py +++ b/ooniapi/services/oonimeasurements/tests/test_measurements.py @@ -93,5 +93,6 @@ def test_list_measurements_with_multiple_values_to_filters_not_in_the_result(cli json = response.json() assert isinstance(json["results"], list), json assert len(json["results"]) > 0 + domain_list = domainCollection.split(", ") for result in json["results"]: - assert any(domain in result["input"] for domain in domainCollection), result + assert any(domain in result["input"] for domain in domain_list), result