From b892babef08a86c7680283a6dafa14eaa57d0ffe Mon Sep 17 00:00:00 2001
From: Saksham Sirohi
Date: Mon, 10 Mar 2025 08:53:56 +0000
Subject: [PATCH] fix: test_source_osv.py #4633

OSV_Source.update_ecosystems() no longer shells out to `gsutil ls` for
every top-level entry of the bucket. It now downloads ecosystems.txt
over HTTPS with aiohttp, stores the listed names as a set and drops the
"[EMPTY]" placeholder entry.

test_update_ecosystems is adapted accordingly: the blocking
make_http_requests() helper is run in the default executor so it does
not block the running event loop, both sides are compared as sets, and
a failure reports exactly which ecosystems are missing or unexpected.

---
Review notes (ignored by git am, not part of the commit message):

 * The test reduces self.osv.ecosystems to the part before ":" but
   compares against the raw lines of ecosystems.txt. If that file ever
   lists suffixed entries, they would always be reported as missing.
   TODO confirm the file only contains top-level names.
 * "DWF" and "JavaScript" are still added to the expected set although
   update_ecosystems() no longer lists the bucket itself. TODO confirm
   ecosystems.txt contains both names, otherwise the test cannot pass.

 cve_bin_tool/data_sources/osv_source.py | 28 +++++++-----------
 test/test_source_osv.py                 | 38 ++++++++++++++-----------
 2 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/cve_bin_tool/data_sources/osv_source.py b/cve_bin_tool/data_sources/osv_source.py
index 6cfed5ef75..d0fb913c35 100644
--- a/cve_bin_tool/data_sources/osv_source.py
+++ b/cve_bin_tool/data_sources/osv_source.py
@@ -49,25 +49,17 @@ def __init__(
         self.session = None
 
     async def update_ecosystems(self):
-        """Gets names of all ecosystems that OSV provides."""
-
-        ecosystems = []
-
-        # Inspect the list of files and folders at the top level in the GS bucket.
-        stdout, _, _ = await aio_run_command(["gsutil", "ls", self.gs_url])
-        lines = stdout.split(b"\n")
-
-        # For each line in the directory listing determine if it is a folder that
-        # contains all.zip.
-        for line in lines:
-            ecosystem_zip = line + b"all.zip"
-            stdout, _, _ = await aio_run_command(["gsutil", "ls", ecosystem_zip])
-            if stdout.strip(b"\n") == ecosystem_zip:
-                # Found a valid ecosystem
-                ecosystem = str(line).split("/")[-2]
-                ecosystems.append(ecosystem)
+        """Gets names of all ecosystems from OSV's ecosystems.txt."""
+        ecosystems_url = (
+            "https://osv-vulnerabilities.storage.googleapis.com/ecosystems.txt"
+        )
 
-        self.ecosystems = ecosystems
+        async with aiohttp.ClientSession() as session:
+            async with session.get(ecosystems_url, timeout=300) as response:
+                response.raise_for_status()
+                ecosystems_txt = await response.text()
+                self.ecosystems = set(ecosystems_txt.strip().split("\n"))
+                self.ecosystems.discard("[EMPTY]")
 
     async def get_ecosystem(self, ecosystem_url, session, mode="json"):
         """Fetches either a specific CVE or all.zip(containing all CVEs) file from an ecosystem."""
diff --git a/test/test_source_osv.py b/test/test_source_osv.py
index 9bb105b0ce..b316bde574 100644
--- a/test/test_source_osv.py
+++ b/test/test_source_osv.py
@@ -1,7 +1,7 @@
 # Copyright (C) 2022 Intel Corporation
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-
+import asyncio
 import io
 import shutil
 import tempfile
@@ -169,21 +169,27 @@ def teardown_class(cls):
     @pytest.mark.skipif(not EXTERNAL_SYSTEM(), reason="Needs network connection.")
     async def test_update_ecosystems(self):
         await self.osv.update_ecosystems()
-
-        ecosystems_txt = make_http_requests(
-            "text", url=self.ecosystems_url, timeout=300
-        ).strip("\n")
-        expected_ecosystems = set(ecosystems_txt.split("\n"))
-
-        # Because ecosystems.txt does not contain the complete list, this must be
-        # manually fixed up.
-        expected_ecosystems.add("DWF")
-        expected_ecosystems.add("JavaScript")
-
-        # Assert that there are no missing ecosystems
-        assert all(x in self.osv.ecosystems for x in expected_ecosystems)
-        # Assert that there are no extra ecosystems
-        assert all(x in expected_ecosystems for x in self.osv.ecosystems)
+        loop = asyncio.get_running_loop()
+        ecosystems_txt = await loop.run_in_executor(
+            None,
+            lambda: make_http_requests("text", url=self.ecosystems_url, timeout=300),
+        )
+        expected_top_level = set(ecosystems_txt.strip().split("\n"))
+
+        # Validate parent ecosystems
+        code_parent_ecosystems = {e.split(":")[0] for e in self.osv.ecosystems}
+        expected_top_level.update({"DWF", "JavaScript"})
+        expected_top_level.discard("[EMPTY]")
+        missing_parents = expected_top_level - code_parent_ecosystems
+        extra_parents = code_parent_ecosystems - expected_top_level
+
+        if missing_parents or extra_parents:
+            error_msg = []
+            if missing_parents:
+                error_msg.append(f"Missing parent ecosystems: {missing_parents}")
+            if extra_parents:
+                error_msg.append(f"Unexpected parent ecosystems: {extra_parents}")
+            pytest.fail("\n".join(error_msg))
 
     @pytest.mark.asyncio
     @pytest.mark.skipif(not EXTERNAL_SYSTEM(), reason="Needs network connection.")