Skip to content

Commit

Permalink
Merge pull request #156 from DataDog/fix-155
Browse files Browse the repository at this point in the history
Fix package extraction for namespaced npm packages (fixes #155)
  • Loading branch information
christophetd authored Feb 15, 2023
2 parents 8cde39b + 9ac5bd0 commit 91a35fb
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 13 deletions.
7 changes: 4 additions & 3 deletions guarddog/scanners/npm_package_scanner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os
import pathlib
import typing
from urllib.parse import urlparse

import requests
Expand All @@ -16,7 +17,7 @@ class NPMPackageScanner(PackageScanner):
def __init__(self) -> None:
super().__init__(Analyzer(ECOSYSTEM.NPM))

def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> dict:
def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
git_target = None
if urlparse(package_name).hostname is not None and package_name.endswith('.git'):
git_target = package_name
Expand Down Expand Up @@ -44,8 +45,8 @@ def download_and_get_package_info(self, directory: str, package_name: str, versi

tarball_url = details["dist"]["tarball"]
file_extension = pathlib.Path(tarball_url).suffix
zippath = os.path.join(directory, package_name + file_extension)
zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
unzippedpath = zippath.removesuffix(file_extension)
self.download_compressed(tarball_url, zippath, unzippedpath)

return data
return data, unzippedpath
12 changes: 7 additions & 5 deletions guarddog/scanners/pypi_package_scanner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import typing

from guarddog.analyzer.analyzer import Analyzer
from guarddog.ecosystems import ECOSYSTEM
Expand All @@ -10,11 +11,11 @@ class PypiPackageScanner(PackageScanner):
def __init__(self) -> None:
super().__init__(Analyzer(ECOSYSTEM.PYPI))

def download_and_get_package_info(self, directory: str, package_name: str, version=None):
self.download_package(package_name, directory, version)
return get_package_info(package_name)
def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
extract_dir = self.download_package(package_name, directory, version)
return get_package_info(package_name), extract_dir

def download_package(self, package_name, directory, version=None) -> None:
def download_package(self, package_name, directory, version=None) -> str:
"""Downloads the PyPI distribution for a given package and version
Args:
Expand All @@ -28,7 +29,7 @@ def download_package(self, package_name, directory, version=None) -> None:
Exception: "Compressed file for package does not exist."
Exception: "Error retrieving package: " + <error message>
Returns:
None
Path where the package was extracted
"""

data = get_package_info(package_name)
Expand Down Expand Up @@ -60,6 +61,7 @@ def download_package(self, package_name, directory, version=None) -> None:
unzippedpath = zippath.removesuffix(file_extension)

self.download_compressed(url, zippath, unzippedpath)
return unzippedpath
else:
raise Exception(f"Compressed file for {package_name} does not exist on PyPI.")
else:
Expand Down
6 changes: 3 additions & 3 deletions guarddog/scanners/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,16 +241,16 @@ def scan_local(self, path, rules=None, callback: typing.Callable[[dict], None] =
raise Exception(f"Path {path} does not exist.")

@abstractmethod
def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> dict:
def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
raise NotImplementedError('download_and_get_package_info is not implemented')

def _scan_remote(self, name, base_dir, version=None, rules=None, write_package_info=False):
directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), base_dir)
file_path = os.path.join(directory, name)

file_path = None
package_info = None
try:
package_info = self.download_and_get_package_info(directory, name, version)
package_info, file_path = self.download_and_get_package_info(directory, name, version)
except Exception as e:
log.debug("Unable to download package, ignoring: " + str(e))
return {'issues': 0, 'errors': {'download-package': str(e)}}
Expand Down
15 changes: 13 additions & 2 deletions tests/core/test_npm_package_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,28 @@
def test_download_and_get_package_info():
scanner = NPMPackageScanner()
with tempfile.TemporaryDirectory() as tmpdirname:
data = scanner.download_and_get_package_info(tmpdirname, "minivlad")
data, path = scanner.download_and_get_package_info(tmpdirname, "minivlad")
assert path
assert path.endswith("/minivlad")
assert os.path.exists(os.path.join(tmpdirname, "minivlad", "package", "package.json"))
assert "1.0.0" in data["versions"]


def test_download_and_get_package_info_npm_namespaced():
    """Regression test for #155: a namespaced npm package name such as
    '@datadog/browser-logs' contains a '/', which must not be used verbatim
    as a path component. The scanner replaces '/' with '-' when building the
    extraction directory, so the package lands in '@datadog-browser-logs'.
    """
    scanner = NPMPackageScanner()
    with tempfile.TemporaryDirectory() as tmpdirname:
        # NOTE(review): integration test — downloads the real package from the
        # npm registry, so it assumes network access is available.
        data, path = scanner.download_and_get_package_info(tmpdirname, "@datadog/browser-logs")
        assert path
        # The '/' in the package name is mapped to '-' in the extraction path.
        assert path.endswith("/@datadog-browser-logs")
        assert os.path.exists(os.path.join(tmpdirname, "@datadog-browser-logs"))


@pytest.mark.parametrize("identifier", ["expressjs/express", "https://github.com/expressjs/express.git"])
@pytest.mark.skip("Git targets are not yet supported for npm")
def test_download_and_get_package_info_from_github(identifier):
scanner = NPMPackageScanner()
with tempfile.TemporaryDirectory() as tmpdirname:
data = scanner.download_and_get_package_info(tmpdirname, "identifier")
data, path = scanner.download_and_get_package_info(tmpdirname, "identifier")
assert os.path.exists(os.path.join(tmpdirname, "express", "package", "package.json"))
assert "1.0.0" in data["versions"]

Expand Down

0 comments on commit 91a35fb

Please sign in to comment.