Skip to content

Commit

Permalink
Merge pull request #144 from DataDog/local-target
Browse files Browse the repository at this point in the history
Local target
  • Loading branch information
christophetd authored Feb 9, 2023
2 parents 1e3a5f5 + 02532a6 commit c4d11dc
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 3 deletions.
4 changes: 4 additions & 0 deletions guarddog/analyzer/metadata/pypi/empty_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
Detects if a package contains an empty description
"""
import logging
from typing import Optional

from guarddog.analyzer.metadata.empty_information import EmptyInfoDetector

MESSAGE = "This package has an empty description on PyPi"

log = logging.getLogger("guarddog")


class PypiEmptyInfoDetector(EmptyInfoDetector):
    """PyPI flavour of the empty-description heuristic."""

    def detect(
        self,
        package_info,
        path: Optional[str] = None,
        name: Optional[str] = None,
        version: Optional[str] = None,
    ) -> tuple[bool, str]:
        """
        Check whether the package description is blank.

        @param package_info: package metadata; ``package_info["info"]["description"]`` is read
        @param path: not used by this heuristic
        @param name: package name, only used in the debug log line
        @param version: package version, only used in the debug log line
        @return: a tuple ``(is_empty, message)`` where ``is_empty`` is True when the
                 description has no characters left once surrounding whitespace is stripped
        """
        log.debug(f"Running PyPI empty description heuristic on package {name} version {version}")
        description = package_info["info"]["description"]
        is_empty = not description.strip()
        return is_empty, EmptyInfoDetector.MESSAGE_TEMPLATE % "PyPI"
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Detects if a maintainer's email domain might have been compromised.
"""

from datetime import datetime
from typing import Optional

Expand Down
4 changes: 4 additions & 0 deletions guarddog/analyzer/metadata/pypi/release_zero.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
Detects when a package has its latest release version set to 0.0.0
"""
import logging
from typing import Optional

from guarddog.analyzer.metadata.release_zero import ReleaseZeroDetector

log = logging.getLogger("guarddog")


class PypiReleaseZeroDetector(ReleaseZeroDetector):
    """PyPI flavour of the zero-version heuristic."""

    def detect(
        self,
        package_info,
        path: Optional[str] = None,
        name: Optional[str] = None,
        version: Optional[str] = None,
    ) -> tuple[bool, str]:
        """
        Check whether the version found in the package metadata is a zero version.

        @param package_info: package metadata; ``package_info["info"]["version"]`` is read
        @param path: not used by this heuristic
        @param name: package name, only used in the debug log line
        @param version: package version, only used in the debug log line
        @return: a tuple ``(is_zero, message)`` where ``is_zero`` is True when the metadata
                 version is exactly "0.0.0" or "0.0"
        """
        log.debug(f"Running zero version heuristic on PyPI package {name} version {version}")
        reported_version = package_info["info"]["version"]
        is_zero = reported_version in ("0.0.0", "0.0")
        return is_zero, ReleaseZeroDetector.MESSAGE_TEMPLATE % reported_version
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import configparser
import hashlib
import logging
import os
import re
from typing import Optional, Tuple
Expand All @@ -16,6 +17,8 @@
GH_REPO_REGEX = r'(?:https?://)?(?:www\.)?github\.com/(?:[\w-]+/)(?:[\w-]+)'
GH_REPO_OWNER_REGEX = r'(?:https?://)?(?:www\.)?github\.com/([\w-]+)/([\w-]+)'

log = logging.getLogger("guarddog")


def extract_owner_and_repo(url) -> Tuple[Optional[str], Optional[str]]:
match = re.search(GH_REPO_OWNER_REGEX, url)
Expand Down Expand Up @@ -213,6 +216,8 @@ def detect(self, package_info, path: Optional[str] = None, name: Optional[str] =
raise Exception("Detector needs the name of the package")
if path is None:
raise Exception("Detector needs the path of the package")

log.debug(f"Running repository integrity mismatch heuristic on PyPI package {name} version {version}")
# let's extract a source repository (GitHub only for now) if we can
github_urls, best_github_candidate = find_github_candidates(package_info)
if len(github_urls) == 0:
Expand All @@ -224,6 +229,7 @@ def detect(self, package_info, path: Optional[str] = None, name: Optional[str] =
if github_url is None:
return False, "Could not find a good GitHub url in the project's description"

log.debug(f"Using GitHub URL {github_url}")
# ok, now let's try to find the version! (I need to know which version we are scanning)
if version is None:
version = package_info["info"]["version"]
Expand Down
6 changes: 5 additions & 1 deletion guarddog/analyzer/metadata/pypi/typosquatting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
import os
from datetime import datetime, timedelta
from typing import Optional
Expand All @@ -9,6 +10,9 @@
from guarddog.analyzer.metadata.typosquatting import TyposquatDetector


log = logging.getLogger("guarddog")


class PypiTyposquatDetector(TyposquatDetector):
"""
Detector for typosquatting attacks. Detects if a package name is a typosquat of one of the top 1000 packages.
Expand Down Expand Up @@ -77,7 +81,7 @@ def detect(self, package_info, path: Optional[str] = None, name: Optional[str] =
typosquatting from
@param **kwargs:
"""

log.debug(f"Running typosquatting heuristic on PyPI package {name}")
similar_package_names = self.get_typosquatted_package(package_info["info"]["name"])
if len(similar_package_names) > 0:
return True, TyposquatDetector.MESSAGE_TEMPLATE % ", ".join(similar_package_names)
Expand Down
17 changes: 16 additions & 1 deletion guarddog/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,21 @@ def display_result(result: dict) -> None:
return return_value # this is mostly for testing


def is_local_target(identifier: str) -> bool:
    """
    Decide whether a scan target designates something on the local filesystem.

    @param identifier: The name/path of the package as passed to "guarddog ecosystem scan"
    @return: Whether the identifier should be considered a local path
    """
    # Anything spelled as an explicit path is local, whether or not it exists yet:
    # absolute paths, paths relative to the current or parent directory, and the
    # bare "." / ".." directory references.
    if identifier in (".", "..") or identifier.startswith(("/", "./", "../")):
        return True

    # If this looks like an archive, consider it as a local target if the target exists on the local filesystem
    if identifier.endswith((".tar.gz", ".zip", ".whl")):
        return os.path.exists(identifier)

    # Everything else is treated as a package name, not a local path
    return False


def _scan(identifier, version, rules, exclude_rules, output_format, exit_non_zero_on_finding, ecosystem: ECOSYSTEM):
"""Scan a package
Expand All @@ -146,7 +161,7 @@ def _scan(identifier, version, rules, exclude_rules, output_format, exit_non_zer
sys.stderr.write(f"Command scan is not supported for ecosystem {ecosystem}")
exit(1)
results = {}
if os.path.exists(identifier):
if is_local_target(identifier):
results = scanner.scan_local(identifier, rule_param)
else:
try:
Expand Down
19 changes: 19 additions & 0 deletions tests/core/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import os
import unittest.mock

import guarddog.cli


def test_is_local_target():
    """Exercise guarddog.cli.is_local_target on path-like, bare-name and archive identifiers."""
    # No filesystem mock is installed for these cases
    for path_like in ("/tmp/foo", "./foo"):
        assert guarddog.cli.is_local_target(path_like)
    assert not guarddog.cli.is_local_target("foo")

    archive = "foo.tar.gz"

    # Archive name, with os.path.exists patched to report the file as present
    with unittest.mock.patch('os.path.exists', return_value=True):
        assert guarddog.cli.is_local_target(archive)

    # Same archive name, with os.path.exists patched to report the file as absent
    with unittest.mock.patch('os.path.exists', return_value=False):
        assert not guarddog.cli.is_local_target(archive)

0 comments on commit c4d11dc

Please sign in to comment.