From a3e07134209f3118aa1929bdd6b06b6ec1629b93 Mon Sep 17 00:00:00 2001
From: Abdullah Qureshi <111448185+arbimaq@users.noreply.github.com>
Date: Mon, 24 Jun 2024 13:08:28 +0500
Subject: [PATCH] added unused dependencies finder

---
Reviewer note: the hunks below were revised during review, so the blob ids
on the `index` lines still refer to the original revision of the files.

 .../unused_dependencies/__init__.py           |   9 ++
 edx_repo_tools/unused_dependencies/config.py  |   3 +
 .../unused_dependencies/gather_imports.py     | 110 ++++++++++++++++++
 .../populate_requirements.py                  |  37 ++++++
 .../unused_dependencies.py                    |  28 +++++
 5 files changed, 187 insertions(+)
 create mode 100644 edx_repo_tools/unused_dependencies/__init__.py
 create mode 100644 edx_repo_tools/unused_dependencies/config.py
 create mode 100644 edx_repo_tools/unused_dependencies/gather_imports.py
 create mode 100644 edx_repo_tools/unused_dependencies/populate_requirements.py
 create mode 100644 edx_repo_tools/unused_dependencies/unused_dependencies.py

diff --git a/edx_repo_tools/unused_dependencies/__init__.py b/edx_repo_tools/unused_dependencies/__init__.py
new file mode 100644
index 00000000..d0b33d23
--- /dev/null
+++ b/edx_repo_tools/unused_dependencies/__init__.py
@@ -0,0 +1,9 @@
+from sys import argv
+
+from unused_dependencies import unused_dependencies
+
+if __name__ == '__main__':
+    if len(argv) < 2:
+        print("Usage: python unused_dependencies.py <path>")
+        exit(-1)
+    print(unused_dependencies(argv[1]))
\ No newline at end of file
diff --git a/edx_repo_tools/unused_dependencies/config.py b/edx_repo_tools/unused_dependencies/config.py
new file mode 100644
index 00000000..7e52f05b
--- /dev/null
+++ b/edx_repo_tools/unused_dependencies/config.py
@@ -0,0 +1,3 @@
+blacklist = {'.', '..', '.ci', 'node_modules', 'static', 'public', 'build', 'docs', 'media', 'scripts','.github', '.git', 'requirements', 'tests', '.tox', 'migrations', 'test_utils', 'tests', '__pycache__'}
+
+whitelist = {'tox', 'pip', 'twine', 'pydocstyle', 'wheel', 'edx-sphinx-theme', 'edx-lint', 'pytest-cookies', 'pycodestyle', 'build', 'Sphinx', 'doc8', 'virtualenv', 'pip-tools', 'tox-battery', 'sphinx_rtd_theme'}
diff --git a/edx_repo_tools/unused_dependencies/gather_imports.py b/edx_repo_tools/unused_dependencies/gather_imports.py
new file mode 100644
index 00000000..5a258f7a
--- /dev/null
+++ b/edx_repo_tools/unused_dependencies/gather_imports.py
@@ -0,0 +1,110 @@
+
+import ast
+import re
+
+from os import listdir
+from os.path import isdir, isfile, join
+
+from config import blacklist
+
+
+def parse_imports(path):
+    """
+    parses `path` as a python file with `ast` and yields the base module of
+    every import at the top level of the file. Any sub-modules imported
+    will be ignored and the base module will be considered. Relative
+    imports are skipped.
+
+    `gather_imports` can't use this because some .py files include invalid
+    identifiers e.g. {{cookiecutter.class_name}}
+    """
+    with open(path) as fh:
+        root = ast.parse(fh.read(), path)
+
+    for node in ast.iter_child_nodes(root):
+        if isinstance(node, ast.Import):
+            # import package.module, other -> package, other
+            for alias in node.names:
+                yield alias.name.split('.')[0]
+        elif isinstance(node, ast.ImportFrom):
+            # `from . import name` and `from .module import name` are
+            # imports from the current package, not from a dependency
+            if node.module is None or node.level:
+                continue
+            # from package.module import name, othername -> package
+            yield node.module.split('.')[0]
+
+
+def manually_parse_imports(path):
+    """
+    parses `path` line by line and yields the base module of every import
+    at the top of the file. Any sub-modules imported will be ignored and
+    the base module will be considered. Relative imports are skipped.
+
+    Parsing stops at the first line that is not blank, a comment, part of
+    a docstring or an import.
+    """
+    from_import = re.compile(r'^from\s+(\S+)\s+import\b')
+    package_import = re.compile(r'^import\s+(.+)')
+
+    with open(path) as f:
+        docstring_quotes = None
+        in_parentheses = False
+        for line in f:
+            line = line.strip()
+            if docstring_quotes:
+                # inside a multi-line docstring until its quotes show up
+                if docstring_quotes in line:
+                    docstring_quotes = None
+                continue
+            if in_parentheses:
+                # names of a multi-line `from package import (...)`
+                in_parentheses = ')' not in line
+                continue
+            if not line or line.startswith('#'):
+                continue
+            if line.startswith(('"""', "'''")):
+                # a docstring closed on its first line is not left open
+                if line.count(line[:3]) < 2:
+                    docstring_quotes = line[:3]
+                continue
+
+            from_match = from_import.match(line)
+            package_match = package_import.match(line)
+            if from_match:
+                modules = [from_match.group(1)]
+                in_parentheses = '(' in line and ')' not in line
+            elif package_match:
+                # import package.module as name, other  # comment
+                modules = package_match.group(1).split('#')[0].split(',')
+            else:
+                # we don't support imports anywhere other than the top of file
+                break
+
+            for module in modules:
+                # relative imports start with '.' and leave an empty name
+                name = module.strip().split(' ')[0].split('.')[0]
+                if name:
+                    yield name
+
+
+def gather_imports(path, imports=None):
+    """
+    Walks `path` recursively and parses each .py file to generate a set of
+    all the imports. Directories named in `blacklist` are not visited.
+
+    `imports` is updated and returned when given, otherwise every call
+    starts from a new empty set.
+    """
+    if imports is None:
+        imports = set()
+
+    for name in listdir(path):
+        full_path = join(path, name)
+        if isfile(full_path):
+            if name.endswith('.py'):
+                imports.update(manually_parse_imports(full_path))
+        elif isdir(full_path) and name not in blacklist:
+            gather_imports(full_path, imports)
+
+    return imports
\ No newline at end of file
diff --git a/edx_repo_tools/unused_dependencies/populate_requirements.py b/edx_repo_tools/unused_dependencies/populate_requirements.py
new file mode 100644
index 00000000..7ddd9564
--- /dev/null
+++ b/edx_repo_tools/unused_dependencies/populate_requirements.py
@@ -0,0 +1,37 @@
+
+import re
+from os import walk
+from os.path import join
+
+# the package name ends where a version, extra, marker or comment starts
+name_end = re.compile(r'[\s=<>!~\[;#]')
+
+
+def populate_requirements(path, requirements=None):
+    """
+    returns a set of all the requirements listed in any .in file inside
+    `path` or its sub-directories. The version numbers and comments are
+    removed from the returned set.
+
+    `requirements` is updated and returned when given, otherwise every
+    call starts from a new empty set.
+    """
+    if requirements is None:
+        requirements = set()
+
+    # `walk` visits the sub-directories too, no need to recurse into them
+    for dirpath, _, files in walk(path):
+        for file in files:
+            if not file.endswith('.in'):
+                continue
+            with open(join(dirpath, file)) as req_file:
+                for line in req_file:
+                    # comments, options (-r, -c, -e), blank and indented lines
+                    if line.startswith(('#', '-', '\n', ' ')):
+                        continue
+                    # `package[extra]>=10.3 # some comments` -> package
+                    name = name_end.split(line, maxsplit=1)[0]
+                    if name:
+                        requirements.add(name)
+
+    return requirements
\ No newline at end of file
diff --git a/edx_repo_tools/unused_dependencies/unused_dependencies.py b/edx_repo_tools/unused_dependencies/unused_dependencies.py
new file mode 100644
index 00000000..19366bb1
--- /dev/null
+++ b/edx_repo_tools/unused_dependencies/unused_dependencies.py
@@ -0,0 +1,28 @@
+from os.path import join, exists
+
+from populate_requirements import populate_requirements
+from gather_imports import gather_imports
+from config import whitelist
+
+
+def unused_dependencies(path):
+    """
+    check for any package listed in any .in file inside `path`/requirements
+    that is not imported in any .py file inside `path`. Names are compared
+    as written and the packages in `whitelist` are never reported.
+
+    Exits with status 1 when `path` doesn't exist and raises an
+    AssertionError naming the unused packages when there are any.
+    """
+    if not exists(path):
+        print("Error: {} doesn't exist.".format(path))
+        exit(1)
+    requirements = populate_requirements(join(path, 'requirements'))
+    imports = gather_imports(path)
+
+    print(requirements)
+    print(imports)
+    unused = (requirements - imports) - whitelist
+    # raised explicitly because `assert` statements are removed by `python -O`
+    if unused:
+        raise AssertionError("The following packages are unused: {}".format(unused))