Skip to content

Commit

Permalink
added unused dependencies finder
Browse files Browse the repository at this point in the history
  • Loading branch information
arbimaq committed Jun 24, 2024
1 parent 31f6be7 commit a3e0713
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 0 deletions.
9 changes: 9 additions & 0 deletions edx_repo_tools/unused_dependencies/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from sys import argv

from unused_dependencies import unused_dependencies

# Command-line entry point: expects the repository path as the first argument.
if __name__ == '__main__':
    if len(argv) >= 2:
        # Run the check on the given repository and print whatever it returns.
        print(unused_dependencies(argv[1]))
    else:
        print("Usage: python unused_dependencies.py <repo-name>")
        exit(-1)
3 changes: 3 additions & 0 deletions edx_repo_tools/unused_dependencies/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Directory names that are excluded when a repository is walked for .py files.
blacklist = {
    '.',
    '..',
    '.ci',
    '.git',
    '.github',
    '.tox',
    '__pycache__',
    'build',
    'docs',
    'media',
    'migrations',
    'node_modules',
    'public',
    'requirements',
    'scripts',
    'static',
    'test_utils',
    'tests',
}

# Requirement names that are never reported as unused, even when no .py file
# imports them.
whitelist = {
    'Sphinx',
    'build',
    'doc8',
    'edx-lint',
    'edx-sphinx-theme',
    'pip',
    'pip-tools',
    'pycodestyle',
    'pydocstyle',
    'pytest-cookies',
    'sphinx_rtd_theme',
    'tox',
    'tox-battery',
    'twine',
    'virtualenv',
    'wheel',
}
80 changes: 80 additions & 0 deletions edx_repo_tools/unused_dependencies/gather_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

import ast
import re

from os import listdir
from os.path import join
from genericpath import isdir, isfile

from config import blacklist

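# NOTE: gather_imports() uses manually_parse_imports() rather than the ast-based
# parse_imports() below, because some .py files are templates that contain
# invalid identifiers (e.g. {{cookiecutter.class_name}}) which ast cannot parse.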
def parse_imports(path):
    """
    Parse `path` as a python file with `ast` and yield the base package of
    every absolute import statement found at module level.

    Sub-modules are ignored and only the base package is reported, e.g.
    `import package.module` and `from package.module import name` both
    yield `package`. Every name of a multi-name statement is reported
    (`import a, b.c` yields `a` and `b`). Relative imports
    (`from . import x`, `from .module import y`) are skipped because they
    point at the project itself, not at an installed dependency.

    Only direct children of the module node are inspected, so imports nested
    inside functions, classes or conditionals are not reported.

    Raises SyntaxError (from `ast.parse`) if the file is not valid python.
    """
    with open(path) as fh:
        root = ast.parse(fh.read(), path)

    for node in ast.iter_child_nodes(root):
        if isinstance(node, ast.Import):
            # import package.module, other -> package, other
            for alias in node.names:
                yield alias.name.split('.')[0]
        elif isinstance(node, ast.ImportFrom):
            # level > 0 marks a relative import; module is None for `from . import x`
            if node.level or node.module is None:
                continue
            # from package.module import name, othername -> package
            yield node.module.split('.')[0]


def manually_parse_imports(path):
    """
    Scan the leading import section of the file at `path` line by line and
    yield the base package of every absolute import statement.

    The file is never handed to a python parser, so files that are not valid
    python (e.g. templates) can still be scanned. Sub-modules are ignored and
    only the base package is reported: `import package.module` and
    `from package.module import name` both yield `package`. Every name of
    `import a, b as c` is reported. Relative imports (`from . import x`,
    `from .module import y`) are skipped.

    Blank lines, comment lines and triple-quoted docstrings are skipped.
    Scanning stops at the first other line that is not an import statement;
    imports anywhere other than the top of the file are not supported.
    """
    import_start = re.compile(r'^(from|import)\s')
    from_import = re.compile(r'^from\s+(\S+)\s+import\b')
    package_import = re.compile(r'^import\s+(.+)')
    docstring_quotes = ('"""', "'''")

    with open(path) as f:
        open_quote = None  # delimiter of the docstring we are currently inside
        pending = ''       # unfinished import statement spanning several lines
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            if not pending:
                if open_quote is not None:
                    # the docstring ends on whichever line contains its delimiter
                    if open_quote in line:
                        open_quote = None
                    continue
                if line.startswith(docstring_quotes):
                    quote = line[:3]
                    # a one-line docstring opens and closes on the same line
                    if quote not in line[3:]:
                        open_quote = quote
                    continue

            # drop comments; an import statement cannot contain a string literal
            code = line.split('#', 1)[0].strip()
            if not code:
                continue

            if not pending and import_start.match(code) is None:
                # we don't support imports anywhere other than the top of file
                break

            statement = (pending + ' ' + code).strip()
            if statement.endswith('\\'):
                # backslash continuation: keep collecting
                pending = statement[:-1]
                continue
            if statement.count('(') > statement.count(')'):
                # parenthesised name list is still open: keep collecting
                pending = statement
                continue
            pending = ''

            match = from_import.match(statement)
            if match is not None:
                module = match.group(1)
                if not module.startswith('.'):
                    yield module.split('.')[0]
                continue

            match = package_import.match(statement)
            if match is None:
                break
            # import a.b, c as d -> a, c
            for item in match.group(1).split(','):
                words = item.split()
                if words:
                    yield words[0].split('.')[0]

def gather_imports(path, imports=None):
    """
    Walk `path` recursively and parse each .py file to generate a set of
    all the imports.

    `path` is the directory to scan. Sub-directories whose name is listed in
    `blacklist` are not descended into.

    `imports` is an optional set to add the results to. When omitted, a new
    set is created for this call, so results of separate calls never mix.

    Returns the set of base package names (the same object as `imports`
    when one was passed in).
    """
    if imports is None:
        imports = set()

    content = listdir(path)
    for name in content:
        if name.endswith('.py') and isfile(join(path, name)):
            imports.update(manually_parse_imports(join(path, name)))

    subdirs = {name for name in content if isdir(join(path, name))}
    for subdir in subdirs - blacklist:
        # hand the accumulator down explicitly so nested results reach the caller
        gather_imports(join(path, subdir), imports)

    return imports
22 changes: 22 additions & 0 deletions edx_repo_tools/unused_dependencies/populate_requirements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

from os import walk
from os.path import join


def populate_requirements(path, requirements=None):
    """
    Return a set of all the requirements listed in any .in file inside
    `path` (searched recursively). Version specifiers, extras, environment
    markers and comments are removed from the returned names.

    `path` is the directory to search.

    `requirements` is an optional set to add the names to. When omitted, a
    new set is created for this call, so results of separate calls never mix.

    Lines starting with `#`, `-` (pip options such as `-r`/`-c`), a space or
    a newline are ignored.
    """
    import re

    if requirements is None:
        requirements = set()

    # the package name ends at the first whitespace or specifier character:
    # `package>=10.3  # some comments` -> package
    name_end = re.compile(r'[\s=<>~!;\[#@]')

    # walk() already descends into every sub-directory, no recursion needed
    for dirpath, _dirs, files in walk(path):
        for file in files:
            if not file.endswith('.in'):
                continue
            with open(join(dirpath, file)) as req_file:
                for line in req_file:
                    if line[0] in ('#', '-', '\n', ' '):
                        continue
                    name = name_end.split(line, 1)[0]
                    if name:
                        requirements.add(name)

    return requirements
22 changes: 22 additions & 0 deletions edx_repo_tools/unused_dependencies/unused_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from os.path import join, exists

from populate_requirements import populate_requirements
from gather_imports import gather_imports
from config import whitelist


def unused_dependencies(path):
    """
    Check for any package listed in any .in file that is not imported in
    any .py file.

    `path` is the root directory of the repository. Requirements are read
    from its `requirements` sub-directory and imports are gathered from the
    whole tree. Both sets are printed.

    Returns None when every requirement is either imported or whitelisted.

    Raises SystemExit(1) if `path` doesn't exist, and AssertionError naming
    the unused packages otherwise.
    """
    if not exists(path):
        print("Error: {} doesn't exist.".format(path))
        # raise directly; the `exit` helper is only installed by the site module
        raise SystemExit(1)
    requirements = populate_requirements(join(path, 'requirements'))
    imports = gather_imports(path)

    print(requirements)
    print(imports)
    unused = (requirements - imports) - whitelist
    if unused:
        # an explicit raise still fires under `python -O`, unlike `assert`
        raise AssertionError("The following packages are unused: {}".format(unused))

0 comments on commit a3e0713

Please sign in to comment.