Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added unused dependencies finder #528

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions edx_repo_tools/unused_dependencies/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from sys import argv

from unused_dependencies import unused_dependencies

if __name__ == '__main__':
    # Command-line entry point: the first positional argument is the path of
    # the repository to check.
    import sys

    if len(argv) < 2:
        print("Usage: python unused_dependencies.py <repo-name>")
        # Use sys.exit rather than the bare `exit`: the latter is a helper
        # injected by the `site` module for interactive sessions and is not
        # guaranteed to exist (e.g. under `python -S`).
        sys.exit(-1)
    print(unused_dependencies(argv[1]))
3 changes: 3 additions & 0 deletions edx_repo_tools/unused_dependencies/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Directory names that the recursive import scan (gather_imports) never
# descends into.
blacklist = {
    # relative directory markers
    '.', '..',
    # tooling and VCS metadata
    '.ci', '.git', '.github', '.tox', '__pycache__',
    # front-end assets and generated output
    'node_modules', 'static', 'public', 'build', 'media',
    # documentation, scripts and requirement files
    'docs', 'scripts', 'requirements',
    # tests and migrations
    'tests', 'test_utils', 'migrations',
}

# Requirement names that are never reported as unused, even when no .py file
# imports them.
whitelist = {
    # packaging and environment tooling
    'pip', 'pip-tools', 'wheel', 'twine', 'build', 'virtualenv',
    # test runners and their plugins
    'tox', 'tox-battery', 'pytest-cookies',
    # linters and style checkers
    'edx-lint', 'pycodestyle', 'pydocstyle', 'doc8',
    # documentation builders and themes
    'Sphinx', 'sphinx_rtd_theme', 'edx-sphinx-theme',
}
80 changes: 80 additions & 0 deletions edx_repo_tools/unused_dependencies/gather_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

import ast
import re

from os import listdir
from os.path import join
from genericpath import isdir, isfile

from config import blacklist

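# NOTE: ast.parse() fails on .py files that contain invalid identifiers, e.g.
# the template placeholder {{cookiecutter.class_name}}; manually_parse_imports()
# is the regex-based alternative for such files.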
def parse_imports(path):
    """
    Parse `path` as a python file and yield the base package of every
    absolute import found anywhere in it.

    Sub-modules are ignored and only the base package is reported
    (`import package.module` -> `package`). A name is yielded once per
    import statement that mentions it, so duplicates are possible; callers
    that need unique names should collect the result into a set.

    Relative imports (`from . import x`, `from .module import x`) are
    skipped because they always refer to the project's own code.

    Raises SyntaxError (from `ast.parse`, on first iteration) when the file
    is not valid python.
    """
    # Read bytes so that ast.parse decodes the source itself, honouring an
    # encoding declaration in the file instead of the locale's default.
    with open(path, 'rb') as fh:
        root = ast.parse(fh.read(), path)

    # ast.walk visits every node, so imports nested inside functions,
    # classes, `if` blocks or `try/except ImportError` guards are found too.
    for node in ast.walk(root):
        if isinstance(node, ast.Import):
            # import package.module, other -> package, other
            for alias in node.names:
                yield alias.name.split('.')[0]
        elif isinstance(node, ast.ImportFrom):
            if node.level or node.module is None:
                # relative import: not a third-party package
                continue
            # from package.module import name, othername -> package
            yield node.module.split('.')[0]


def manually_parse_imports(path):
    """
    Scan the import statements at the top of the python file `path` with
    regular expressions and yield the base package of each imported module
    (`package.module` -> `package`).

    Unlike an ast based parser this works on files that are not valid python
    (e.g. templates containing `{{cookiecutter.class_name}}`), at the price
    of the following limitations:

    * only imports at the top of the file are seen: scanning stops at the
      first line that is neither blank, a comment, part of a docstring, nor
      an import statement;
    * relative imports (`from . import x`, `from .module import x`) are
      skipped because they refer to the project's own code.
    """
    # `\s+` after the keyword keeps identifiers such as `important = 1` or
    # `from_date = ...` from being mistaken for import statements.
    import_statement = re.compile(r'^(?:from\s+(\S+)\s+import\b|import\s+(.+))')
    docstring_quotes = ('"""', "'''")

    open_docstring = None   # delimiter of the docstring we are inside, if any
    in_parentheses = False  # inside a multi-line `from x import (...)`

    with open(path) as f:
        for line in f:
            line = line.strip()

            if open_docstring is not None:
                # the docstring ends on the first line holding its delimiter
                if open_docstring in line:
                    open_docstring = None
                continue
            if in_parentheses:
                # imported names of a parenthesised import; nothing to record
                if ')' in line:
                    in_parentheses = False
                continue
            if not line or line.startswith('#'):
                continue
            if line.startswith(docstring_quotes):
                delimiter = line[:3]
                # a one-line docstring opens and closes on the same line
                if delimiter not in line[3:]:
                    open_docstring = delimiter
                continue

            statement = import_statement.match(line)
            if statement is None:
                # we don't support imports anywhere other than the top of file
                break
            in_parentheses = '(' in line and ')' not in line

            from_module, plain_modules = statement.groups()
            if from_module is not None:
                targets = [from_module]
            else:
                # drop a trailing comment / second statement, then handle
                # `import a, b as c`
                targets = re.split(r'[#;]', plain_modules, maxsplit=1)[0].split(',')

            for target in targets:
                words = target.split()
                if not words:
                    continue
                package = words[0].split('.')[0]
                # empty for relative imports such as `from .module import x`
                if package:
                    yield package

def gather_imports(path, imports=None):
    """
    Walk `path` recursively and parse each .py file to build the set of all
    the base packages imported anywhere below it.

    Directories whose name is listed in `blacklist` are not descended into.

    `imports` is an optional set to add the results to; when omitted a fresh
    set is created for this call. The (possibly updated) set is returned.
    """
    # A fresh set per call: a mutable default would be shared by every call
    # and leak the results of one repository into the next.
    if imports is None:
        imports = set()

    subdirs = set()
    for entry in listdir(path):
        entry_path = join(path, entry)
        if isdir(entry_path):
            subdirs.add(entry)
        elif isfile(entry_path) and entry.endswith('.py'):
            try:
                imports.update(parse_imports(entry_path))
            except SyntaxError:
                # Not valid python (e.g. a template full of
                # {{cookiecutter.*}} placeholders): fall back to the
                # regex based scanner instead of aborting the whole walk.
                imports.update(manually_parse_imports(entry_path))

    for subdir in subdirs - blacklist:
        # pass the accumulator down explicitly so nested results are kept
        gather_imports(join(path, subdir), imports)

    return imports
22 changes: 22 additions & 0 deletions edx_repo_tools/unused_dependencies/populate_requirements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

from os import walk
from os.path import join


def populate_requirements(path, requirements=None):
    """
    Return the set of all the requirement names listed in any .in file
    inside `path` (searched recursively).

    Version specifiers, extras, environment markers and comments are
    removed, e.g. `package[extra]>=10.3  # some comment` -> `package`.
    Blank lines, comment lines, indented lines and option lines (`-r`, `-c`,
    `--index-url`, ...) are ignored. URL / VCS requirements are reported by
    their `#egg=` name and skipped when they carry none.

    `requirements` is an optional set to add the names to; when omitted a
    fresh set is created for this call. The (possibly updated) set is
    returned.
    """
    import re  # function-scope import: `re` is not imported at file level

    # A fresh set per call: a mutable default would be shared between calls.
    if requirements is None:
        requirements = set()

    # A `#` starts a comment only at the start of a line or after
    # whitespace, so the `#egg=` fragment of a URL is preserved.
    comment = re.compile(r'(^|\s+)#.*$')
    egg_name = re.compile(r'#egg=([A-Za-z0-9][A-Za-z0-9._-]*)')
    leading_name = re.compile(r'[A-Za-z0-9][A-Za-z0-9._-]*')

    # os.walk already descends into every sub-directory, so no explicit
    # recursion is needed.
    for dirpath, _dirs, files in walk(path):
        for filename in files:
            if not filename.endswith('.in'):
                continue
            with open(join(dirpath, filename)) as req_file:
                for raw_line in req_file:
                    line = comment.sub('', raw_line.rstrip('\n'))

                    egg = egg_name.search(line)
                    if egg is not None:
                        # `-e git+https://host/repo.git#egg=package` -> package
                        requirements.add(egg.group(1))
                        continue

                    name = leading_name.match(line)
                    if name is None:
                        # blank line, option line or indented line
                        continue
                    if line[name.end():].startswith(('+', ':')):
                        # bare URL such as `git+https://...` without an egg
                        # name: the package name cannot be determined
                        continue
                    requirements.add(name.group(0))

    return requirements
23 changes: 23 additions & 0 deletions edx_repo_tools/unused_dependencies/unused_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from os.path import join, exists

from populate_requirements import populate_requirements
from gather_imports import gather_imports
from config import whitelist


def unused_dependencies(path):
    """
    Return the set of packages listed in any .in file under
    `<path>/requirements` that are not imported in any .py file of the
    repository at `path`.

    Names listed in `whitelist` are never reported. Requirement and import
    names are compared case-insensitively with `-` and `.` treated as `_`,
    so that e.g. the requirement `edx-lint` matches `import edx_lint`.
    Distributions whose import name is unrelated to their requirement name
    are still reported; add those to `whitelist`.

    Raises SystemExit (exit status 1, message on stderr) when `path` does
    not exist.
    """
    if not exists(path):
        # SystemExit instead of the bare `exit`, which is only provided by
        # the `site` module for interactive sessions.
        raise SystemExit("Error: {} doesn't exist.".format(path))

    def normalize(name):
        # import names cannot contain `-` or `.`, distribution names can
        return name.lower().replace('-', '_').replace('.', '_')

    requirements = populate_requirements(join(path, 'requirements'))
    imported = {normalize(name) for name in gather_imports(path)}
    ignored = {normalize(name) for name in whitelist}

    # Report the names as written in the .in files, and return them so the
    # caller decides how to present the result.
    return {
        requirement
        for requirement in requirements
        if normalize(requirement) not in imported
        and normalize(requirement) not in ignored
    }