-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdepscan.py
365 lines (278 loc) · 11 KB
/
depscan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This module provides functions to scan python source files for dependencies.
@author: steve
"""
import sys
import os
import ast
import tempfile
import logging
from subprocess import call
from types import ModuleType
from types import StringTypes
log = logging.getLogger(__name__)
class Dependency(object):
    """Plain record describing one python package or module.

    Args:
        name: module or package name
        deptype: the type of dependency (stored on the ``type`` attribute)
        origin: where this dependency may be found locally
        level: integer depth marker, 0 unless the caller says otherwise

    Every new instance additionally starts with ``baseline`` set to False
    and ``comment`` set to the empty string.
    """

    def __init__(self, name, deptype=None, origin=None, level=0):
        self.name, self.type = name, deptype
        self.origin, self.level = origin, level
        self.baseline, self.comment = False, ''

    def __repr__(self):
        # Debug view: the raw attribute dictionary.
        return repr(vars(self))

    def __str__(self):
        # Short view: class name, dependency name and level.
        template = '<{} [{}] {:d}>'
        return template.format(type(self).__name__, self.name, self.level)
class Visitor(ast.NodeVisitor):
    """Shared base for the AST listers in this module.

    Subclasses push the values they extract through :meth:`add` (exposed
    read-only as ``data``) and may keep the matching ast nodes in ``nodes``.
    """

    def __init__(self):
        self.nodes = []
        self._data = []

    @property
    def data(self):
        """The list of values collected through :meth:`add`."""
        return self._data

    def add(self, datum):
        """Append one collected value to ``data``."""
        self._data.append(datum)

    def visit(self, node):
        """Visit *node* and hand back the visitor itself so calls chain."""
        super(Visitor, self).visit(node)
        return self
class FuncLister(Visitor):
    """Walk an Abstract Syntax Tree and collect its function definitions."""

    def visit_FunctionDef(self, node):
        """Store the node and its name, then continue into the body."""
        self.nodes.append(node)
        function_name = node.name
        self.add(function_name)
        self.generic_visit(node)
class KeywordLister(Visitor):
    """Walk an Abstract Syntax Tree and collect its ``keyword`` nodes."""

    def visit_keyword(self, node):
        """Store the node and its ``arg`` value, then continue downwards."""
        self.nodes.append(node)
        argument_name = node.arg
        self.add(argument_name)
        self.generic_visit(node)
class ClassLister(Visitor):
    """Walk an Abstract Syntax Tree and collect its class definitions."""

    def visit_ClassDef(self, node):
        """Store the class name and its node, then continue into the body."""
        class_name = node.name
        self.add(class_name)
        self.nodes.append(node)
        self.generic_visit(node)
class ImportLister(Visitor):
    """Traverse Abstract Syntax Tree and extract import items.

    ``data`` receives one entry per imported module: every name listed in an
    ``import a, b`` statement and the module part of ``from m import x``.
    """

    def visit_Import(self, node):
        """Record each module named by an ``import`` statement."""
        for alias in node.names:
            self.add(alias.name)
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        """Record the module a ``from ... import ...`` statement reads from."""
        # A purely relative import ("from . import x") has no module name;
        # recording None would surface as a dependency literally named None.
        if node.module is not None:
            self.add(node.module)
        self.generic_visit(node)
def get_ast(source_or_file):
    """Return an abstract syntax tree (ast) object.

    Args:
        source_or_file: python source code, or the path of an existing file
            whose content is parsed instead. Anything that is not a string
            is passed to ``ast.parse`` unchanged.

    Returns:
        The tree produced by ``ast.parse(..., mode='exec')``. When a file was
        read, its path is used as the filename, otherwise ``'<unknown>'``.
    """
    source = source_or_file
    filename = '<unknown>'
    # (str, unicode) on Python 2 and (bytes, str) on Python 3; isinstance
    # also accepts subclasses, which an exact type comparison rejected.
    string_types = (type(b''), type(u''))
    if isinstance(source_or_file, string_types) and os.path.isfile(source_or_file):
        filename = source_or_file
        # Close the handle deterministically instead of leaking it.
        with open(source_or_file, 'rb') as handle:
            source = handle.read()
    return ast.parse(source, filename, mode='exec')
def get_functions(source_or_file, prop='data'):
    """Run a FuncLister over the source and return one of its attributes.

    The default ``prop='data'`` yields the list of function names; pass
    ``prop='nodes'`` to get the function definition nodes instead.
    """
    syntax_tree = get_ast(source_or_file)
    return getattr(FuncLister().visit(syntax_tree), prop)
def get_classes(source_or_file, prop='data'):
    """Run a ClassLister over the source and return one of its attributes.

    The default ``prop='data'`` yields the list of class names; pass
    ``prop='nodes'`` to get the class definition nodes instead.
    """
    syntax_tree = get_ast(source_or_file)
    return getattr(ClassLister().visit(syntax_tree), prop)
def get_imports(source_or_file):
    """Return the list of module names gathered by an ImportLister."""
    syntax_tree = get_ast(source_or_file)
    return ImportLister().visit(syntax_tree).data
def get_keywords(source_or_file):
    """Return the list of ``arg`` values gathered by a KeywordLister."""
    syntax_tree = get_ast(source_or_file)
    return KeywordLister().visit(syntax_tree).data
class DependencyScanner(object):
    """Find the modules that get imported when a target is loaded.

    :meth:`scan` combines three passes:

    1. a baseline run of the bare interpreter, to learn which modules are
       loaded at startup regardless of the target,
    2. a run of the interpreter in verbose import mode (``-v``) that loads
       the target, whose import trace is read back from stderr,
    3. a static pass over the target's source that marks the modules it
       imports directly as level 1.

    Args:
        target (str|module): the item under test; the path of a python file,
            an importable module name, or a module object

    Attributes:
        dependencies (dict): name -> Dependency for every non-baseline module
        import_errors (dict): name -> Dependency for modules that failed
        builtins (dict): name -> origin for non-baseline builtin modules
        baseline (dict): name -> Dependency for modules loaded at startup
        deps (list): every Dependency created, baseline entries included
        libs (dict): created empty; not filled by this class
    """

    # (str, unicode) on Python 2, plain str on Python 3.
    _TEXT_TYPES = (str, type(u''))
    # Lower-case prefixes of the interpreter output lines worth keeping.
    _TRACE_PREFIXES = ('import', 'modulenotfounderror')

    def __init__(self, target):
        # Only text can be stripped; module objects are stored untouched.
        if isinstance(target, self._TEXT_TYPES):
            target = target.strip()
        self._target = target
        self.builtins = {}
        self.dependencies = {}
        self.import_errors = {}
        self.libs = {}
        self.deps = []
        self.baseline = {}

    @property
    def target(self):
        """The interpreter arguments that load the target, as one string.

        This is the shell-style rendering of :meth:`_target_args` (for
        example ``-c "import json"``). Reading it has the same side effects
        as that method, including the trial import of a module name.
        """
        args = self._target_args()
        if args[0] == '-c':
            return '-c "{}"'.format(args[1].replace('"', '\\"'))
        return args[0]

    def _target_args(self):
        """Return the interpreter arguments that load the target, as a list.

        Raises:
            ImportError: when the target is a module name that cannot be
                imported in the current process.
        """
        target = self._target
        if isinstance(target, self._TEXT_TYPES) and os.path.isfile(target):
            log.debug('Input File Identified')
            dirname, filename = os.path.split(target)
            module_name = os.path.splitext(filename)[0]
            # repr() yields correctly escaped literals even for paths that
            # contain quotes or backslashes.
            script_txt = 'import sys;sys.path.append({!r});'.format(dirname)
            script_txt += '__import__({!r})'.format(module_name)
            log.debug('Generated Script: {}'.format(script_txt))
            return ['-c', script_txt]
        if isinstance(target, ModuleType):
            log.debug('Module Identified')
            module_file = getattr(target, '__file__', None)
            if module_file:
                return [module_file]
            # Modules without a file can still be loaded by name.
            return ['-c', 'import {}'.format(target.__name__)]
        log.debug('Testing Import...')
        module_name = target.strip()
        log.debug('Testing Import: {}'.format(module_name))
        # Save the streams before anything can fail so that the restore
        # below never touches an unbound name.
        saved_streams = (sys.stdout, sys.stderr)
        with open(os.devnull, 'w') as sink:
            sys.stdout = sys.stderr = sink
            try:
                __import__(module_name)
            except AttributeError:
                # Tolerated as before: the traced run reports the details.
                pass
            finally:
                sys.stdout, sys.stderr = saved_streams
        return ['-c', 'import {}'.format(module_name)]

    @staticmethod
    def _parse_stream(stream):
        """Return the stripped lines of *stream* that relate to imports.

        A line is kept when, ignoring case, it starts with ``import`` (which
        also covers ``ImportError``) or with ``ModuleNotFoundError``. Byte
        lines, as read from a binary file on Python 3, are decoded first.
        """
        kept = []
        for raw in stream:
            if isinstance(raw, bytes) and not isinstance(raw, str):
                raw = raw.decode('utf-8', 'replace')
            line = raw.strip()
            if line.lower().startswith(DependencyScanner._TRACE_PREFIXES):
                kept.append(line)
        return kept

    @staticmethod
    def _parse_line(line):
        """Return *line* without a leading ``import `` keyword."""
        prefix = 'import '
        if line.startswith(prefix):
            # Drop the prefix only; later occurrences stay untouched.
            return line[len(prefix):]
        return line

    @staticmethod
    def _split_import_line(line):
        """Split ``import <name> # <comment>`` into ``(name, comment)``.

        The name is stripped of blanks and of the quotes Python 3 puts
        around it. The comment is everything after the first ``#``,
        unmodified, or None when the line has no ``#``.
        """
        rest = line.partition(' ')[2]
        name, marker, comment = rest.partition('#')
        name = name.strip().strip('\'"')
        return name, (comment if marker else None)

    def scan(self):
        """Run the baseline, import trace and AST passes, in that order."""
        self._scan_baseline()
        self._scan_using_import_trace()
        self._scan_using_ast()

    def _scan_using_ast(self):
        """Mark the modules imported directly by the target as level 1."""
        source = self._target
        if isinstance(source, ModuleType):
            # Parse the module's source file, never its compiled form.
            source = getattr(source, '__file__', None) or ''
            if source.endswith(('.pyc', '.pyo')):
                source = source[:-1]
            if not (source.endswith('.py') and os.path.isfile(source)):
                log.debug('No source file available for AST scan')
                return
        for name in get_imports(source):
            if not name:
                # Nameless entries (relative imports) are not dependencies.
                continue
            if name in self.dependencies:
                self.dependencies[name].level = 1
            else:
                log.debug('AST FOUND NEW: "{}"'.format(name))
                dep = Dependency(name, level=1)
                self.deps.append(dep)
                self.dependencies[dep.name] = dep

    def _run_traced(self, cmd):
        """Run *cmd* and return ``(return code, import related lines)``.

        stderr is collected in an anonymous temporary file that disappears
        when closed; stdout of the child is discarded.
        """
        with tempfile.TemporaryFile() as trace:
            with open(os.devnull, 'wb') as sink:
                retcode = call(cmd, stdout=sink, stderr=trace)
            trace.seek(0)
            return retcode, self._parse_stream(trace)

    def _scan_baseline(self):
        """Record the modules the bare interpreter loads at startup."""
        cmd = [sys.executable, '-v', '-c', '']
        try:
            retcode, lines = self._run_traced(cmd)
        except OSError:
            log.debug('Baseline Scan Failed: cmd="{}"'.format(' '.join(cmd)))
            return
        log.debug('Baseline Scan Return Code: {:d}'.format(retcode))
        for line in lines:
            log.debug('[BL] {}'.format(line))
            if not line.startswith('import'):
                continue
            name, comment = self._split_import_line(line)
            if not name:
                continue
            dep = Dependency(name)
            dep.baseline = True
            dep.comment = (comment or '').strip()
            dep.type = dep.comment.split(' ', 1)[0]
            self.deps.append(dep)
            # Key by the stripped name so the membership tests made by the
            # import trace pass actually match.
            self.baseline[name] = dep

    def _scan_using_import_trace(self):
        """Load the target in a child interpreter and read its import trace."""
        cmd = [sys.executable, '-v'] + self._target_args()
        lines = []
        try:
            retcode, lines = self._run_traced(cmd)
        except OSError as e:
            sys.stderr.write('Execution failed: {}\n'.format(e))
        else:
            if retcode < 0:
                sys.stderr.write(
                    'Child was terminated by signal {}\n'.format(-retcode))
            elif retcode > 0:
                log.warning("Child returned : {}".format(str(retcode)))
                log.warning(
                    "Missing Dependency in {} likely".format(self._target))
            else:
                log.debug("Child returned : {}".format(str(retcode)))
        for line in lines:
            if line.startswith('import'):
                self._record_import(line)
            elif 'ImportError' in line or 'ModuleNotFoundError' in line:
                self._record_import_error(line)

    def _record_import(self, line):
        """Store the module announced by one ``import ...`` trace line."""
        name, comment = self._split_import_line(line)
        if comment is None and ' as ' in line:
            # Alias lines are skipped.
            return
        if not name:
            return
        dep = Dependency(name)
        if comment is None:
            log.debug(line)
        else:
            dep.comment = comment
            # "<kind> from <path>": split on the first "from" only, so a
            # path that itself contains "from" is kept whole.
            kind, marker, origin = comment.partition('from')
            dep.type = kind.strip()
            if marker:
                dep.origin = origin.strip()
        if dep.name in self.baseline:
            return
        # dep.type stays None for lines without a comment.
        if dep.type and 'builtin' in dep.type:
            self.builtins[dep.name] = dep.origin
        else:
            self.dependencies[dep.name] = dep
            self.deps.append(dep)

    def _record_import_error(self, line):
        """Store the module named last on an import error line."""
        name = line.rsplit(' ', 1)[-1].strip().strip('\'"')
        if not name:
            return
        dep = Dependency(name)
        self.import_errors[name] = dep
        self.dependencies[name] = dep
        self.deps.append(dep)
def _print_title(name, width=80):
print('='*width)
title = ''.join(['{:^',str(width), '}'])
print(title.format(name))
print('='*width)
def _print_break(width=80):
print('\n'+'*'*width)
print('*'*width + '\n')
def _main():
logging.basicConfig()
log.setLevel(logging.WARN)
if len(sys.argv) == 1:
print('no target provided')
return 1
else:
target = sys.argv[1]
depscan = DependencyScanner(target)
depscan.scan()
_print_title('Dependencies', width=80)
sorted_deps = sorted(depscan.dependencies.items())
for i, (name, dep) in enumerate(sorted_deps):
lvl = '(TOP_LEVEL)' if dep.level==1 else ''
print '{:3d}: {name:40} {level}'.format(i, name=dep.name, level=lvl)
if depscan.import_errors:
_print_title('Import Errors (Missing Dependencies)', width=80)
sorted_deps = sorted(depscan.import_errors.items())
for i, (name, dep) in enumerate(sorted_deps):
print '{:3d}: {name:40}'.format(i, name=dep.name)
if __name__ == '__main__':
_main()