-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl.py
executable file
·130 lines (110 loc) · 4.27 KB
/
crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/local/bin/python3
#FIXME
import subprocess
import json
from pprint import pprint
import cloc
import os
import shutil
from tabulate import tabulate
import argparse
def run_cloc(name, url, working_dir):
    """Clone ``url`` into ``working_dir``, run cloc on the checkout, then delete it.

    Args:
        name: Name of the directory that ``git clone`` creates for the
            repository inside ``working_dir``.
        url: Clone URL handed to ``git clone``.
        working_dir: Existing directory to clone into. May be relative,
            nested, or absolute.

    Returns:
        Whatever ``cloc.cloc_repo()`` returns for the cloned repository.

    Raises:
        FileNotFoundError: If the checkout directory does not exist after the
            clone attempt (for example because the clone failed).
    """
    start_dir = os.getcwd()
    # Resolve once up front so later chdir calls cannot change what the path
    # refers to.
    repo_dir = os.path.join(os.path.abspath(working_dir), name)
    try:
        subprocess.call(['git', 'clone', url], cwd=working_dir)
        # cloc_repo() is called without a path, so it is run from inside the
        # checkout (presumably it analyzes the current directory).
        os.chdir(repo_dir)
        return cloc.cloc_repo()
    finally:
        # Always return to where the caller started instead of assuming the
        # working directory sits exactly one level below it.
        os.chdir(start_dir)
        if os.path.isdir(repo_dir):
            shutil.rmtree(repo_dir)  # clean up as we go along
# a bit hacky, should probs be generalized
def cloc_servo(working_dir):
    """Run cloc on each component directory of the servo repository.

    The repository is cloned into ``working_dir`` unless a ``servo`` entry
    already exists there.

    Args:
        working_dir: Directory that holds (or will receive) the ``servo``
            checkout.

    Returns:
        A list with one entry per directory under ``servo/components``: the
        result of ``cloc.cloc_repo()`` with ``"servo-<component>"`` inserted
        at index 0.
    """
    url = 'https://github.com/servo/servo.git'
    start_dir = os.getcwd()
    components = []
    try:
        os.chdir(working_dir)
        if not os.path.exists('servo'):
            subprocess.call(['git', 'clone', url])
        os.chdir(os.path.join('servo', 'components'))
        components_dir = os.getcwd()
        for component in os.listdir(components_dir):
            # Only directories can be entered; a stray file here would make
            # os.chdir raise NotADirectoryError.
            if not os.path.isdir(component):
                continue
            os.chdir(component)
            comp_out = cloc.cloc_repo()
            comp_out.insert(0, "servo-" + component)
            components.append(comp_out)
            # Go back by absolute path: '..' would leave the components
            # directory if the component is a symlink.
            os.chdir(components_dir)
    finally:
        # Restore the caller's directory even when something above fails.
        os.chdir(start_dir)
    return components
def cloc_rust(working_dir):
    """Run cloc on each directory under ``src`` of the rust repository.

    The repository is cloned into ``working_dir`` unless a ``rust`` entry
    already exists there.

    Args:
        working_dir: Directory that holds (or will receive) the ``rust``
            checkout.

    Returns:
        A list with one entry per directory under ``rust/src``: the result of
        ``cloc.cloc_repo()`` with ``"rust-<component>"`` inserted at index 0.
    """
    url = 'https://github.com/rust-lang/rust'
    start_dir = os.getcwd()
    components = []
    try:
        os.chdir(working_dir)
        if not os.path.exists('rust'):
            subprocess.call(['git', 'clone', url])
        os.chdir(os.path.join('rust', 'src'))
        src_dir = os.getcwd()
        for component in os.listdir(src_dir):
            if not os.path.isdir(component):
                continue
            os.chdir(component)
            comp_out = cloc.cloc_repo()
            comp_out.insert(0, "rust-" + component)
            components.append(comp_out)
            # Go back by absolute path: '..' would leave the source directory
            # if the component is a symlink.
            os.chdir(src_dir)
    finally:
        # Restore the caller's directory rather than assuming the working
        # directory sits exactly one level below it.
        os.chdir(start_dir)
    return components
# TODO add granularity -- should probably make an entry for anything with a cargo.toml
# currently using symlinks to capture granularity for style
# don't cd all over
def cloc_dir(working_dir, verbose):
    """Run cloc on every directory under ``working_dir`` that holds a Cargo.toml.

    Args:
        working_dir: Root of the tree to search; it is traversed with
            ``os.walk`` and the process working directory is not changed.
        verbose: Passed through unchanged to ``cloc.cloc_dir``.

    Returns:
        A list with one entry per matching directory: the result of
        ``cloc.cloc_dir(directory, verbose)`` with the directory path inserted
        at index 0.
    """
    components = []
    for subdir, _dirs, files in os.walk(working_dir):
        # os.walk already lists the files in each directory, so there is no
        # need to list it again; checking `files` also ignores a directory
        # that merely happens to be named Cargo.toml.
        if 'Cargo.toml' not in files:
            continue
        comp_out = cloc.cloc_dir(subdir, verbose)
        comp_out.insert(0, subdir)
        components.append(comp_out)
    return components
def main():
    """Command-line entry point: run the selected analyses and print a table.

    Parses the command-line flags, makes sure the ``cloc_tmp`` scratch
    directory exists, collects rows from the servo, rust and/or ``--dir``
    analyses, removes the scratch directory unless ``--keep`` was given, and
    prints the collected rows with ``tabulate``.
    """
    cli = argparse.ArgumentParser(description='cloc for rustlang')
    # (flag, argparse action, help text), registered in this order.
    option_specs = (
        ('--servo', 'store_true', 'include servo in analysis'),
        ('--rust', 'store_true', 'include rust in analysis'),
        ('--dir', 'store', 'analyze a particular existing directory. skips github'),
        ('--keep', 'store_true', 'keep temporary directory (will keep servo/rust too)'),
        ('--verbose', 'store_true', 'output file specific statistics to cloc-rust.out'),
    )
    for flag, action, help_text in option_specs:
        cli.add_argument(flag, action=action, help=help_text)
    options = cli.parse_args()

    start_dir = os.getcwd()
    rows = []

    # NOTE: the GitHub search crawl (curl of the repository search API into
    # crawl.json, then run_cloc on every returned item) is currently disabled.
    scratch_dir = 'cloc_tmp'
    if not os.path.exists(scratch_dir):
        os.makedirs(scratch_dir)

    # servo isn't listed with language:rust, so it has its own switch
    if options.servo:
        rows.extend(cloc_servo(scratch_dir))
    if options.rust:
        rows.extend(cloc_rust(scratch_dir))
    if options.dir:
        rows.extend(cloc_dir(options.dir, options.verbose))

    # The analyses change directory; return to the start before cleaning up.
    os.chdir(start_dir)
    if not options.keep:
        shutil.rmtree(scratch_dir)

    column_names = [
        "files", "blank", "comment", "code", "unsafe",
        "%unsafe", "fns", "unsafe fns", "%unsafe fns", "panics",
    ]
    print(tabulate(rows, headers=column_names))


if __name__ == "__main__":
    main()