Skip to content

Commit

Permalink
Create an authors script
Browse files Browse the repository at this point in the history
I was curious about the contributions to the stochastic_tools module, so I added this tool.
  • Loading branch information
aeslaughter committed Nov 10, 2020
1 parent dbf7540 commit 8b4f305
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .mailmap
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,8 @@ Justin Herter <[email protected]> EC2 Default User <ec2-user@ip-10-0-74-120
Justin Herter <[email protected]> herter <[email protected]>
Som L. Dhulipala <[email protected]> somu15 <[email protected]>
Samuel Tew <[email protected]> Samuel K. Tew <[email protected]>
Peter German <[email protected]>
Peter German <[email protected]>
Peter German <[email protected]>
Zachary M. Prince <[email protected]>
Zachary M. Prince <[email protected]>
4 changes: 3 additions & 1 deletion python/mooseutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
from .mooseutils import touch, unique_list, gold, make_chunks, camel_to_space
from .mooseutils import text_diff, unidiff, text_unidiff, run_profile, list_files, check_output, run_time
from .mooseutils import generate_filebase, recursive_update, fuzzyEqual, fuzzyAbsoluteEqual
from .gitutils import is_git_repo, git_commit, git_commit_message, git_merge_commits, git_ls_files, git_root_dir, git_init_submodule, git_submodule_status, git_version
from .gitutils import is_git_repo, git_commit, git_commit_message, git_merge_commits, git_ls_files
from .gitutils import git_root_dir, git_init_submodule, git_submodule_status, git_version
from .gitutils import git_authors, git_lines
from .message import mooseDebug, mooseWarning, mooseMessage, mooseError
from .MooseException import MooseException
from .eval_path import eval_path
Expand Down
39 changes: 38 additions & 1 deletion python/mooseutils/gitutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re
import subprocess
import logging
import collections
from .mooseutils import check_output

def is_git_repo(working_dir=os.getcwd()):
Expand Down Expand Up @@ -41,13 +42,15 @@ def git_merge_commits(working_dir=os.getcwd()):
out = check_output(['git', 'log', '-1', '--merges', '--pretty=format:%P'], cwd=working_dir)
return out.strip(' \n').split(' ')

def git_ls_files(working_dir=os.getcwd(), recurse_submodules=False):
def git_ls_files(working_dir=os.getcwd(), recurse_submodules=False, exclude=None):
"""
Return a list of files via 'git ls-files'.
"""
cmd = ['git', 'ls-files']
if recurse_submodules:
cmd.append('--recurse-submodules')
if exclude is not None:
cmd += ['--exclude', exclude]
out = set()
for fname in check_output(cmd, cwd=working_dir).split('\n'):
out.add(os.path.abspath(os.path.join(working_dir, fname)))
Expand Down Expand Up @@ -93,3 +96,37 @@ def git_version():
if match is None:
raise SystemError("git --version failed to return correctly formatted version number")
return (int(match.group('major')), int(match.group('minor')), int(match.group('patch')))

def git_authors(loc=None):
    """
    Return a complete list of authors for the given location.

    Inputs:
      loc: File/directory to consider; the current working directory is used if not supplied

    The names are parsed from the output of 'git shortlog -n -c -s -- <loc>' and are returned
    in the order that git reports them.

    Raises an OSError if the location does not exist.
    """
    # Apply the default before validating: os.path.exists(None) raises a TypeError, which
    # made the documented default of loc=None unusable.
    loc = loc or os.getcwd()
    if not os.path.exists(loc):
        raise OSError("The supplied location must be a file or directory: {}".format(loc))

    # NOTE(review): '-c' asks shortlog to group by committer rather than author; confirm
    # that this is the intended identity for an "authors" listing.
    out = check_output(['git', 'shortlog', '-n', '-c', '-s', '--', loc])

    # Each summary line is "<count><whitespace><name>"; only the name is kept
    return [match.group('name') for match in
            re.finditer(r'^\s*\d+\s*(?P<name>.*?)$', out, flags=re.MULTILINE)]

def git_lines(filename, blank=False):
    """
    Return the number of lines per author for the given filename

    Inputs:
      filename: Filename to consider
      blank[bool]: Include/exclude blank lines

    The counts are gathered from the output of 'git blame -- <filename>' and returned as a
    collections.defaultdict(int) with the author name as the key.

    Raises an OSError if the supplied filename is not an existing file.
    """
    if not os.path.isfile(filename):
        raise OSError("File does not exist: {}".format(filename))

    # A blame line is expected to look like "<commit> (<name> <YYYY-MM-DD> ...) <content>"
    regex = re.compile(r'^.*?\((?P<name>.*?)\s+\d{4}-\d{2}-\d{2}.*?\)\s+(?P<content>.*?)$', flags=re.MULTILINE)
    counts = collections.defaultdict(int)
    blame = check_output(['git', 'blame', '--', filename], encoding='utf-8')
    for line in blame.splitlines():
        match = regex.search(line)
        if match is None:
            # Not a recognizable blame line; skip it rather than fail with an AttributeError
            continue
        # The content group has leading whitespace removed by the pattern, so a line
        # containing only whitespace is treated as blank
        if blank or match.group('content'):
            counts[match.group('name')] += 1
    return counts
24 changes: 24 additions & 0 deletions python/mooseutils/tests/test_gitutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,5 +85,29 @@ def testGitVersion2(self, re_func):
with self.assertRaises(SystemError):
ver = mooseutils.git_version()

def testGitAuthors(self):
    """Check the author list for the mooseutils package and the error for a bad location"""
    authors = mooseutils.git_authors(mooseutils.__file__)
    self.assertIn('Andrew E. Slaughter', authors)

    # A location that does not exist on disk must be rejected
    self.assertRaises(OSError, mooseutils.git_authors, 'wrong')

def testGitLines(self):
    """Compare the per-author line counts of this file against a direct read of the file"""
    with open(__file__, 'r') as fid:
        content = fid.readlines()

    n_with_blank = len(content)
    n_blank = sum(1 for text in content if not text.strip())
    n_no_blank = n_with_blank - n_blank

    # Default behavior: blank lines are not counted
    by_author = mooseutils.git_lines(__file__)
    self.assertIn('Andrew E. Slaughter', by_author)
    self.assertTrue(by_author['Andrew E. Slaughter'] > 0)
    self.assertEqual(n_no_blank, sum(by_author.values()))

    # With blank=True every line of the file is attributed to someone
    by_author = mooseutils.git_lines(__file__, blank=True)
    self.assertIn('Andrew E. Slaughter', by_author)
    self.assertTrue(by_author['Andrew E. Slaughter'] > 0)
    self.assertEqual(n_with_blank, sum(by_author.values()))

if __name__ == '__main__':
unittest.main(verbosity=2, buffer=True)
99 changes: 99 additions & 0 deletions scripts/authors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
import os
import collections
import argparse
import multiprocessing
import mooseutils

# Languages that can be reported, in display order. Each name maps to a function that accepts
# a filename and returns True if the file belongs to that language.
def _suffix_test(*suffixes):
    """Return a function that tests if a filename ends with any of the supplied suffixes"""
    return lambda f: f.endswith(suffixes)

LANGUAGES = collections.OrderedDict([
    ('C++', _suffix_test('.C', '.h')),
    ('Python', _suffix_test('.py')),
    ('Input', _suffix_test('.i', '.hit')),
    ('Markdown', _suffix_test('.md')),
    ('Make', _suffix_test('Makefile', '.mk')),
    ('YAML', _suffix_test('.yml')),
])

def get_options():
    """
    Return the command-line options

    The returned argparse namespace contains:
      locations[list]: Directories to consider, defaults to the git root directory
      num_threads[int]: Number of processes used for counting
      exclude[str]: Exclude pattern passed to the 'git ls-files' call
      languages[list]: Keys of LANGUAGES to include in the analysis
    """
    parser = argparse.ArgumentParser(description='Tool for listing author line counts.',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('locations', nargs='*', type=str, default=[mooseutils.git_root_dir()],
                        help='The repository directory to consider.')
    parser.add_argument('-j', '--num-threads', type=int, default=os.cpu_count(),
                        help="The number of threads to use for computing the counts.")
    # No 'nargs' here: with nargs=1 a user-supplied value is stored as a one-item list while
    # the default is a plain string, and the list form breaks the 'git ls-files' command.
    parser.add_argument('--exclude', type=str, default='contrib',
                        help="Exclude pattern passed to git ls-files call.")
    parser.add_argument('-l', '--languages', nargs='+', type=str, choices=list(LANGUAGES.keys()),
                        default=list(LANGUAGES.keys()),
                        help="Limit the analysis the the listed languages.")

    return parser.parse_args()

def target(filename):
    """Count the lines of the supplied filename, by author, via mooseutils.git_lines"""
    line_counts = mooseutils.git_lines(filename)
    return line_counts

def update_count(c, lang, counts):
    """
    Accumulate the counts from a single file into the global count

    Input:
      c[dict]: Local counts with authors as keys, returned from 'target' function
      lang[str]: The language key that the 'c' count dict is associated
      counts[dict of dict]: The global count by author, then language (modified in place)
    """
    for author in c:
        by_language = counts[author]
        by_language[lang] = by_language[lang] + c[author]

def report(counts):
    """
    Prints the global count in a table on the screen

    Input:
      counts[dict of dict]: The global count by author, then language

    The rows are ordered from the largest to smallest total and a 'Total' column and a 'TOTAL'
    row are appended. The supplied 'counts' are not modified, and an empty 'counts' results in
    a table without any author rows.
    """
    # The column titles are taken from the first row; all rows are expected to share its keys
    first = next(iter(counts.values()), {})
    titles = list(first.keys()) + ['Total']
    row_format = '{:>25}' + '{:>10}' * len(titles)
    n = 25 + 10 * len(titles)
    totals = {k: 0 for k in titles}
    print('-'*n)
    print(row_format.format("Name", *titles))
    print('-'*n)

    for author, row in reversed(sorted(counts.items(), key=lambda item: sum(item[1].values()))):
        # Work on a copy so that the 'Total' entry is not written into the caller's data;
        # otherwise a second call would include the previous total in the sum.
        row = dict(row, Total=sum(row.values()))
        values = ['{:,}'.format(row[key]) for key in titles]
        for key in titles:
            totals[key] += row[key]
        print(row_format.format(author, *values))
    print('-'*n)
    print(row_format.format('TOTAL', *['{:,}'.format(totals[key]) for key in titles]))

if __name__ == '__main__':
    args = get_options()

    # Populate desired languages
    lang = collections.OrderedDict((key, LANGUAGES[key]) for key in args.languages)

    # The exclude option may arrive as a one-item list (argparse nargs=1) or as a plain
    # string (the default); git_ls_files needs a single string
    exclude = args.exclude[0] if isinstance(args.exclude, (list, tuple)) else args.exclude

    # List all files in the repository, considering every supplied location (previously only
    # the first location was used, once per supplied location)
    all_files = set()
    for location in args.locations:
        all_files.update(mooseutils.git_ls_files(os.path.abspath(location), exclude=exclude))

    # Group filenames by extension
    groups = collections.defaultdict(list)
    for filename in all_files:
        for key, func in lang.items():
            if func(filename):
                groups[key].append(filename)

    # Report author counts by file type
    counts = collections.defaultdict(lambda: {g:0 for g in lang.keys()})
    for group, files in groups.items():
        # Flush so the progress message is visible while the count is running
        print('Counting {} lines...'.format(group), end='', flush=True)
        with multiprocessing.Pool(processes=args.num_threads) as pool:
            for c in pool.imap_unordered(target, files):
                update_count(c, group, counts)
        print('done')
    report(counts)
50 changes: 50 additions & 0 deletions scripts/tests/test_authors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python3
#* This file is part of the MOOSE framework
#* https://www.mooseframework.org
#*
#* All rights reserved, see COPYRIGHT for full restrictions
#* https://github.com/idaholab/moose/blob/master/COPYRIGHT
#*
#* Licensed under LGPL 2.1, please see LICENSE for details
#* https://www.gnu.org/licenses/lgpl-2.1.html

import os
import sys
import unittest
import mooseutils

class Test(unittest.TestCase):
    """Runs the authors.py script on parts of the repository and inspects the printed table"""

    def _run_authors(self, folders, *options):
        """Execute ./authors.py from the scripts directory for the given python sub-folders"""
        root = mooseutils.git_root_dir()
        locations = [os.path.join(root, 'python', folder) for folder in folders]
        return mooseutils.check_output(['./authors.py', *locations, *options],
                                       cwd=os.path.join(root, 'scripts'))

    def testLocationFolders(self):
        """With multiple locations and no language filter every language column is listed"""
        out = self._run_authors(['mooseutils', 'moosesqa'], '-j', '1')
        self.assertIn('Andrew', out)
        for text in ('C++', 'Python', 'Input', 'Markdown', 'Make', 'YAML', 'Total', 'TOTAL'):
            self.assertIn(text, out)

    def testLanguage(self):
        """Limiting the languages to Python removes the other language columns"""
        out = self._run_authors(['mooseutils'], '-j', '1', '-l', 'Python')
        self.assertIn('Andrew', out)
        self.assertNotIn('C++', out)
        self.assertIn('Python', out)
        for text in ('Input', 'Markdown', 'Make', 'YAML'):
            self.assertNotIn(text, out)
        self.assertIn('Total', out)
        self.assertIn('TOTAL', out)


if __name__ == '__main__':
unittest.main(verbosity=2, buffer=True)
7 changes: 7 additions & 0 deletions scripts/tests/tests
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,11 @@
design = 'newsletter/index.md'
requirement = "The system shall include a utility for determining author contributions to the repository in the previous month."
[]
[authors]
type = PythonUnitTest
input = test_authors.py
issues = '#16137'
design = 'python/index.md'
requirement = "The system shall include a utility for counting the number of lines for each unique contributor."
[]
[]

0 comments on commit 8b4f305

Please sign in to comment.