Skip to content

Commit

Permalink
Initial entrypoint + util module implementation
Browse files Browse the repository at this point in the history
Simple lookup and calculation of two metrics: the delay from a PR being opened to its first review comment, and the delay from a PR receiving the review label to its first review comment
  • Loading branch information
mshriver committed Jul 14, 2020
1 parent 56b2859 commit ff34298
Show file tree
Hide file tree
Showing 10 changed files with 211 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
max-line-length = 100
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

# Ignore dynaconf secret files
.secrets.*

.eggs
.idea
.vscode
.qe-pr-metrics
qe_pr_metrics.egg-info/

settings.yaml
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
repos:
- repo: https://github.com/asottile/reorder_python_imports
rev: v2.3.0
hooks:
- id: reorder-python-imports
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.1.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: debug-statements
- repo: https://github.com/psf/black
rev: 19.10b0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
hooks:
- id: flake8
5 changes: 5 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from dynaconf import Dynaconf

# Project-wide settings object. Values are loaded from settings.yaml and
# .secrets.yaml; "METRICS" is passed as the environment-variable prefix.
settings = Dynaconf(
    envvar_prefix="METRICS",
    settings_files=["settings.yaml", ".secrets.yaml"],
)
38 changes: 38 additions & 0 deletions scripts/gh_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import json
import time
from pathlib import Path

import click
from tabulate import tabulate

from config import settings
from utils import github_client


# Parent click group: subcommands (currently only `gather`) attach to it via
# `@generate_metrics.command(...)`.
@click.group()
def generate_metrics():
    """Collect and report review-timing metrics for GitHub pull requests."""


@generate_metrics.command("gather", help="Gather PR metrics for given GH repo")
@click.option("--repo-name", default="SatelliteQE/robottelo")
@click.option(
    "--metric", type=click.Choice(["time_to_comment"]), default="time_to_comment"
)
@click.option(
    "--file-output",
    # Explicit parameter name: without it click passes the value as
    # `file_output`, which does not match the function signature.
    "output",
    default=settings.get("metrics_output_file_prefix", "gh-pr-metrics"),
    help="Will only take file name (with or without extension), but not a full path. "
    "Will append an epoch timestamp to the file name.",
)
def gather(repo_name, metric, output):
    """Compute the chosen metric for a repository, print it and save it as JSON.

    Args:
        repo_name: repository name including the owner/org.
        metric: name of the function in ``utils.github_client`` to execute.
        output: file name prefix for the JSON dump; any directory part and
            extension are dropped, and an epoch timestamp is appended.
    """
    # Look the metric function up by name on the github util module and run it.
    metrics = getattr(github_client, metric)(repo_name=repo_name)

    click.echo(tabulate(metrics.values(), showindex=metrics.keys(), headers="keys"))

    # `.stem` keeps only the bare file name, so the file is always written to
    # the current working directory as "<name>-<epoch seconds>.json".
    output_filename = f"{Path(output).stem}-{int(time.time())}.json"
    with open(output_filename, "w") as output_file:
        json.dump(metrics, output_file)
3 changes: 3 additions & 0 deletions settings.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
gh_repo: SatelliteQE/Robottelo
gh_token: <GH token with read>
metrics_output_file_prefix: "gh-pr-metrics"
24 changes: 18 additions & 6 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
[metadata]
name = 'qe-pr-metrics'
name = qe-pr-metrics
description = 'tool for collecting metrics on PRs'
long-description = file: README.md
long-description-content-type: text/markdown
author = 'Mike Shriver'
author_email = '[email protected]'
author-email = '[email protected]'
url='https://gitlab.cee.redhat.com/mshriver/qe-pr-metrics'

[options]
zip_safe = False
include_package_data = True
packages = find:
entry_points = file:entry_points.txt
setup_requires = setuptools_scm>=3.0.0
install_requires =
PyGithub
install_requires =
attrs
attrdict
cached-property
click
dynaconf>=3.0
PyGithub
tabulate

[options.extras_require]
dev =
pre-commit
ipython

[options.entry_points]
console_scripts =
github-metrics = scripts.gh_metrics:generate_metrics
7 changes: 1 addition & 6 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
from setuptools import setup

with open('README.md') as readme:
readme_text = readme_file.read()

setup(
use_scm_version=True,
long_description=readme_text,
setup(use_scm_version=True)
Empty file added utils/__init__.py
Empty file.
113 changes: 113 additions & 0 deletions utils/github_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import attr
from attrdict import AttrDict
from cached_property import cached_property
from github import Github

from config import settings


# Repository name read from the dynaconf settings (`gh_repo` key).
# NOTE(review): not referenced elsewhere in the visible part of this module.
GH_repo = settings.gh_repo
# GitHub API token read from the dynaconf settings (`gh_token` key).
GH_TOKEN = settings.gh_token

# Module-level GitHub client built from the token; created at import time and
# used by time_to_comment() to look up repositories.
gh_api = Github(GH_TOKEN)

"""
Functions for interacting with github's API, calculating a PR's various timing metrics.
"""


class PullRequestMetrics(AttrDict):
    """Named AttrDict subclass used for the per-PR metric records.

    Adds no behaviour of its own; it only gives the records a distinct type.
    """


@attr.s
class PRWrapper(object):
    """Class for compositing additional properties onto the GH PR instance.

    Every derived value is a ``cached_property``, so the GitHub API calls behind
    it are made at most once per wrapper instance.
    """

    pr = attr.ib()  # the GH api PR object

    @cached_property
    def first_review(self):
        """The earliest submitted review on the PR that is not by the PR author.

        Returns None if there are no such reviews.
        """
        author_login = self.pr.user.login
        # Reviews without a submission timestamp (e.g. not yet submitted) cannot
        # be ordered and would make the comparison fail, so they are skipped.
        submitted_reviews = (
            review
            for review in self.pr.get_reviews()
            if review.user.login != author_login and review.submitted_at is not None
        )
        # min() keeps the first of equally-early reviews, matching a stable sort.
        return min(submitted_reviews, key=lambda r: r.submitted_at, default=None)

    @cached_property
    def review_label_added(self):
        """The first 'labeled' issue event that applied the "review" label.

        Events are taken in the order the API returns them.
        Returns None if the label was never applied.
        """
        label_events = (
            event
            for event in self.pr.get_issue_events()
            if event.event == "labeled" and event.label and event.label.name == "review"
        )
        # Stop at the first match instead of collecting every event.
        return next(label_events, None)

    @cached_property
    def create_to_first_review(self):
        """Seconds (float) from PR creation to the first review.

        Returns None if the PR has no review yet.
        """
        # TODO factor in DO NOT MERGE label event: when the PR carried that label,
        # measure from the time the label was removed instead of from creation
        if self.first_review is None:
            return None
        return (self.first_review.submitted_at - self.pr.created_at).total_seconds()

    @cached_property
    def review_label_to_first_review(self):
        """Seconds (float) from the review label being applied to the first review.

        Returns None if the PR has no review or never received the review label.
        """
        if self.first_review is None or self.review_label_added is None:
            return None
        return (
            self.first_review.submitted_at - self.review_label_added.created_at
        ).total_seconds()


def time_to_comment(repo_name):
    """Collect review-timing metrics for the open PRs of a repository.

    For every open PR against ``master`` two values are gathered: the time from
    PR creation to the first review, and the time from the 'review' label to
    the first review.

    Args:
        repo_name: string repository name, including the owner/org
            (example: SatelliteQE/robottelo)

    Returns:
        dict, keyed on the PR number, where values are PullRequestMetrics
        records with the keys ``create_to_review`` and ``label_to_review``
    """
    repository = gh_api.get_repo(repo_name)
    open_pulls = repository.get_pulls(state="open", sort="created", base="master")
    # TODO: multi-threaded processing of PRs
    wrapped_pulls = (PRWrapper(pull) for pull in open_pulls)
    return {
        wrapped.pr.number: PullRequestMetrics(
            create_to_review=wrapped.create_to_first_review,
            label_to_review=wrapped.review_label_to_first_review,
        )
        for wrapped in wrapped_pulls
    }


# for debugging purposes: running this module directly gathers the metrics for
# SatelliteQE/robottelo and binds them to `metrics`; nothing is printed here
if __name__ == "__main__":
    metrics = time_to_comment("SatelliteQE/robottelo")

0 comments on commit ff34298

Please sign in to comment.