From ffb623e0d959ae8b30a5fdaf3deb2323054adbc8 Mon Sep 17 00:00:00 2001 From: Mike Shriver Date: Thu, 16 Dec 2021 14:20:57 -0500 Subject: [PATCH] Add contributors query Use contributioncollection type in gql to fetch data in weekly intervals for a given user. Tabulate the various contribution types by week, separate table for each user Add new options for team and user Add option for output table format, default to fancy_grid --- scripts/gh_metrics.py | 104 +++++++++++++++-- setup.cfg | 9 +- utils/GQL_Queries/contributors_query.py | 114 +++++++++++++++++++ utils/GQL_Queries/github_wrappers.py | 143 ++++++++++++++++++++---- utils/GQL_Queries/review_teams_query.py | 1 - utils/metrics_calculators.py | 58 +++++++++- 6 files changed, 391 insertions(+), 38 deletions(-) create mode 100644 utils/GQL_Queries/contributors_query.py diff --git a/scripts/gh_metrics.py b/scripts/gh_metrics.py index 3f619f2..7b7a8e1 100644 --- a/scripts/gh_metrics.py +++ b/scripts/gh_metrics.py @@ -2,12 +2,14 @@ from pathlib import Path import click +from tabulate import multiline_formats from tabulate import tabulate from config import METRICS_OUTPUT from config import settings from utils import file_io from utils import metrics_calculators +from utils.GQL_Queries.github_wrappers import OrgWrapper # keys that will be read from settings files (dynaconf parsing) for command input defaults @@ -22,6 +24,7 @@ def report(): # reused options for multiple metrics functions +# TODO read defaults from settings output_prefix_option = click.option( "--output-file-prefix", default=settings.get(SETTINGS_OUTPUT_PREFIX, "metrics-report"), @@ -39,11 +42,35 @@ def report(): multiple=True, help="The repository name, like robottelo. 
", ) +team_name_option = click.option( + "--team", + default=[], + multiple=True, + help="The github team name slug (URL field form, like quality-engineers)", +) +user_name_option = click.option( + "--user", + default=[], + multiple=True, + help="The github login name (URL field form, like mshriver)", +) pr_count_option = click.option( "--pr-count", default=50, help="Number of PRs to include in metrics counts, will start from most recently created", ) +num_weeks_option = click.option( + "--num-weeks", + default=4, + type=click.IntRange(1, 52), + help="Number of weeks of metrics history to collect", +) +table_format_option = click.option( + "--table-format", + default="fancy_grid", + type=click.Choice(multiline_formats), + help="The tabulate output format, https://github.com/astanin/python-tabulate#multiline-cells", +) @report.command( @@ -54,7 +81,8 @@ def report(): @repo_name_option @output_prefix_option @pr_count_option -def repo_pr_metrics(org, repo, output_file_prefix, pr_count): +@table_format_option +def repo_pr_metrics(org, repo, output_file_prefix, pr_count, table_format): for repo_name in repo: click.echo(f"Collecting metrics for {org}/{repo_name} ...") @@ -67,7 +95,7 @@ def repo_pr_metrics(org, repo, output_file_prefix, pr_count): click.echo(header) click.echo("-" * len(header)) click.echo( - tabulate(pr_metrics, headers="keys", tablefmt="github", floatfmt=".1f") + tabulate(pr_metrics, headers="keys", tablefmt=table_format, floatfmt=".1f") ) header = f"Review Metric Statistics for [{repo_name}]" @@ -75,7 +103,9 @@ def repo_pr_metrics(org, repo, output_file_prefix, pr_count): click.echo(header) click.echo("-" * len(header)) click.echo( - tabulate(stat_metrics, headers="keys", tablefmt="github", floatfmt=".1f") + tabulate( + stat_metrics, headers="keys", tablefmt=table_format, floatfmt=".1f" + ) ) pr_metrics_filename = METRICS_OUTPUT.joinpath( @@ -105,12 +135,15 @@ def repo_pr_metrics(org, repo, output_file_prefix, pr_count): ) 
-@report.command("reviewer-report") +@report.command( + "reviewer-report", help="Gather metrics on reviewer actions within a GH repo" +) @org_name_option @repo_name_option @output_prefix_option @pr_count_option -def reviewer_actions(org, repo, output_file_prefix, pr_count): +@table_format_option +def reviewer_actions(org, repo, output_file_prefix, pr_count, table_format): """ Generate metrics for tier reviewer groups, and general contributors Will collect tier reviewer teams from the github org @@ -127,13 +160,13 @@ def reviewer_actions(org, repo, output_file_prefix, pr_count): click.echo(f"\n{'-' * len(header)}") click.echo(header) click.echo("-" * len(header)) - click.echo(tabulate(t1_metrics, headers="keys", tablefmt="github")) + click.echo(tabulate(t1_metrics, headers="keys", tablefmt=table_format)) header = f"Tier2 Reviewer actions by week for [{repo_name}]" click.echo(f"\n{'-' * len(header)}") click.echo(header) click.echo("-" * len(header)) - click.echo(tabulate(t2_metrics, headers="keys", tablefmt="github")) + click.echo(tabulate(t2_metrics, headers="keys", tablefmt=table_format)) tier1_metrics_filename = METRICS_OUTPUT.joinpath( f"{Path(output_file_prefix).stem}-" @@ -160,3 +193,60 @@ def reviewer_actions(org, repo, output_file_prefix, pr_count): tier2_metrics_filename, tabulate(t2_metrics, headers="keys", tablefmt="html"), ) + + +@report.command("contributor-report") +@org_name_option +@output_prefix_option +@team_name_option +@num_weeks_option +@table_format_option +@user_name_option +def contributor_actions(org, output_file_prefix, team, num_weeks, table_format, user): + """Collect count metrics of various contribution types""" + + orgwrap = OrgWrapper(name=org) + + collected_users = [] + + if not (user or team): + click.echo("ERROR: Need to specify either a team and/or user") + + collaborators = list(user) # might be empty, we're gonna add users from the team(s) + + for team_name in team: + # Assert the team exists and list its members + team_members = 
orgwrap.team_members(team=team_name) + click.echo(f"Team members for {org}/{team_name}:\n" + "\n".join(team_members)) + collaborators.extend(team_members) + + # maybe drop this into an OrgWrapper function + # replacing the function label here in the loop + for user in collaborators: + if user not in collected_users: + click.echo(f"Retrieving metrics for user: {user}") + collected_users.append(user) + else: + click.echo(f"Skipping user (member of multiple teams): {user}") + # collect metrics for the given user if not already covered by another team + contributor_counts = metrics_calculators.contributor_actions( + user=user, num_weeks=num_weeks + ) + + header = f"Contributions by week for [{user}]" + click.echo(f"\n{'-' * len(header)}") + click.echo(header) + click.echo("-" * len(header)) + click.echo(tabulate(contributor_counts, tablefmt=table_format, headers="keys")) + + user_metrics_filename = METRICS_OUTPUT.joinpath( + f"{Path(output_file_prefix).stem}-" + f"{user}-" + "contributor-" + f"{datetime.now().isoformat(timespec='minutes')}.html" + ) + click.echo(f"\nWriting contributor metrics as HTML to {user_metrics_filename}") + file_io.write_to_output( + user_metrics_filename, + tabulate(contributor_counts, headers="keys", tablefmt="html"), + ) diff --git a/setup.cfg b/setup.cfg index 88d7950..b61d7b4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,18 +8,21 @@ author-email = 'mshriver@redhat.com' url='https://gitlab.cee.redhat.com/mshriver/qe-pr-metrics' [options] +python_requires = >=3.8 packages = find: setup_requires = setuptools_scm>=3.0.0 install_requires = attrs - attrdict + python-box cached-property click - dynaconf>=3.0 - gql + dynaconf + gql>=3.0.0rc0 logzero PyGithub + python-dateutil requests + requests-toolbelt # gql 3.0 http transport tabulate [options.extras_require] diff --git a/utils/GQL_Queries/contributors_query.py b/utils/GQL_Queries/contributors_query.py new file mode 100644 index 0000000..2bb4f5f --- /dev/null +++ 
b/utils/GQL_Queries/contributors_query.py @@ -0,0 +1,114 @@ +# Importable string for GQL query + +org_team_members_query = """query orgTeamMembers($organization: String!, $team: String!) { + organization(login:$organization) { + team(slug:$team) { + name + members{ + nodes { + login + name + } + } + } + } +} +""" # noqa: E501 + +contributions_counts_by_user_query = """query getContributions($user: String! $from_date: DateTime!, $to_date: DateTime!) { + user(login:$user) { + contributionsCollection (from:$from_date, to: $to_date) { + pullRequestContributionsByRepository { + repository {name} + contributions { totalCount } + } + pullRequestReviewContributionsByRepository { + repository {name} + contributions { totalCount } + + } + issueContributionsByRepository { + repository {name} + contributions {totalCount} + } + commitContributionsByRepository { + repository {name} + contributions {totalCount} + } + } + } +} +""" # noqa: E501 + +contributions_by_org_members_query = """query getContributions ($organization: String!, $team: String!, $from_date: DateTime!, $to_date: DateTime!) { + organization(login:$organization) { + team(slug:$team) { + name + members { + nodes { + login + contributionsCollection (from: $from_date, to: $to_date) { + pullRequestContributionsByRepository (maxRepositories:10) { + repository {name} + contributions (last:10){ + nodes { + pullRequest { + number + changedFiles + deletions + additions + } + occurredAt + } + } + } + pullRequestReviewContributionsByRepository (maxRepositories: 10) { + repository {name} + contributions (last:50) { + nodes { + occurredAt + pullRequest { number } + } + } + } + } + } + } + } + } +} +""" # noqa: E501 + +contributions_counts_by_org_members_query = """ +query contributions_counts_by_org_members_query ($organization: String!, $team: String!, $from_date: DateTime!, $to_date: DateTime!) 
{ + organization(login: $organization) { + team(slug: $team) { + name + members { + nodes { + login + contributionsCollection (from: $from_date, to: $to_date) { + pullRequestContributionsByRepository { + repository {name} + contributions { totalCount } + } + pullRequestReviewContributionsByRepository { + repository {name} + contributions { totalCount } + + } + issueContributionsByRepository { + repository {name} + contributions {totalCount} + } + commitContributionsByRepository { + repository {name} + contributions {totalCount} + } + } + } + } + } + } +} +""" # noqa: E501 diff --git a/utils/GQL_Queries/github_wrappers.py b/utils/GQL_Queries/github_wrappers.py index b19bef5..30b3a13 100644 --- a/utils/GQL_Queries/github_wrappers.py +++ b/utils/GQL_Queries/github_wrappers.py @@ -1,15 +1,19 @@ +from collections import defaultdict from datetime import datetime +from datetime import timedelta import attr +from box import Box from cached_property import cached_property -from gql import Client as gql_client +from gql import Client as GqlClient from gql import gql from gql.transport.requests import RequestsHTTPTransport from logzero import logger from config import settings -from utils.GQL_Queries.pr_query import pr_review_query -from utils.GQL_Queries.review_teams_query import org_teams_query +from utils.GQL_Queries import contributors_query +from utils.GQL_Queries import pr_query +from utils.GQL_Queries import review_teams_query GH_TOKEN = settings.gh_token @@ -18,6 +22,21 @@ SECONDS_TO_HOURS = 3600 +WEEK_DELTA = timedelta(weeks=1) +NOW = datetime.now() + + +@attr.s +class GQLClient: + transport = RequestsHTTPTransport( + url=GH_GQL_URL, headers={"Authorization": f"bearer {GH_TOKEN}"} + ) + + @cached_property + def session(self): + client = GqlClient(transport=self.transport, fetch_schema_from_transport=True) + return client + @attr.s class RepoWrapper: @@ -26,14 +45,7 @@ class RepoWrapper: organization = attr.ib() repo_name = attr.ib() - @cached_property - def 
client_session(self): - transport = RequestsHTTPTransport( - url=GH_GQL_URL, headers={"Authorization": f"bearer {GH_TOKEN}"} - ) - client = gql_client(transport=transport, fetch_schema_from_transport=True) - with client as session: - yield session + gql_client = GQLClient() @cached_property def reviewer_teams(self): @@ -42,9 +54,11 @@ def reviewer_teams(self): Returns: dictionary, keyed on 'tier1' and 'tier2', with lists of team members """ - org_teams = self.client_session.execute( - gql(org_teams_query), variable_values={"organization": self.organization}, - )["organization"]["teams"]["nodes"] + with self.gql_client.session as gql_session: + org_teams = gql_session.execute( + gql(review_teams_query.org_teams_query), + variable_values={"organization": self.organization}, + )["organization"]["teams"]["nodes"] try: settings_team_names = settings.reviewer_teams.get(self.organization).get( self.repo_name @@ -82,16 +96,20 @@ def pull_requests(self, count=100, block_count=50): pr_nodes = [] fetched = 0 # tracks total number of PRs pulled gql_pr_cursor = None - while fetched < count: - pr_block = self.client_session.execute( - gql(pr_review_query), - variable_values={"prCursor": gql_pr_cursor, "blockCount": block_count}, - ) - gql_pr_cursor = pr_block["repository"]["pullRequests"]["pageInfo"][ - "endCursor" - ] - pr_nodes.extend(pr_block["repository"]["pullRequests"]["nodes"]) - fetched += block_count + with self.gql_client.session as gql_session: + while fetched < count: + pr_block = gql_session.execute( + gql(pr_query.pr_review_query), + variable_values={ + "prCursor": gql_pr_cursor, + "blockCount": block_count, + }, + ) + gql_pr_cursor = pr_block["repository"]["pullRequests"]["pageInfo"][ + "endCursor" + ] + pr_nodes.extend(pr_block["repository"]["pullRequests"]["nodes"]) + fetched += block_count prws = {} # flatten data_blocks a bit, we just want the nodes for pr_node in pr_nodes: @@ -218,6 +236,8 @@ class ReadyWrapper(EventWrapper): class PRWrapper: """Class for 
modeling the data returned from the GQL query for PRs""" + gql_client = GQLClient() + number = attr.ib() repo = attr.ib() url = attr.ib() @@ -404,3 +424,78 @@ def hours_from_tier1_to_tier2(self): / SECONDS_TO_HOURS, 1, ) + + +@attr.s +class OrgWrapper: + """Wrap the org queries""" + + gql_client = GQLClient() + + name = attr.ib() + + def team_members(self, team): + """Get the logins for the given team""" + with self.gql_client.session as gql_session: + gql_data = gql_session.execute( + gql(contributors_query.org_team_members_query), + variable_values={"organization": self.name, "team": team}, + ) + return [ + u["login"] for u in gql_data["organization"]["team"]["members"]["nodes"] + ] + + +@attr.s +class UserWrapper: + """wrap the user queries""" + + gql_client = GQLClient() + + login = attr.ib() + + def contributions(self, from_date=None, to_date=None): + """Get the contributions collections for date range + + Args: + from_date: iso8601 datetime, defaults to 1 week ago + to_date: iso8601 datetime, defaults to now + + Return: + list of dicts with contribution counts, looks like: + ```[{'login': 'gh-name', + 'contributionsCollection': { + 'pullRequestContributionsByRepository': [{'repository': {'name': 'airgun'}, + 'contributions': {'totalCount': 3}}], + 'pullRequestReviewContributionsByRepository': [], + 'issueContributionsByRepository': [], + 'commitContributionsByRepository': []}}, + """ # noqa: E501 + from_date = from_date or (NOW - WEEK_DELTA) + to_date = to_date or NOW + with self.gql_client.session as gql_session: + gql_data = gql_session.execute( + gql(contributors_query.contributions_counts_by_user_query), + variable_values={ + "user": self.login, + "from_date": from_date.isoformat(timespec="seconds"), + "to_date": to_date.isoformat(timespec="seconds"), + }, + ) + # flatten dictionary value lists to repo name key and count value + # also shortening the type string + flattened_counts = defaultdict(lambda: defaultdict(dict)) + for cont_type, repo_conts in 
Box( + gql_data["user"]["contributionsCollection"] + ).items(): + short_type = cont_type[ + 0 : cont_type.index("ContributionsByRepository") # noqa: E203 + ] + if repo_conts: + for repo_cont in repo_conts: + flattened_counts[short_type][ + repo_cont.repository.name + ] = repo_cont.contributions.totalCount + else: # some are empty lists + flattened_counts[short_type] = {} + return flattened_counts diff --git a/utils/GQL_Queries/review_teams_query.py b/utils/GQL_Queries/review_teams_query.py index 785ba23..7ea118d 100644 --- a/utils/GQL_Queries/review_teams_query.py +++ b/utils/GQL_Queries/review_teams_query.py @@ -1,5 +1,4 @@ # Importable strings for GQL queries -# Paginated on 50 PRs at a time org_teams_query = """query getReviewerTeams($organization: String!) { organization(login:$organization) { diff --git a/utils/metrics_calculators.py b/utils/metrics_calculators.py index 6290f21..cfad757 100644 --- a/utils/metrics_calculators.py +++ b/utils/metrics_calculators.py @@ -1,12 +1,17 @@ from collections import defaultdict from datetime import date +from datetime import datetime +from datetime import timedelta from statistics import fmean from statistics import median from statistics import pstdev -from attrdict import AttrDict +from box import Box +from dateutil.rrule import rrule +from dateutil.rrule import WEEKLY from .GQL_Queries.github_wrappers import RepoWrapper +from .GQL_Queries.github_wrappers import UserWrapper EMPTY = "---" @@ -33,8 +38,8 @@ """ -class PullRequestMetrics(AttrDict): - """Dummy class to provide distinct type around AttrDict""" +class PullRequestMetrics(Box): + """Dummy class to provide distinct type around Box""" pass @@ -153,3 +158,50 @@ def reviewer_actions(organization, repository, pr_count=100): t1_metrics.sort(key=lambda m: m["Week"], reverse=True) t2_metrics.sort(key=lambda m: m["Week"], reverse=True) return t1_metrics, t2_metrics + + +def contributor_actions(user, num_weeks): + """ + Gather metrics for contributions by week for members 
of an organization team + + Query will include PR, issue, PR review, and commit contributions by repository, by week + + Iterate over weekly recurrence queries + + Data returned is ready for tabulate with headers=keys + Organize metrics by type of action, first column is week, finally by repository + """ + userwrap = UserWrapper(login=user) + dated_counts = defaultdict(list) + now = datetime.now() + starting_date = now - timedelta(weeks=num_weeks) + # rrule will create a list of start/stop times for weekly interval + datelist = rrule(WEEKLY, until=now, dtstart=starting_date) + # iterate over sets of start/stop by zipping the list against itself + # relying on python to keep these list items in order. + for from_date, to_date in zip(datelist, datelist[1:]): + + user_contributions = userwrap.contributions( + from_date=from_date, to_date=to_date + ) + # {'pullRequest': {'repo-metrics': 1}, + # 'pullRequestReview': {'robottelo': 1}, + # 'issue': {}, + # 'commit': {} + + # want: + # {'week': [from0, from1, from2] + # 'pullRequest': [{'repo': 1}, {}, {'other': 2}]} + + dated_counts["week"].append(from_date.strftime(DATE_FMT)) + + for cont_type, cont_repos in user_contributions.items(): + # Convert the raw value dicts to table cell values + if cont_repos: + dated_counts[cont_type].append( + "\n".join(f"{r}: {c}" for r, c in cont_repos.items()) + ) + else: + dated_counts[cont_type].append("---") + + return dated_counts