From 129aee812ac5fb239c42e942b851ff45e3557c06 Mon Sep 17 00:00:00 2001 From: Kevin Lai <70177777+klai95@users.noreply.github.com> Date: Fri, 14 Apr 2023 14:35:32 -0500 Subject: [PATCH] Filter maintenance.timeline to return data from expected date range based on limit (#960) * added a method to filter commit activity data based on start date and end date for maintenance timeline to align parity with usage timelime * fixed test error * implemented _process_maintenance_timeline to return data that is consistent with how the usage timeline is implemented, refactored test_activity_dashboard to reflect the code changes, and cleaned up code to improve readability in model.py and test_activity_dashboard.py in general * updated test_fixtures.py and resolved merge conflicts * updated test_activity_dashboard.py * refactored variable name and some more code cleanup * refactored methods in test_activity_dashoboard.py * code cleanup * refactored name for generate_expected_maintenance_timeline * minor spacing fix * addressed code review feedback and created _process_for _dates to be used by both _process_usage_timeline and _process_maintenance_timeline to avoid code duplication * minor code cleanup * refactored model.py such that latest_commits.json and commit_activity.json contain data in which plugin is lower case to maintain parity with the implementation of install activity * refactored _process_maintenance_timline method to be more readable * refactored code to improve readability in model.py --- backend/api/_tests/test_activity_dashboard.py | 82 +++++++++++-------- backend/api/_tests/test_fixtures.py | 17 +++- backend/api/model.py | 44 +++++++--- backend/api/models/_tests/test_metrics.py | 6 +- 4 files changed, 99 insertions(+), 50 deletions(-) diff --git a/backend/api/_tests/test_activity_dashboard.py b/backend/api/_tests/test_activity_dashboard.py index 85a4202d7..13d5eb8b0 100644 --- a/backend/api/_tests/test_activity_dashboard.py +++ b/backend/api/_tests/test_activity_dashboard.py @@ -7,7 +7,7 @@ from datetime import datetime from dateutil.relativedelta import relativedelta -from api._tests.test_fixtures import generate_expected_timeline +from api._tests.test_fixtures import generate_expected_usage_timeline, generate_expected_maintenance_timeline BASE = datetime.today().date().replace(day=1) DATE_LIST = [BASE - relativedelta(months=x) for x in range(12) if x % 2 == 0] @@ -18,10 +18,19 @@ PLUGIN_NAME_CLEAN = 'string-1' MOCK_PLUGIN_RECENT_INSTALLS = {PLUGIN_NAME_CLEAN: 25, 'foo': 10, 'bar': 30} MOCK_PLUGIN_LATEST_COMMIT = 1672531200000 -MOCK_PLUGIN_TOTAL_COMMIT = 200 MOCK_PLUGIN_TOTAL_COMMIT_EMPTY = 0 -MOCK_PLUGIN_COMMIT_ACTIVITY = [{'timestamp': 1643673600000, 'commits': 200}] -MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY = [] +MOCK_PLUGIN_TOTAL_COMMIT_NONEMPTY = 5 +MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT = [] +MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY = [ + {'timestamp': 1672531200000, 'commits': 0}, + {'timestamp': 1675209600000, 'commits': 0}, + {'timestamp': 1677628800000, 'commits': 0} +] +MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY = [ + {'timestamp': 1672531200000, 'commits': 1}, + {'timestamp': 1675209600000, 'commits': 3}, + {'timestamp': 1677628800000, 'commits': 1} +] def generate_expected_metrics(timeline=None, total_installs=0, installs_in_last_30_days=0, latest_commit=None, @@ -35,7 +44,7 @@ def generate_expected_metrics(timeline=None, total_installs=0, installs_in_last_ } }, 'maintenance': { - 'timeline': commit_activity if commit_activity else MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, + 'timeline': commit_activity, 'stats': { 'latest_commit_timestamp': latest_commit, 'total_commits': total_commit @@ -43,6 +52,7 @@ def generate_expected_metrics(timeline=None, total_installs=0, installs_in_last_ } } + class TestActivityDashboard(unittest.TestCase): @patch.object(model, 'get_latest_commit', return_value=None) @@ -52,29 +62,32 @@ class TestActivityDashboard(unittest.TestCase): def test_get_metrics_empty(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit): expected = generate_expected_metrics( - timeline=generate_expected_timeline(-3, to_installs=lambda i: 0), + timeline=generate_expected_usage_timeline(-3, to_installs=lambda i: 0), total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, + commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY ) - self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data) + self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit, + mock_get_install_timeline_data, mock_get_recent_activity_data) @patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT) - @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY) + @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY) @patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS) @patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy()) def test_get_metrics_nonempty(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit): expected = generate_expected_metrics( - timeline=generate_expected_timeline(-3), + timeline=generate_expected_usage_timeline(-3), total_installs=sum(MOCK_INSTALLS), installs_in_last_30_days=25, latest_commit=MOCK_PLUGIN_LATEST_COMMIT, - total_commit=MOCK_PLUGIN_TOTAL_COMMIT, - commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY + total_commit=MOCK_PLUGIN_TOTAL_COMMIT_NONEMPTY, + commit_activity=generate_expected_maintenance_timeline(-3) ) - self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data) + self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit, + mock_get_install_timeline_data, mock_get_recent_activity_data) @patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT) - @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY) + @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT) @patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS) @patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy()) def test_get_metrics_nonempty_zero_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data, @@ -83,13 +96,14 @@ def test_get_metrics_nonempty_zero_limit(self, mock_get_install_timeline_data, m total_installs=sum(MOCK_INSTALLS), installs_in_last_30_days=25, latest_commit=MOCK_PLUGIN_LATEST_COMMIT, - total_commit=MOCK_PLUGIN_TOTAL_COMMIT, - commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY + total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, + commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT ) - self._verify_results('0', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data) + self._verify_results('0', expected, mock_get_commit_activity, mock_get_latest_commit, + mock_get_install_timeline_data, mock_get_recent_activity_data) @patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT) - @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY) + @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT) @patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS) @patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy()) def test_get_metrics_nonempty_invalid_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data, @@ -98,27 +112,30 @@ def test_get_metrics_nonempty_invalid_limit(self, mock_get_install_timeline_data total_installs=sum(MOCK_INSTALLS), installs_in_last_30_days=25, latest_commit=MOCK_PLUGIN_LATEST_COMMIT, - total_commit=MOCK_PLUGIN_TOTAL_COMMIT, - commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY + total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, + commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT ) - self._verify_results('foo', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data) + self._verify_results('foo', expected, mock_get_commit_activity, mock_get_latest_commit, + mock_get_install_timeline_data, mock_get_recent_activity_data) @patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT) - @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY) + @patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT) @patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS) @patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy()) def test_get_metrics_nonempty_negative_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data, - mock_get_commit_activity, mock_get_latest_commit): + mock_get_commit_activity, mock_get_latest_commit): expected = generate_expected_metrics( total_installs=sum(MOCK_INSTALLS), installs_in_last_30_days=25, latest_commit=MOCK_PLUGIN_LATEST_COMMIT, - total_commit=MOCK_PLUGIN_TOTAL_COMMIT, - commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY + total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, + commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT ) - self._verify_results('-5', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data) + self._verify_results('-5', expected, mock_get_commit_activity, mock_get_latest_commit, + mock_get_install_timeline_data, mock_get_recent_activity_data) - def _verify_results(self, limit, expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data): + def _verify_results(self, limit, expected, mock_get_commit_activity, mock_get_latest_commit, + mock_get_install_timeline_data, mock_get_recent_activity_data): from api.model import get_metrics_for_plugin result = get_metrics_for_plugin(PLUGIN_NAME, limit, False) self.assertEqual(expected, result) @@ -131,13 +148,14 @@ def _verify_results(self, limit, expected, mock_get_commit_activity, mock_get_la class TestMetricModel: @pytest.mark.parametrize('commit_activity_input, commit_activity_result, latest_commit, total_commit, total_installs,' 'recent_installs, timeline, limit', [ - (MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, None, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 0, 0, generate_expected_timeline(-3, to_installs=lambda i: 0), '3'), - (MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, generate_expected_timeline(-3), '3'), - (MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, [], '0'), - (MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, [], 'foo'), - (MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, [], '-5'), + (MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, None, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 0, 0, generate_expected_usage_timeline(-3, to_installs=lambda i: 0), '3'), + (MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY, MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_NONEMPTY, 25, 21, generate_expected_usage_timeline(-3), '3'), + (MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 25, 21, [], '0'), + (MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 25, 21, [], 'foo'), + (MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 25, 21, [], '-5'), ]) - def test_metrics_api_using_dynamo(self, monkeypatch, commit_activity_input, commit_activity_result, latest_commit, total_commit, total_installs, recent_installs, timeline, limit): + def test_metrics_api_using_dynamo(self, monkeypatch, commit_activity_input, commit_activity_result, latest_commit, + total_commit, total_installs, recent_installs, timeline, limit): monkeypatch.setattr(model, 'get_commit_activity', self._validate_args_return_value(commit_activity_input)) monkeypatch.setattr(model, 'get_latest_commit', self._validate_args_return_value(latest_commit)) monkeypatch.setattr(model.InstallActivity, 'get_total_installs', self._validate_args_return_value(total_installs)) diff --git a/backend/api/_tests/test_fixtures.py b/backend/api/_tests/test_fixtures.py index c21f0d387..7ecfc5798 100644 --- a/backend/api/_tests/test_fixtures.py +++ b/backend/api/_tests/test_fixtures.py @@ -5,9 +5,18 @@ BASE = datetime.today().date().replace(day=1) -def generate_expected_timeline(start_range, - timestamp_key='timestamp', - installs_key='installs', - to_installs=lambda i: 2 if i % 2 == 0 else 0): +def generate_expected_usage_timeline(start_range, + timestamp_key='timestamp', + installs_key='installs', + to_installs=lambda i: 2 if i % 2 == 0 else 0): to_timestamp = lambda i: int(pd.Timestamp(BASE + relativedelta(months=i)).timestamp()) * 1000 return [{timestamp_key: to_timestamp(i), installs_key: to_installs(i)} for i in range(start_range, 0)] + + +def generate_expected_maintenance_timeline(start_range, + timestamp_key='timestamp', + commits_key='commits', + to_commits=lambda i: 3 if i % 2 == 0 else 1): + to_timestamp = lambda i: int(pd.Timestamp(BASE + relativedelta(months=i)).timestamp()) * 1000 + return [{timestamp_key: to_timestamp(i), commits_key: to_commits(i)} for i in range(start_range, 0)] + diff --git a/backend/api/model.py b/backend/api/model.py index 9ccdcec67..a40126f77 100644 --- a/backend/api/model.py +++ b/backend/api/model.py @@ -1,5 +1,5 @@ from concurrent import futures -from datetime import datetime +from datetime import date, datetime import json import os from typing import Tuple, Dict, List, Callable, Any @@ -18,7 +18,6 @@ from api.zulip import notify_new_packages import boto3 import snowflake.connector as sc -from datetime import date from dateutil.relativedelta import relativedelta index_subset = {'name', 'summary', 'description_text', 'description_content_type', @@ -430,11 +429,16 @@ def _update_activity_timeline_data(): write_data(csv_string, "activity_dashboard_data/plugin_installs.csv") -def _process_for_timeline(plugin_df, limit): - date_format = '%Y-%m-%d' +def _process_for_dates(limit): end_date = date.today().replace(day=1) + relativedelta(months=-1) start_date = end_date + relativedelta(months=-limit + 1) dates = pd.date_range(start=start_date, periods=limit, freq='MS') + return start_date, end_date, dates + + +def _process_usage_timeline(plugin_df, limit): + date_format = '%Y-%m-%d' + start_date, end_date, dates = _process_for_dates(limit) plugin_df = plugin_df[(plugin_df['MONTH'] >= start_date.strftime(date_format)) & ( plugin_df['MONTH'] <= end_date.strftime(date_format))] result = [] @@ -448,6 +452,22 @@ def _process_for_timeline(plugin_df, limit): return result +def _process_maintenance_timeline(commit_activity, limit): + start_date, end_date, dates = _process_for_dates(limit) + maintenance_dict = {} + for commit_obj in commit_activity: + commit_obj_datetime = datetime.utcfromtimestamp(commit_obj['timestamp'] / 1000) + if start_date <= commit_obj_datetime.date() <= end_date: + maintenance_dict[commit_obj_datetime] = commit_obj + result = [] + for cur_date in dates: + if cur_date in maintenance_dict: + result.append(maintenance_dict[cur_date]) + else: + result.append({'timestamp': int(cur_date.timestamp()) * 1000, 'commits': 0}) + return result + + def _process_for_stats(plugin_df): if len(plugin_df) == 0: return {} @@ -483,7 +503,7 @@ def _update_recent_activity_data(number_of_time_periods=30, time_granularity='DA def _update_repo_to_plugin_dict(repo_to_plugin_dict: dict, plugin_obj: dict): code_repository = plugin_obj.get('code_repository') if code_repository: - repo_to_plugin_dict[code_repository.replace('https://github.com/', '')] = plugin_obj['name'] + repo_to_plugin_dict[code_repository.replace('https://github.com/', '')] = plugin_obj['name'].lower() return repo_to_plugin_dict @@ -524,7 +544,8 @@ def _update_latest_commits(repo_to_plugin_dict): repo = row[0] if repo in repo_to_plugin_dict: plugin = repo_to_plugin_dict[repo] - data[plugin] = int(pd.to_datetime(row[1]).strftime("%s")) * 1000 + timestamp = int(pd.to_datetime(row[1]).strftime("%s")) * 1000 + data[plugin] = timestamp write_data(json.dumps(data), "activity_dashboard_data/latest_commits.json") @@ -547,9 +568,10 @@ def _update_commit_activity(repo_to_plugin_dict): for cursor in cursor_list: for repo, month, commit_count in cursor: if repo in repo_to_plugin_dict: + plugin = repo_to_plugin_dict[repo] timestamp = int(pd.to_datetime(month).strftime("%s")) * 1000 commits = int(commit_count) - data.setdefault(repo_to_plugin_dict[repo], []).append({'timestamp': timestamp, 'commits': commits}) + data.setdefault(plugin, []).append({'timestamp': timestamp, 'commits': commits}) for plugin in data: data[plugin] = sorted(data[plugin], key=lambda x: (x['timestamp'])) write_data(json.dumps(data), "activity_dashboard_data/commit_activity.json") @@ -565,7 +587,7 @@ def _get_usage_data(plugin: str, limit: int, use_dynamo: bool) -> Dict[str, Any] :params bool use_dynamo: Fetch data from dynamo if True, else fetch from s3. """ if use_dynamo: - timeline = InstallActivity.get_timeline(plugin, limit) if limit else [] + usage_timeline = InstallActivity.get_timeline(plugin, limit) if limit else [] usage_stats = { 'total_installs': InstallActivity.get_total_installs(plugin), 'installs_in_last_30_days': InstallActivity.get_recent_installs(plugin, 30) @@ -576,9 +598,9 @@ def _get_usage_data(plugin: str, limit: int, use_dynamo: bool) -> Dict[str, Any] 'total_installs': _process_for_stats(data).get('totalInstalls', 0), 'installs_in_last_30_days': get_recent_activity_data().get(plugin, 0), } - timeline = _process_for_timeline(data, limit) if limit else [] + usage_timeline = _process_usage_timeline(data, limit) if limit else [] - return {'timeline': timeline, 'stats': usage_stats, } + return {'timeline': usage_timeline, 'stats': usage_stats, } def get_metrics_for_plugin(plugin: str, limit: str, use_dynamo_for_usage: bool) -> Dict[str, Any]: @@ -598,7 +620,7 @@ def get_metrics_for_plugin(plugin: str, limit: str, use_dynamo_for_usage: bool) if limit.isdigit() and limit != '0': month_delta = max(int(limit), 0) - maintenance_timeline = commit_activity[-month_delta:] + maintenance_timeline = _process_maintenance_timeline(commit_activity, int(limit)) maintenance_stats = { 'latest_commit_timestamp': get_latest_commit(plugin), diff --git a/backend/api/models/_tests/test_metrics.py b/backend/api/models/_tests/test_metrics.py index 470a5e984..c3082a599 100644 --- a/backend/api/models/_tests/test_metrics.py +++ b/backend/api/models/_tests/test_metrics.py @@ -5,7 +5,7 @@ import pytest from dateutil.relativedelta import relativedelta -from api._tests.test_fixtures import generate_expected_timeline +from api._tests.test_fixtures import generate_expected_usage_timeline from api.models.metrics import InstallActivity PLUGIN_NAME = 'foo' @@ -80,8 +80,8 @@ def test_get_recent_installs(self, monkeypatch, results, expected): @pytest.mark.parametrize('results, month_delta, expected', [ ([], 0, []), - ([], 1, generate_expected_timeline(-1, to_installs=lambda i: 0)), - (generate_expected_results(-4), 4, generate_expected_timeline(-4)), + ([], 1, generate_expected_usage_timeline(-1, to_installs=lambda i: 0)), + (generate_expected_results(-4), 4, generate_expected_usage_timeline(-4)), ]) def test_get_timeline(self, monkeypatch, results, month_delta, expected): mock_install_activity = Mock(return_value=results)