From 78e9b24bfab7ca8ed4ba7bc5b8c19096ad8d405f Mon Sep 17 00:00:00 2001 From: sdiazlor Date: Thu, 6 Jun 2024 14:39:24 +0200 Subject: [PATCH 1/4] feat: add script --- .../docs/scripts/gen_popular_issues.py | 123 ++++++++++++++++++ argilla-sdk/mkdocs.yml | 2 + 2 files changed, 125 insertions(+) create mode 100644 argilla-sdk/docs/scripts/gen_popular_issues.py diff --git a/argilla-sdk/docs/scripts/gen_popular_issues.py b/argilla-sdk/docs/scripts/gen_popular_issues.py new file mode 100644 index 0000000000..6b20380881 --- /dev/null +++ b/argilla-sdk/docs/scripts/gen_popular_issues.py @@ -0,0 +1,123 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from datetime import datetime + +import pandas as pd +import requests +import mkdocs_gen_files + + +REPOSITORY = "argilla-io/argilla" +DATA_PATH = "community/popular_issues.md" + +GITHUB_ACCESS_TOKEN = os.environ["GITHUB_ACCESS_TOKEN"] + +def fetch_data_from_github(repository, auth_token): + headers = { + 'Authorization': f'token {auth_token}', + 'Accept': 'application/vnd.github.v3+json' + } + issues_data = [] + + with requests.Session() as session: + session.headers.update(headers) + + owner, repo_name = repository.split('/') + issues_url = f"https://api.github.com/repos/{owner}/{repo_name}/issues?state=all" + + while issues_url: + response = session.get(issues_url) + issues = response.json() + + for issue in issues: + issues_data.append({ + 'Issue': f"{issue['number']} - {issue['title']}", + 'State': issue['state'], + 'Created at': issue['created_at'], + 'Closed at': issue.get('closed_at', None), + 'Last update': issue['updated_at'], + 'Labels': [label['name'] for label in issue['labels']], + 'Milestone': (issue.get('milestone') or {}).get('title'), + 'Reactions': issue['reactions']['total_count'], + 'Comments': issue['comments'], + 'URL': issue['html_url'], + 'Repository': repo_name, + 'Author': issue['user']['login'] + }) + + issues_url = response.links.get('next', {}).get('url', None) + + return pd.DataFrame(issues_data) + +def get_org_members(auth_token): + headers = { + 'Authorization': f'token {auth_token}', + 'Accept': 'application/vnd.github.v3+json' + } + members_list = [] + + members_url = f"https://api.github.com/orgs/argilla-io/members" + + while members_url: + response = requests.get(members_url, headers=headers) + members = response.json() + + for member in members: + members_list.append(member['login']) + + members_list.extend(['pre-commit-ci[bot]']) + + members_url = response.links.get('next', {}).get('url', None) + + return members_list + + +with mkdocs_gen_files.open(DATA_PATH, "w") as f: + + df = fetch_data_from_github(REPOSITORY, GITHUB_ACCESS_TOKEN) + + open_issues = df.loc[df['State'] == 'open'] + engagement_df = open_issues[["URL", "Issue", "Repository", "Reactions", "Comments"]].sort_values(by=["Reactions", "Comments"], ascending=False).head(10).reset_index() + + members = get_org_members(GITHUB_ACCESS_TOKEN) + community_issues = df.loc[~df['Author'].isin(members)] + community_issues_df = community_issues[["URL", "Issue", "Repository", "Created at", "Author", "State"]].sort_values(by=["Created at"], ascending=False).head(10).reset_index() + + planned_issues = df.loc[df['Milestone'].notna()] + planned_issues_df = planned_issues[["URL", "Issue", "Repository", "Created at", "Milestone", "State"]].sort_values(by=["Milestone"], ascending=False).head(10).reset_index() + + f.write("=== \"Most engaging open issues\"\n\n") + f.write(" | Rank | Issue | Reactions | Comments |\n") + f.write(" |------|-------|:---------:|:--------:|\n") + for ix, row in engagement_df.iterrows(): + f.write(f" | {ix+1} | [{row['Issue']}]({row['URL']}) | 👍 {row['Reactions']} | 💬 {row['Comments']} |\n") + + f.write("\n=== \"Latest issues open by the community\"\n\n") + f.write(" | Rank | Issue | Author |\n") + f.write(" |------|-------|:------:|\n") + for ix, row in community_issues_df.iterrows(): + state = '🟢' if row['State'] == 'open' else '🟣' + f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | by **{row['Author']}** |\n") + + f.write("\n=== \"Planned issues for upcoming releases\"\n\n") + f.write(" | Rank | Issue | Milestone |\n") + f.write(" |------|-------|:------:|\n") + for ix, row in planned_issues_df.iterrows(): + state = '🟢' if row['State'] == 'open' else '🟣' + f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | **{row['Milestone']}** |\n") + + today = datetime.today().date() + f.write(f"\nLast update: {today}\n") diff --git a/argilla-sdk/mkdocs.yml b/argilla-sdk/mkdocs.yml index 850decc636..3627bd2291 100644 --- a/argilla-sdk/mkdocs.yml +++ b/argilla-sdk/mkdocs.yml @@ -103,6 +103,7 @@ plugins: - open-in-new-tab # - gen-files: # scripts: + # - docs/scripts/gen_popular_issues.py # - docs/scripts/gen_ref_pages.py - literate-nav: nav_file: SUMMARY.md @@ -144,5 +145,6 @@ nav: - Community: - community/index.md - How to contribute?: community/contributor.md + # - Popular issues: community/popular_issues.md - UI Demo ↗: - https://demo.argilla.io/sign-in?auth=ZGVtbzoxMjM0NTY3OA== \ No newline at end of file From 7ec3be540e51482c5f3384b8b78d3e93b7332d2f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:49:14 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../docs/scripts/gen_popular_issues.py | 104 ++++++++++-------- 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/argilla-sdk/docs/scripts/gen_popular_issues.py b/argilla-sdk/docs/scripts/gen_popular_issues.py index 6b20380881..24a04fc9bc 100644 --- a/argilla-sdk/docs/scripts/gen_popular_issues.py +++ b/argilla-sdk/docs/scripts/gen_popular_issues.py @@ -25,17 +25,15 @@ GITHUB_ACCESS_TOKEN = os.environ["GITHUB_ACCESS_TOKEN"] + def fetch_data_from_github(repository, auth_token): - headers = { - 'Authorization': f'token {auth_token}', - 'Accept': 'application/vnd.github.v3+json' - } + headers = {"Authorization": f"token {auth_token}", "Accept": "application/vnd.github.v3+json"} issues_data = [] with requests.Session() as session: session.headers.update(headers) - owner, repo_name = repository.split('/') + owner, repo_name = repository.split("/") issues_url = f"https://api.github.com/repos/{owner}/{repo_name}/issues?state=all" while issues_url: @@ -43,81 +41,95 @@ def fetch_data_from_github(repository, auth_token): issues = response.json() for issue in issues: - issues_data.append({ - 'Issue': f"{issue['number']} - {issue['title']}", - 'State': issue['state'], - 'Created at': issue['created_at'], - 'Closed at': issue.get('closed_at', None), - 'Last update': issue['updated_at'], - 'Labels': [label['name'] for label in issue['labels']], - 'Milestone': (issue.get('milestone') or {}).get('title'), - 'Reactions': issue['reactions']['total_count'], - 'Comments': issue['comments'], - 'URL': issue['html_url'], - 'Repository': repo_name, - 'Author': issue['user']['login'] - }) - - issues_url = response.links.get('next', {}).get('url', None) + issues_data.append( + { + "Issue": f"{issue['number']} - {issue['title']}", + "State": issue["state"], + "Created at": issue["created_at"], + "Closed at": issue.get("closed_at", None), + "Last update": issue["updated_at"], + "Labels": [label["name"] for label in issue["labels"]], + "Milestone": (issue.get("milestone") or {}).get("title"), + "Reactions": issue["reactions"]["total_count"], + "Comments": issue["comments"], + "URL": issue["html_url"], + "Repository": repo_name, + "Author": issue["user"]["login"], + } + ) + + issues_url = response.links.get("next", {}).get("url", None) return pd.DataFrame(issues_data) + def get_org_members(auth_token): - headers = { - 'Authorization': f'token {auth_token}', - 'Accept': 'application/vnd.github.v3+json' - } + headers = {"Authorization": f"token {auth_token}", "Accept": "application/vnd.github.v3+json"} members_list = [] - members_url = f"https://api.github.com/orgs/argilla-io/members" + members_url = "https://api.github.com/orgs/argilla-io/members" while members_url: response = requests.get(members_url, headers=headers) members = response.json() for member in members: - members_list.append(member['login']) + members_list.append(member["login"]) + + members_list.extend(["pre-commit-ci[bot]"]) - members_list.extend(['pre-commit-ci[bot]']) - - members_url = response.links.get('next', {}).get('url', None) + members_url = response.links.get("next", {}).get("url", None) return members_list with mkdocs_gen_files.open(DATA_PATH, "w") as f: - df = fetch_data_from_github(REPOSITORY, GITHUB_ACCESS_TOKEN) - open_issues = df.loc[df['State'] == 'open'] - engagement_df = open_issues[["URL", "Issue", "Repository", "Reactions", "Comments"]].sort_values(by=["Reactions", "Comments"], ascending=False).head(10).reset_index() + open_issues = df.loc[df["State"] == "open"] + engagement_df = ( + open_issues[["URL", "Issue", "Repository", "Reactions", "Comments"]] + .sort_values(by=["Reactions", "Comments"], ascending=False) + .head(10) + .reset_index() + ) members = get_org_members(GITHUB_ACCESS_TOKEN) - community_issues = df.loc[~df['Author'].isin(members)] - community_issues_df = community_issues[["URL", "Issue", "Repository", "Created at", "Author", "State"]].sort_values(by=["Created at"], ascending=False).head(10).reset_index() - - planned_issues = df.loc[df['Milestone'].notna()] - planned_issues_df = planned_issues[["URL", "Issue", "Repository", "Created at", "Milestone", "State"]].sort_values(by=["Milestone"], ascending=False).head(10).reset_index() - - f.write("=== \"Most engaging open issues\"\n\n") + community_issues = df.loc[~df["Author"].isin(members)] + community_issues_df = ( + community_issues[["URL", "Issue", "Repository", "Created at", "Author", "State"]] + .sort_values(by=["Created at"], ascending=False) + .head(10) + .reset_index() + ) + + planned_issues = df.loc[df["Milestone"].notna()] + planned_issues_df = ( + planned_issues[["URL", "Issue", "Repository", "Created at", "Milestone", "State"]] + .sort_values(by=["Milestone"], ascending=False) + .head(10) + .reset_index() + ) + + f.write('=== "Most engaging open issues"\n\n') f.write(" | Rank | Issue | Reactions | Comments |\n") f.write(" |------|-------|:---------:|:--------:|\n") for ix, row in engagement_df.iterrows(): f.write(f" | {ix+1} | [{row['Issue']}]({row['URL']}) | 👍 {row['Reactions']} | 💬 {row['Comments']} |\n") - f.write("\n=== \"Latest issues open by the community\"\n\n") + f.write('\n=== "Latest issues open by the community"\n\n') f.write(" | Rank | Issue | Author |\n") f.write(" |------|-------|:------:|\n") for ix, row in community_issues_df.iterrows(): - state = '🟢' if row['State'] == 'open' else '🟣' + state = "🟢" if row["State"] == "open" else "🟣" f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | by **{row['Author']}** |\n") - - f.write("\n=== \"Planned issues for upcoming releases\"\n\n") + + f.write('\n=== "Planned issues for upcoming releases"\n\n') f.write(" | Rank | Issue | Milestone |\n") f.write(" |------|-------|:------:|\n") for ix, row in planned_issues_df.iterrows(): - state = '🟢' if row['State'] == 'open' else '🟣' + state = "🟢" if row["State"] == "open" else "🟣" f.write(f" | {ix+1} | {state} [{row['Issue']}]({row['URL']}) | **{row['Milestone']}** |\n") - + today = datetime.today().date() f.write(f"\nLast update: {today}\n") From 88dbb20d1643ad90cb2efc148714e454503958b5 Mon Sep 17 00:00:00 2001 From: sdiazlor Date: Fri, 7 Jun 2024 18:08:44 +0200 Subject: [PATCH 3/4] change name --- argilla-sdk/mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argilla-sdk/mkdocs.yml b/argilla-sdk/mkdocs.yml index 3627bd2291..2e909eb1ca 100644 --- a/argilla-sdk/mkdocs.yml +++ b/argilla-sdk/mkdocs.yml @@ -145,6 +145,6 @@ nav: - Community: - community/index.md - How to contribute?: community/contributor.md - # - Popular issues: community/popular_issues.md + # - Issue dashboard: community/popular_issues.md - UI Demo ↗: - https://demo.argilla.io/sign-in?auth=ZGVtbzoxMjM0NTY3OA== \ No newline at end of file From 830d7a128fa7b2151bdd588dba04978e82c1c215 Mon Sep 17 00:00:00 2001 From: sdiazlor Date: Sun, 9 Jun 2024 12:47:31 +0200 Subject: [PATCH 4/4] add message --- argilla-sdk/docs/scripts/gen_popular_issues.py | 1 + 1 file changed, 1 insertion(+) diff --git a/argilla-sdk/docs/scripts/gen_popular_issues.py b/argilla-sdk/docs/scripts/gen_popular_issues.py index 24a04fc9bc..469bd6789e 100644 --- a/argilla-sdk/docs/scripts/gen_popular_issues.py +++ b/argilla-sdk/docs/scripts/gen_popular_issues.py @@ -30,6 +30,7 @@ def fetch_data_from_github(repository, auth_token): headers = {"Authorization": f"token {auth_token}", "Accept": "application/vnd.github.v3+json"} issues_data = [] + print(f"Fetching issues from {repository}...") with requests.Session() as session: session.headers.update(headers)