From fae8792a118cf9d70d289a96ccae9f5587dfdb01 Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Wed, 5 Feb 2025 21:17:54 +0800 Subject: [PATCH 1/8] init --- dev/update_changelog.py | 197 +++++++++++++++++++++++++--------------- 1 file changed, 126 insertions(+), 71 deletions(-) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index 80bed873aeb3..2c75aaef661e 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -18,6 +18,9 @@ import pathlib import re +import subprocess +from concurrent.futures import ThreadPoolExecutor +import time try: import tomllib @@ -30,7 +33,9 @@ from github import Github from github.PullRequest import PullRequest from github.Repository import Repository +from github.Commit import Commit from github.Tag import Tag +from github.NamedUser import NamedUser REPO_NAME = "adap/flower" CHANGELOG_FILE = "framework/docs/source/ref-changelog.md" @@ -52,12 +57,25 @@ PATTERN_TEMPLATE = CONFIG["pattern_template"] PATTERN = PATTERN_TEMPLATE.format(types=TYPES, projects=PROJECTS, scope=SCOPE) +PR_TYPE_TO_SECTION = { + "feat": "### New features", + "docs": "### Documentation improvements", + "break": "### Incompatible changes", + "ci": "### Other changes", + "fix": "### Other changes", + "refactor": "### Other changes", +} -def _get_latest_tag(gh_api: Github) -> tuple[Repository, Optional[Tag]]: + +def _get_latest_tag(gh_api: Github) -> tuple[Repository, Optional[str]]: """Retrieve the latest tag from the GitHub repository.""" repo = gh_api.get_repo(REPO_NAME) - tags = repo.get_tags() - return repo, tags[0] if tags.totalCount > 0 else None + latest_tag = subprocess.run( + ["git", "describe", "--tags", "--abbrev=0"], + stdout=subprocess.PIPE, + text=True + ).stdout.strip() + return repo, latest_tag def _add_shortlog(new_version: str, shortlog: str) -> None: @@ -89,29 +107,86 @@ def _add_shortlog(new_version: str, shortlog: str) -> None: file.write(line) +def _git_commits_since_tag(repo: Repository, tag: str) -> set[Commit]: + """Get a set of commits since a given tag.""" + # Get SHA hashes of commits since the tag + result = subprocess.run( + ["git", "log", "--pretty=format:%H", f"{tag}..origin/main"], + stdout=subprocess.PIPE, + text=True, + ) + shas = set(result.stdout.splitlines()) + + # Fetch GitHub commits based on the SHA hashes + with ThreadPoolExecutor(max_workers=15) as executor: + commits = list(executor.map(repo.get_commit, shas)) + + return commits + + +def _get_contributors_from_commits(api: Github, commits: set[Commit]) -> set[str]: + """Get a set of contributors from a set of commits.""" + # Get authors and co-authors from the commits + authors: set[NamedUser] = set() + coauthor_names: set[str] = set() + coauthor_pattern = r"Co-authored-by:\s*(.+?)\s*<" + start = time.time() + # authors = {author for author in authors if author.name and "[bot]" not in author.name} + + def retrieve(commit: Commit) -> None: + if commit.author.name is None: + return + if "[bot]" in commit.author.name: + return + authors.add(commit.author) + print("A: ", time.time() - start) + # Find co-authors in the commit message + if matches := re.findall(coauthor_pattern, commit.commit.message): + coauthor_names.update(name for name in matches) + print("B: ", time.time() - start) + + with ThreadPoolExecutor(max_workers=15) as executor: + executor.map(retrieve, commits) + + print("Get info from commits:", time.time() - start) + + # Remove repeated usernames + contributors = set(author.name for author in authors if author.name) + coauthor_names.difference_update(contributors) + coauthor_names.difference_update(author.login for author in authors) + + # Get full names of the GitHub usernames + print("Coauthors", coauthor_names) + with ThreadPoolExecutor(max_workers=5) as executor: + names = list(executor.map(lambda x: api.get_user(x).name, coauthor_names)) + contributors.update(name for name in names if name) + return contributors + + def _get_pull_requests_since_tag( - repo: Repository, tag: Tag + api: Github, repo: Repository, tag: str ) -> tuple[str, set[PullRequest]]: """Get a list of pull requests merged into the main branch since a given tag.""" - commit_shas = set() - contributors = set() prs = set() - for commit in repo.compare(tag.commit.sha, "main").commits: - commit_shas.add(commit.sha) - if commit.author.name is None: - continue - if "[bot]" in commit.author.name: - continue - contributors.add(commit.author.name) + start = time.time() + commits = _git_commits_since_tag(repo, tag) + print("Time to get commits: ", time.time() - start) + + start = time.time() + contributors = _get_contributors_from_commits(api, commits) + print("Time to get contributors: ", time.time() - start) + start = time.time() + commit_shas = {commit.sha for commit in commits} for pr_info in repo.get_pulls( - state="closed", sort="created", direction="desc", base="main" + state="closed", sort="updated", direction="desc", base="main" ): if pr_info.merge_commit_sha in commit_shas: prs.add(pr_info) if len(prs) == len(commit_shas): break + print("Time to get PRs: ", time.time() - start) shortlog = ", ".join([f"`{name}`" for name in sorted(contributors)]) return shortlog, prs @@ -167,83 +242,52 @@ def _extract_changelog_entry( } -def _update_changelog(prs: set[PullRequest]) -> bool: +def _update_changelog(prs: set[PullRequest], tag: str) -> bool: """Update the changelog file with entries from provided pull requests.""" - breaking_changes = False - unknown_changes = False - with open(CHANGELOG_FILE, "r+", encoding="utf-8") as file: content = file.read() unreleased_index = content.find("## Unreleased") + # Find the end of the Unreleased section + end_index = content.find(f"## {tag}", unreleased_index + 1) + if unreleased_index == -1: print("Unreleased header not found in the changelog.") return False - # Find the end of the Unreleased section - next_header_index = content.find("## ", unreleased_index + 1) - next_header_index = ( - next_header_index if next_header_index != -1 else len(content) - ) - for pr_info in prs: + print("End index", end_index) parsed_title = _extract_changelog_entry(pr_info) - # Skip if PR should be skipped or already in changelog - if ( - parsed_title.get("scope", "unknown") == "skip" - or f"#{pr_info.number}]" in content - ): + # Skip if the PR is already in changelog + if f"#{pr_info.number}]" in content: continue + # Find section to insert pr_type = parsed_title.get("type", "unknown") - if pr_type == "feat": - insert_content_index = content.find("### What", unreleased_index + 1) - elif pr_type == "docs": - insert_content_index = content.find( - "### Documentation improvements", unreleased_index + 1 - ) - elif pr_type == "break": - breaking_changes = True - insert_content_index = content.find( - "### Incompatible changes", unreleased_index + 1 - ) - elif pr_type in {"ci", "fix", "refactor"}: - insert_content_index = content.find( - "### Other changes", unreleased_index + 1 + section = PR_TYPE_TO_SECTION.get(pr_type, "### Unknown changes") + insert_index = content.find(section, unreleased_index, end_index) + + # Add section if not exist + if insert_index == -1: + content = _insert_entry_no_desc( + content, + section, + unreleased_index, ) - else: - unknown_changes = True - insert_content_index = unreleased_index + insert_index = content.find(section, unreleased_index, end_index) pr_reference = _format_pr_reference( pr_info.title, pr_info.number, pr_info.html_url ) - content = _insert_entry_no_desc( content, pr_reference, - insert_content_index, - ) - - next_header_index = content.find("## ", unreleased_index + 1) - next_header_index = ( - next_header_index if next_header_index != -1 else len(content) - ) - - if unknown_changes: - content = _insert_entry_no_desc( - content, - "### Unknown changes", - unreleased_index, + insert_index, ) - if not breaking_changes: - content = _insert_entry_no_desc( - content, - "None", - content.find("### Incompatible changes", unreleased_index + 1), - ) + # Find the end of the Unreleased section + end_index = content.find(f"## {tag}", end_index) # Finalize content update file.seek(0) @@ -263,9 +307,9 @@ def _insert_entry_no_desc( return content -def _bump_minor_version(tag: Tag) -> Optional[str]: +def _bump_minor_version(tag: str) -> Optional[str]: """Bump the minor version of the tag.""" - match = re.match(r"v(\d+)\.(\d+)\.(\d+)", tag.name) + match = re.match(r"v(\d+)\.(\d+)\.(\d+)", tag) if match is None: return None major, minor, _ = [int(x) for x in match.groups()] @@ -274,17 +318,28 @@ def _bump_minor_version(tag: Tag) -> Optional[str]: return new_version +def _fetch_origin() -> None: + """Fetch the latest changes from the origin.""" + subprocess.run(["git", "fetch", "origin"]) + + def main() -> None: """Update changelog using the descriptions of PRs since the latest tag.""" # Initialize GitHub Client with provided token (as argument) gh_api = Github(argv[1]) + + # Fetch the latest changes from the origin + _fetch_origin() + + start = time.time() repo, latest_tag = _get_latest_tag(gh_api) if not latest_tag: - print("No tags found in the repository.") return - shortlog, prs = _get_pull_requests_since_tag(repo, latest_tag) - if _update_changelog(prs): + shortlog, prs = _get_pull_requests_since_tag(gh_api, repo, latest_tag) + + start = time.time() + if _update_changelog(prs, latest_tag): new_version = _bump_minor_version(latest_tag) if not new_version: print("Wrong tag format.") From c563ead3b3bf92f528b22294b428def71c8668d9 Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Wed, 5 Feb 2025 22:03:46 +0800 Subject: [PATCH 2/8] format --- dev/update_changelog.py | 52 ++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index 2c75aaef661e..800c08601a72 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -20,22 +20,21 @@ import re import subprocess from concurrent.futures import ThreadPoolExecutor -import time try: import tomllib except ModuleNotFoundError: import tomli as tomllib + from datetime import date from sys import argv from typing import Optional from github import Github -from github.PullRequest import PullRequest -from github.Repository import Repository from github.Commit import Commit -from github.Tag import Tag from github.NamedUser import NamedUser +from github.PullRequest import PullRequest +from github.Repository import Repository REPO_NAME = "adap/flower" CHANGELOG_FILE = "framework/docs/source/ref-changelog.md" @@ -44,8 +43,8 @@ # Load the TOML configuration with (pathlib.Path(__file__).parent.resolve() / "changelog_config.toml").open( "rb" -) as file: - CONFIG = tomllib.load(file) +) as toml_f: + CONFIG = tomllib.load(toml_f) # Extract types, project, and scope from the config TYPES = "|".join(CONFIG["type"]) @@ -64,6 +63,7 @@ "ci": "### Other changes", "fix": "### Other changes", "refactor": "### Other changes", + "unknown": "### Unknown changes", } @@ -71,9 +71,10 @@ def _get_latest_tag(gh_api: Github) -> tuple[Repository, Optional[str]]: """Retrieve the latest tag from the GitHub repository.""" repo = gh_api.get_repo(REPO_NAME) latest_tag = subprocess.run( - ["git", "describe", "--tags", "--abbrev=0"], - stdout=subprocess.PIPE, - text=True + ["git", "describe", "--tags", "--abbrev=0"], + stdout=subprocess.PIPE, + text=True, + check=True, ).stdout.strip() return repo, latest_tag @@ -114,9 +115,10 @@ def _git_commits_since_tag(repo: Repository, tag: str) -> set[Commit]: ["git", "log", "--pretty=format:%H", f"{tag}..origin/main"], stdout=subprocess.PIPE, text=True, + check=True, ) shas = set(result.stdout.splitlines()) - + # Fetch GitHub commits based on the SHA hashes with ThreadPoolExecutor(max_workers=15) as executor: commits = list(executor.map(repo.get_commit, shas)) @@ -130,8 +132,6 @@ def _get_contributors_from_commits(api: Github, commits: set[Commit]) -> set[str authors: set[NamedUser] = set() coauthor_names: set[str] = set() coauthor_pattern = r"Co-authored-by:\s*(.+?)\s*<" - start = time.time() - # authors = {author for author in authors if author.name and "[bot]" not in author.name} def retrieve(commit: Commit) -> None: if commit.author.name is None: @@ -139,24 +139,19 @@ def retrieve(commit: Commit) -> None: if "[bot]" in commit.author.name: return authors.add(commit.author) - print("A: ", time.time() - start) # Find co-authors in the commit message if matches := re.findall(coauthor_pattern, commit.commit.message): coauthor_names.update(name for name in matches) - print("B: ", time.time() - start) with ThreadPoolExecutor(max_workers=15) as executor: executor.map(retrieve, commits) - - print("Get info from commits:", time.time() - start) # Remove repeated usernames - contributors = set(author.name for author in authors if author.name) - coauthor_names.difference_update(contributors) + contributors = {author.name for author in authors if author.name} + coauthor_names -= contributors coauthor_names.difference_update(author.login for author in authors) # Get full names of the GitHub usernames - print("Coauthors", coauthor_names) with ThreadPoolExecutor(max_workers=5) as executor: names = list(executor.map(lambda x: api.get_user(x).name, coauthor_names)) contributors.update(name for name in names if name) @@ -169,15 +164,9 @@ def _get_pull_requests_since_tag( """Get a list of pull requests merged into the main branch since a given tag.""" prs = set() - start = time.time() commits = _git_commits_since_tag(repo, tag) - print("Time to get commits: ", time.time() - start) - - start = time.time() contributors = _get_contributors_from_commits(api, commits) - print("Time to get contributors: ", time.time() - start) - start = time.time() commit_shas = {commit.sha for commit in commits} for pr_info in repo.get_pulls( state="closed", sort="updated", direction="desc", base="main" @@ -186,7 +175,6 @@ def _get_pull_requests_since_tag( prs.add(pr_info) if len(prs) == len(commit_shas): break - print("Time to get PRs: ", time.time() - start) shortlog = ", ".join([f"`{name}`" for name in sorted(contributors)]) return shortlog, prs @@ -256,7 +244,6 @@ def _update_changelog(prs: set[PullRequest], tag: str) -> bool: return False for pr_info in prs: - print("End index", end_index) parsed_title = _extract_changelog_entry(pr_info) # Skip if the PR is already in changelog @@ -312,7 +299,7 @@ def _bump_minor_version(tag: str) -> Optional[str]: match = re.match(r"v(\d+)\.(\d+)\.(\d+)", tag) if match is None: return None - major, minor, _ = [int(x) for x in match.groups()] + major, minor, _ = (int(x) for x in match.groups()) # Increment the minor version and reset patch version new_version = f"v{major}.{minor + 1}.0" return new_version @@ -320,25 +307,26 @@ def _bump_minor_version(tag: str) -> Optional[str]: def _fetch_origin() -> None: """Fetch the latest changes from the origin.""" - subprocess.run(["git", "fetch", "origin"]) + subprocess.run(["git", "fetch", "origin"], check=True) def main() -> None: """Update changelog using the descriptions of PRs since the latest tag.""" # Initialize GitHub Client with provided token (as argument) gh_api = Github(argv[1]) - + # Fetch the latest changes from the origin _fetch_origin() - start = time.time() + # Get the repository and the latest tag repo, latest_tag = _get_latest_tag(gh_api) if not latest_tag: return + # Get the shortlog and the pull requests since the latest tag shortlog, prs = _get_pull_requests_since_tag(gh_api, repo, latest_tag) - start = time.time() + # Update the changelog if _update_changelog(prs, latest_tag): new_version = _bump_minor_version(latest_tag) if not new_version: From dfb85f33a9a0b6e05f55dc570e75e80c6e052e2e Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Wed, 5 Feb 2025 22:26:38 +0800 Subject: [PATCH 3/8] add warning --- dev/update_changelog.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index 800c08601a72..119788ec0dd5 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -20,6 +20,7 @@ import re import subprocess from concurrent.futures import ThreadPoolExecutor +from threading import Lock try: import tomllib @@ -132,16 +133,19 @@ def _get_contributors_from_commits(api: Github, commits: set[Commit]) -> set[str authors: set[NamedUser] = set() coauthor_names: set[str] = set() coauthor_pattern = r"Co-authored-by:\s*(.+?)\s*<" + lock = Lock() def retrieve(commit: Commit) -> None: if commit.author.name is None: return if "[bot]" in commit.author.name: return - authors.add(commit.author) + with lock: + authors.add(commit.author) # Find co-authors in the commit message if matches := re.findall(coauthor_pattern, commit.commit.message): - coauthor_names.update(name for name in matches) + with lock: + coauthor_names.update(name for name in matches) with ThreadPoolExecutor(max_workers=15) as executor: executor.map(retrieve, commits) @@ -152,9 +156,16 @@ def retrieve(commit: Commit) -> None: coauthor_names.difference_update(author.login for author in authors) # Get full names of the GitHub usernames + def get_user(username: str) -> None: + try: + if name := api.get_user(username).name: + with lock: + contributors.add(name) + except Exception: # pylint: disable=broad-exception-caught + print(f"Failed to get user '{username}'") + with ThreadPoolExecutor(max_workers=5) as executor: - names = list(executor.map(lambda x: api.get_user(x).name, coauthor_names)) - contributors.update(name for name in names if name) + executor.map(get_user, coauthor_names) return contributors From 23bf4e33082092c6f673012da73b71915e2f9807 Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Wed, 5 Feb 2025 22:29:34 +0800 Subject: [PATCH 4/8] add logs --- dev/update_changelog.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index 119788ec0dd5..7bd631f9ae14 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -175,9 +175,13 @@ def _get_pull_requests_since_tag( """Get a list of pull requests merged into the main branch since a given tag.""" prs = set() + print(f"Retrieving commits since tag '{tag}'...") commits = _git_commits_since_tag(repo, tag) + + print("Retrieving contributors...") contributors = _get_contributors_from_commits(api, commits) + print("Retrieving pull requests...") commit_shas = {commit.sha for commit in commits} for pr_info in repo.get_pulls( state="closed", sort="updated", direction="desc", base="main" @@ -327,9 +331,11 @@ def main() -> None: gh_api = Github(argv[1]) # Fetch the latest changes from the origin + print("Fetching the latest changes from the origin...") _fetch_origin() # Get the repository and the latest tag + print("Retrieving the latest tag...") repo, latest_tag = _get_latest_tag(gh_api) if not latest_tag: return @@ -338,6 +344,7 @@ def main() -> None: shortlog, prs = _get_pull_requests_since_tag(gh_api, repo, latest_tag) # Update the changelog + print("Updating the changelog...") if _update_changelog(prs, latest_tag): new_version = _bump_minor_version(latest_tag) if not new_version: From 982b0317dc7a5d9f76d542015ff2655c51d146e3 Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Wed, 5 Feb 2025 22:36:51 +0800 Subject: [PATCH 5/8] reset --- dev/update_changelog.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index 7bd631f9ae14..51ebec627ae8 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -68,7 +68,7 @@ } -def _get_latest_tag(gh_api: Github) -> tuple[Repository, Optional[str]]: +def _get_latest_tag(gh_api: Github) -> tuple[Repository, str]: """Retrieve the latest tag from the GitHub repository.""" repo = gh_api.get_repo(REPO_NAME) latest_tag = subprocess.run( @@ -338,6 +338,7 @@ def main() -> None: print("Retrieving the latest tag...") repo, latest_tag = _get_latest_tag(gh_api) if not latest_tag: + print("No tags found in the repository.") return # Get the shortlog and the pull requests since the latest tag From be5cc427336c28479072a432ec0ac853fd87bc02 Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Thu, 6 Feb 2025 22:33:39 +0800 Subject: [PATCH 6/8] address comments --- dev/update_changelog.py | 120 ++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 71 deletions(-) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index 51ebec627ae8..a66c68b15af4 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -18,25 +18,23 @@ import pathlib import re -import subprocess from concurrent.futures import ThreadPoolExecutor -from threading import Lock - -try: - import tomllib -except ModuleNotFoundError: - import tomli as tomllib - from datetime import date from sys import argv from typing import Optional +import git +from git import Commit from github import Github -from github.Commit import Commit -from github.NamedUser import NamedUser from github.PullRequest import PullRequest from github.Repository import Repository +try: + import tomllib +except ModuleNotFoundError: + import tomli as tomllib + + REPO_NAME = "adap/flower" CHANGELOG_FILE = "framework/docs/source/ref-changelog.md" CHANGELOG_SECTION_HEADER = "### Changelog entry" @@ -57,6 +55,10 @@ PATTERN_TEMPLATE = CONFIG["pattern_template"] PATTERN = PATTERN_TEMPLATE.format(types=TYPES, projects=PROJECTS, scope=SCOPE) +# Local git repository +LOCAL_REPO = git.Repo(search_parent_directories=True) + +# Map PR types to sections in the changelog PR_TYPE_TO_SECTION = { "feat": "### New features", "docs": "### Documentation improvements", @@ -67,17 +69,15 @@ "unknown": "### Unknown changes", } +# Maximum number of workers in the thread pool +MAX_WORKERS = argv[2] if len(argv) > 2 else 10 + def _get_latest_tag(gh_api: Github) -> tuple[Repository, str]: """Retrieve the latest tag from the GitHub repository.""" repo = gh_api.get_repo(REPO_NAME) - latest_tag = subprocess.run( - ["git", "describe", "--tags", "--abbrev=0"], - stdout=subprocess.PIPE, - text=True, - check=True, - ).stdout.strip() - return repo, latest_tag + tags = sorted(LOCAL_REPO.tags, key=lambda t: t.commit.committed_datetime) + return repo, tags[-1].name def _add_shortlog(new_version: str, shortlog: str) -> None: @@ -109,63 +109,45 @@ def _add_shortlog(new_version: str, shortlog: str) -> None: file.write(line) -def _git_commits_since_tag(repo: Repository, tag: str) -> set[Commit]: +def _git_commits_since_tag(tag: str) -> list[Commit]: """Get a set of commits since a given tag.""" - # Get SHA hashes of commits since the tag - result = subprocess.run( - ["git", "log", "--pretty=format:%H", f"{tag}..origin/main"], - stdout=subprocess.PIPE, - text=True, - check=True, - ) - shas = set(result.stdout.splitlines()) + return list(LOCAL_REPO.iter_commits(f"{tag}..origin/main")) - # Fetch GitHub commits based on the SHA hashes - with ThreadPoolExecutor(max_workers=15) as executor: - commits = list(executor.map(repo.get_commit, shas)) - return commits - - -def _get_contributors_from_commits(api: Github, commits: set[Commit]) -> set[str]: +def _get_contributors_from_commits(api: Github, commits: list[Commit]) -> set[str]: """Get a set of contributors from a set of commits.""" # Get authors and co-authors from the commits - authors: set[NamedUser] = set() - coauthor_names: set[str] = set() - coauthor_pattern = r"Co-authored-by:\s*(.+?)\s*<" - lock = Lock() + contributors: set[str] = set() + coauthor_names_emails: set[tuple[str, str]] = set() + coauthor_pattern = r"Co-authored-by:\s*(.+?)\s*<(.+?)>" - def retrieve(commit: Commit) -> None: + for commit in commits: if commit.author.name is None: - return + continue if "[bot]" in commit.author.name: - return - with lock: - authors.add(commit.author) + continue # Find co-authors in the commit message - if matches := re.findall(coauthor_pattern, commit.commit.message): - with lock: - coauthor_names.update(name for name in matches) - - with ThreadPoolExecutor(max_workers=15) as executor: - executor.map(retrieve, commits) + matches: list[str] = re.findall(coauthor_pattern, commit.message) - # Remove repeated usernames - contributors = {author.name for author in authors if author.name} - coauthor_names -= contributors - coauthor_names.difference_update(author.login for author in authors) + contributors.add(commit.author.name) + if matches: + coauthor_names_emails.update(matches) # Get full names of the GitHub usernames - def get_user(username: str) -> None: + def _get_user(username: str, email: str) -> Optional[str]: try: - if name := api.get_user(username).name: - with lock: - contributors.add(name) + if user := api.get_user(username): + if user.email == email: + return user.name except Exception: # pylint: disable=broad-exception-caught - print(f"Failed to get user '{username}'") + pass + print(f"FAILED to get user: {username} <{email}>") + return None - with ThreadPoolExecutor(max_workers=5) as executor: - executor.map(get_user, coauthor_names) + with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: + for name in executor.map(lambda x: _get_user(*x), coauthor_names_emails): + if name: + contributors.add(name) return contributors @@ -176,13 +158,13 @@ def _get_pull_requests_since_tag( prs = set() print(f"Retrieving commits since tag '{tag}'...") - commits = _git_commits_since_tag(repo, tag) + commits = _git_commits_since_tag(tag) print("Retrieving contributors...") contributors = _get_contributors_from_commits(api, commits) print("Retrieving pull requests...") - commit_shas = {commit.sha for commit in commits} + commit_shas = {commit.hexsha for commit in commits} for pr_info in repo.get_pulls( state="closed", sort="updated", direction="desc", base="main" ): @@ -254,6 +236,11 @@ def _update_changelog(prs: set[PullRequest], tag: str) -> bool: # Find the end of the Unreleased section end_index = content.find(f"## {tag}", unreleased_index + 1) + for section in PR_TYPE_TO_SECTION.values(): + if content.find(section, unreleased_index, end_index) == -1: + content = content[:end_index] + f"\n{section}\n\n" + content[end_index:] + end_index = content.find(f"## {tag}", end_index) + if unreleased_index == -1: print("Unreleased header not found in the changelog.") return False @@ -270,15 +257,6 @@ def _update_changelog(prs: set[PullRequest], tag: str) -> bool: section = PR_TYPE_TO_SECTION.get(pr_type, "### Unknown changes") insert_index = content.find(section, unreleased_index, end_index) - # Add section if not exist - if insert_index == -1: - content = _insert_entry_no_desc( - content, - section, - unreleased_index, - ) - insert_index = content.find(section, unreleased_index, end_index) - pr_reference = _format_pr_reference( pr_info.title, pr_info.number, pr_info.html_url ) @@ -322,7 +300,7 @@ def _bump_minor_version(tag: str) -> Optional[str]: def _fetch_origin() -> None: """Fetch the latest changes from the origin.""" - subprocess.run(["git", "fetch", "origin"], check=True) + LOCAL_REPO.remote("origin").fetch() def main() -> None: From 9d99430830c11daa6d1f9e4122032841396c13d1 Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Thu, 6 Feb 2025 22:35:08 +0800 Subject: [PATCH 7/8] fix typo --- dev/update_changelog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index a66c68b15af4..d6cc3ecc7e26 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -330,7 +330,7 @@ def main() -> None: print("Wrong tag format.") return _add_shortlog(new_version, shortlog) - print("Changelog updated succesfully.") + print("Changelog updated successfully.") if __name__ == "__main__": From 8ca5611c737c97453fecafb36c95db66e9a53edb Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Thu, 6 Feb 2025 22:41:13 +0800 Subject: [PATCH 8/8] show time --- dev/update_changelog.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/update_changelog.py b/dev/update_changelog.py index d6cc3ecc7e26..87ee081d95b0 100644 --- a/dev/update_changelog.py +++ b/dev/update_changelog.py @@ -18,6 +18,7 @@ import pathlib import re +import time from concurrent.futures import ThreadPoolExecutor from datetime import date from sys import argv @@ -305,6 +306,8 @@ def _fetch_origin() -> None: def main() -> None: """Update changelog using the descriptions of PRs since the latest tag.""" + start = time.time() + # Initialize GitHub Client with provided token (as argument) gh_api = Github(argv[1]) @@ -330,7 +333,7 @@ def main() -> None: print("Wrong tag format.") return _add_shortlog(new_version, shortlog) - print("Changelog updated successfully.") + print(f"Changelog updated successfully in {time.time() - start:.2f}s.") if __name__ == "__main__":