pylint-dev · Pierre-Sassoulas · Jun 1, 2022 · DanielNoord · Jun 2, 2022 · Pierre-Sassoulas
diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml
@@ -0,0 +1,25 @@
+name: changelog
+
+on:
+  pull_request:
+    types: [opened, synchronize, labeled, unlabeled, reopened]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    name: Changelog Entry Check
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Grep CHANGES.md for PR number
+        if:
+          contains(github.event.pull_request.labels.*.name, 'skip news :mute:') != true
+        run: |
+          grep -Pz "(\*\s[\S[\n
+]+?]*\n\n\s\s(Refs|Closes|Follow-up in|Fixes part of)) (PyCQA/astroid)?#${{ github.event.pull_request.number }}" doc/whatsnew/2/**/*.rst || \
+          (echo "Please add '(#${{ github.event.pull_request.number }})' change line to 'doc/whatsnew/2/2.15/index.rst' \
+          (or if appropriate, ask a maintainer to add the 'skip news' label)" && \
+          exit 1)
diff --git a/doc/whatsnew/2/2.15/index.rst b/doc/whatsnew/2/2.15/index.rst
@@ -47,3 +47,5 @@ Other Changes
 
 Internal changes
 ================
+
+* Added a check in CI for changelogs entry (#6688)
diff --git a/script/check_changelog.py b/script/check_changelog.py
@@ -0,0 +1,150 @@
+# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
+# Copyright (c) https://github.com/PyCQA/pylint/blob/main/CONTRIBUTORS.txt
+
+"""Small script to check the changelog. Used by 'changelog.yml' and pre-commit.
+
+If no issue number is provided we only check that proper formatting is respected."""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from collections.abc import Iterator
+from pathlib import Path
+from re import Pattern
+
+VALID_ISSUES_KEYWORDS = [
+    "Refs",
+    "Closes",
+    "Follow-up in",
+    "Fixes part of",
+    "Closes BitBucket",
+    "Closes Logilab",
+]
+ISSUE_NUMBER_PATTERN = r"#\d{1,5}"
+VALID_ISSUE_NUMBER_PATTERN = r"\*[\S\s]*?" + ISSUE_NUMBER_PATTERN
+ISSUES_KEYWORDS = "|".join(VALID_ISSUES_KEYWORDS)
+PREFIX_CHANGELOG_PATTERN = (
+    rf"(\*\s[\S[\n ]+?]*\n\n\s\s({ISSUES_KEYWORDS})) (PyCQA/astroid)?"
+)
+VALID_CHANGELOG_PATTERN = PREFIX_CHANGELOG_PATTERN + ISSUE_NUMBER_PATTERN
+
+ISSUE_NUMBER_COMPILED_PATTERN = re.compile(ISSUE_NUMBER_PATTERN)
+VALID_CHANGELOG_COMPILED_PATTERN: Pattern[str] = re.compile(VALID_CHANGELOG_PATTERN)
+VALID_ISSUE_NUMBER_COMPILED_PATTERN: Pattern[str] = re.compile(
+    VALID_ISSUE_NUMBER_PATTERN
+)
+
+DOC_PATH = (Path(__file__).parent / "../doc/").resolve()
+PATH_TO_WHATSNEW = DOC_PATH / "whatsnew"
+UNCHECKED_VERSION = [
+    # Not checking version prior to 1.0.0 because the issues referenced are a mix
+    # between Logilab internal issue and Bitbucket. It's hard to tell, it's
+    # inaccessible for Logilab and often dead links for Bitbucket anyway.
+    # Not very useful generally, unless you're an open source historian.
+    "0.x",
+]
+
+NO_CHECK_REQUIRED_FILES = {
+    "index.rst",
+    "full_changelog_explanation.rst",
+    "summary_explanation.rst",
+}
+
+
+def sorted_whatsnew(verbose: bool) -> Iterator[Path]:
+    """Return the whats-new in the 'right' numerical order ('9' before '10')"""
+    numeric_whatsnew = {}
+    for file in PATH_TO_WHATSNEW.glob("**/*"):
+        relpath_file = file.relative_to(DOC_PATH)
+        if file.is_dir():
+            if verbose:
+                print(f"I don't care about '{relpath_file}', it's a directory : 🤖🤷")
+            continue
+        if file.name in NO_CHECK_REQUIRED_FILES:
+            if verbose:
+                print(
+                    f"I don't care about '{relpath_file}' it's in 'NO_CHECK_REQUIRED_FILES' : 🤖🤷"
+                )
+            continue
+        version = (
+            file.parents[0].name if file.stem in {"summary", "full"} else file.stem
+        )
+        if any(version == x for x in UNCHECKED_VERSION):
+            if verbose:
+                print(
+                    f"I don't care about '{relpath_file}' {version} is in UNCHECKED_VERSION : 🤖🤷"
+                )
+            continue
+        if verbose:
+            print(f"I'm going to check '{relpath_file}' 🤖")
+        num = tuple(int(x) for x in (version.split(".")))
+        numeric_whatsnew[num] = file
+    for num in sorted(numeric_whatsnew):
+        yield numeric_whatsnew[num]
+
+
+def main(argv: list[str] | None = None) -> int:
+    argv = argv or sys.argv[1:]
+    description = __doc__
+    description += " The regex we enforce is: " + VALID_CHANGELOG_PATTERN
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument(
+        "--issue-number",
+        type=int,
+        default=0,
+        help="The issue we expect to find in the changelog.",
+    )
+    parser.add_argument("--verbose", "-v", action="count", default=0)
+    args = parser.parse_args(argv)
+    verbose = args.verbose
+    is_valid = True
+    for file in sorted_whatsnew(verbose):
+        if not check_file(file, verbose):
+            is_valid = False
+    return 0 if is_valid else 1
+
+
+def check_file(file: Path, verbose: bool) -> bool:
+    """Check that a file contain valid change-log's entries."""
+    with open(file, encoding="utf8") as f:
+        content = f.read()
+    valid_full_descriptions = VALID_CHANGELOG_COMPILED_PATTERN.findall(content)
+    result = len(valid_full_descriptions)
+    contain_issue_number_descriptions = VALID_ISSUE_NUMBER_COMPILED_PATTERN.findall(
+        content
+    )
+    expected = len(contain_issue_number_descriptions)
+    if result != expected:
+        return create_detailed_fail_message(
+            file, contain_issue_number_descriptions, valid_full_descriptions
+        )
+    if verbose:
+        relpath_file = file.relative_to(DOC_PATH)
+        print(f"Checked '{relpath_file}' : LGTM 🤖👍")
+    return True
+
+
+def create_detailed_fail_message(
+    file_name: Path,
+    contain_issue_number_descriptions: list,
+    valid_full_descriptions: list,
+) -> bool:
+    is_valid = True
+    for issue_number_description in contain_issue_number_descriptions:
+        if not any(v[0] in issue_number_description for v in valid_full_descriptions):
+            is_valid = False
+            issue_number = ISSUE_NUMBER_COMPILED_PATTERN.findall(
+                issue_number_description
+            )[0]
+            print(
+                f"{file_name}: {issue_number}'s description is not on one line, or "
+                "does not respect the standard format 🤖👎"
+            )
+    return is_valid
+
+
+if __name__ == "__main__":
+    sys.exit(main())
Original file line number	Diff line number	Diff line change
Expand Up		@@ -47,3 +47,5 @@ Other Changes

		Internal changes
		================

		* Added a check in CI for changelogs entry (#6688)
Copy link Member Author Pierre-Sassoulas Jun 2, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. The question here is should we keep the old format with: `* Fix a regression where messages with dash are not fully parsed Closes #3604` Or is `* Fix a regression where messages with dash are not fully parsed (#3604)` better ? Sometime we might want to be more detailed use multiple line ? Note that #3604 was an issue number but we would use the PR number now (It was just a commit at the time `ffb354a` but now we can't push to main so there's always a PR) Copy link Collaborator DanielNoord Jun 2, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Let's keep the old system. It brings some uniformity to all changelogs and is also easier to parse the data programatically. Pierre-Sassoulas reacted with thumbs up emoji