From ec52c6283f16a8f057d9d0691c6c1881b9c476dd Mon Sep 17 00:00:00 2001
From: Israel Fruchter
Date: Thu, 20 Jun 2024 13:55:48 +0300
Subject: [PATCH] feature(github): cache github issues status in S3

A GitHub Action that runs every 6 hours and caches the issues status from
multiple GitHub repositories into S3, plus client code that can read those
CSV files and cache them locally, so SkipPerIssues reads from the cache
first and falls back to direct GitHub API calls only when needed.

Ref: https://github.com/scylladb/qa-tasks/issues/1678
---
 .github/workflows/cache-issues.yaml | 27 +++++++++++++++++
 sdcm/utils/issues.py                | 45 ++++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/cache-issues.yaml

diff --git a/.github/workflows/cache-issues.yaml b/.github/workflows/cache-issues.yaml
new file mode 100644
index 0000000000..5e0f90fb7d
--- /dev/null
+++ b/.github/workflows/cache-issues.yaml
@@ -0,0 +1,27 @@
+name: Cache issues status
+on:
+  schedule:
+    - cron: '0 */6 * * *'
+
+jobs:
+  collect_n_upload:
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          mkdir -p issues
+          for repo in scylladb scylla-enterprise scylla-manager scylla-operator scylla-cluster-tests scylla-dtest qa-tasks scylla-tools-java ; do
+            gh issue list --state all --json number,state,labels --limit 30000 --template '{{range .}}{{.number}},{{.state}},{{range .labels}}{{.name}}|{{end}}{{println ""}}{{end}}' --repo scylladb/$repo > issues/scylladb_$repo.csv
+          done
+        env:
+          GH_TOKEN: ${{ secrets.ISSUE_ASSIGNMENT_TO_PROJECT_TOKEN }}
+      - name: Upload folder to bucket
+        uses: a-sync/s3-uploader@2.0.1
+        with:
+          args: --recursive
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
+          AWS_REGION: 'us-east-1'
+          S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
+          S3_KEY: 'issues'
+          FILE: ./issues
diff --git a/sdcm/utils/issues.py b/sdcm/utils/issues.py
index d7301aa20f..6155d7e194 100644
--- a/sdcm/utils/issues.py
+++ b/sdcm/utils/issues.py
@@ -1,5 +1,6 @@
 import re
 import sys
+import csv
 import logging
 from functools import lru_cache
 from dataclasses import dataclass
@@ -7,8 +8,9 @@
 import github
 import github.Auth
 import github.Issue
+import github.Label
 from github.GithubException import UnknownObjectException, RateLimitExceededException
-
+from botocore.exceptions import ClientError
 from sdcm.keystore import KeyStore
 from sdcm.sct_config import SCTConfiguration
 from sdcm.sct_events.base import Severity
@@ -28,6 +30,33 @@ class Issue:
     issue_id: int | None
 
 
+@lru_cache(maxsize=1)
+class CachedGitHubIssues:
+    """
+    This class caches the issues from the S3 bucket and returns the issue details.
+    The cache is populated by the `.github/workflows/cache-issues.yaml` workflow
+    every 6 hours.
+
+    Its main goal is to make sure we don't reach the rate limit of the GitHub API.
+    """
+
+    def __init__(self):
+        self.storage = KeyStore()
+
+    @lru_cache()
+    def get_repo_data(self, owner, repo):
+        scsv = self.storage.get_file_contents(f'issues/{owner}_{repo}.csv')
+        issues = {issue['id']: issue for issue in csv.DictReader(
+            scsv.decode().splitlines(), fieldnames=("id", "state", "labels"))}
+        issues = {issue['id']: issue | dict(
+            labels=[dict(name=label) for label in issue['labels'].strip().rstrip('|').split('|')]) for issue in issues.values()}
+        return issues
+
+    def get_issue(self, owner: str, repo_id: str, issue_id: str | int):
+        repo_issues_mapping = self.get_repo_data(owner, repo_id)
+        return repo_issues_mapping.get(str(issue_id))
+
+
 class SkipPerIssues:
     """
     instance of this class would return true, if one of the issue on the list is open
@@ -51,6 +80,8 @@ def github(cls):
         return cls._github
 
     def __init__(self, issues: list[str] | str, params: SCTConfiguration | dict):
+        self.cache = CachedGitHubIssues()
+
         self.params = params
 
         issues = [issues] if isinstance(issues, str) else issues
@@ -81,6 +112,18 @@ def get_issue_details(self, issue):
                                severity=Severity.WARNING,
                                trace=sys._getframe().f_back).publish()  # pylint: disable=protected-access
             return None
+        try:
+            if issue_details := self.cache.get_issue(owner=issue_parsed.user_id, repo_id=issue_parsed.repo_id, issue_id=issue_parsed.issue_id):
+                return github.Issue.Issue(requester=None, headers={},
+                                          attributes=dict(state=issue_details['state'].lower(),
+                                                          labels=issue_details['labels']),
+                                          completed=True)
+        except ClientError as exc:
+            logging.warning("failed to get issue: %s from s3 cache", issue)
+            TestFrameworkEvent(source=self.__class__.__name__,
+                               message=f"failed to get issue {issue} from s3 cache",
+                               severity=Severity.ERROR,
+                               exception=exc).publish()
         try:
             return self.github.get_repo(f'{issue_parsed.user_id}/{issue_parsed.repo_id}', lazy=True).get_issue(issue_parsed.issue_id)
         except UnknownObjectException:
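Note (illustration, not part of the patch): a minimal sketch of how a CSV row emitted by
the workflow's `gh issue list` template is turned into the mapping that
CachedGitHubIssues.get_issue() returns, mirroring the get_repo_data() logic above.
The sample row values ("1234,CLOSED,bug|triaged|") are made up for illustration.

    import csv

    # Hypothetical sample of one line from e.g. issues/scylladb_scylla-cluster-tests.csv,
    # in the "<number>,<STATE>,<label>|<label>|" shape the gh template produces
    sample = "1234,CLOSED,bug|triaged|\n"

    # Same parsing as CachedGitHubIssues.get_repo_data(): index rows by issue id...
    rows = csv.DictReader(sample.splitlines(), fieldnames=("id", "state", "labels"))
    issues = {row["id"]: row for row in rows}

    # ...then expand the pipe-separated labels field into GitHub-like label dicts
    issues = {
        issue["id"]: issue | dict(
            labels=[dict(name=label) for label in issue["labels"].strip().rstrip("|").split("|")])
        for issue in issues.values()
    }

    print(issues["1234"])
    # {'id': '1234', 'state': 'CLOSED', 'labels': [{'name': 'bug'}, {'name': 'triaged'}]}

This is the shape SkipPerIssues then wraps into a github.Issue.Issue object when the
S3 cache has an entry, avoiding a direct GitHub API call.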