From a6ff35b815ff1819bed6b0024b1dd4d72f4e6ab3 Mon Sep 17 00:00:00 2001
From: Israel Fruchter
Date: Thu, 20 Jun 2024 13:55:48 +0300
Subject: [PATCH] feature(github): cache github issues status in S3

Add a GitHub Action that runs once a day and caches the issue status of
multiple GitHub repositories in S3, plus client code that reads those CSV
files and caches them locally, so SkipPerIssues can consult the cache
first and fall back to direct GitHub API calls.

Ref: https://github.com/scylladb/qa-tasks/issues/1678
---
 .github/workflows/cache-issues.yaml | 33 +++++++++++++++++++++++++
 sdcm/utils/issues.py                | 38 ++++++++++++++++++++++++++++-
 2 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/cache-issues.yaml

diff --git a/.github/workflows/cache-issues.yaml b/.github/workflows/cache-issues.yaml
new file mode 100644
index 00000000000..3644030c44a
--- /dev/null
+++ b/.github/workflows/cache-issues.yaml
@@ -0,0 +1,33 @@
+name: Cache issues status
+on:
+  schedule:
+    # Daily at 8:20 UTC
+    - cron: '20 8 * * *'
+  pull_request:
+    branches:
+      - master
+  push:
+    branches:
+      - master
+jobs:
+  collect_n_upload:
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          mkdir -p issues
+          for repo in scylladb scylla-enterprise scylla-manager scylla-operator scylla-cluster-tests scylla-dtest qa-tasks ; do
+              gh issue list --state all --json number,state,labels --limit 30000 --template '{{range .}}{{.number}},{{.state}},{{range .labels}}{{.name}}|{{end}}{{println ""}}{{end}}' --repo scylladb/$repo > issues/scylladb_$repo.csv
+          done
+        env:
+          GH_TOKEN: ${{ secrets.ISSUE_ASSIGNMENT_TO_PROJECT_TOKEN }}
+      - name: Upload folder to bucket
+        uses: a-sync/s3-uploader@2.0.1
+        with:
+          args: --recursive
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
+          AWS_REGION: 'us-east-1'
+          S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
+          S3_KEY: 'issues'
+          FILE: ./issues
diff --git a/sdcm/utils/issues.py b/sdcm/utils/issues.py
index d7301aa20f7..efa1de47345 100644
--- a/sdcm/utils/issues.py
+++ b/sdcm/utils/issues.py
@@ -1,5 +1,6 @@
 import re
 import sys
+import csv
 import logging
 from functools import lru_cache
 from dataclasses import dataclass
@@ -7,8 +8,9 @@
 import github
 import github.Auth
 import github.Issue
+import github.Label
 from github.GithubException import UnknownObjectException, RateLimitExceededException
-
+from botocore.exceptions import ClientError
 from sdcm.keystore import KeyStore
 from sdcm.sct_config import SCTConfiguration
 from sdcm.sct_events.base import Severity
@@ -28,6 +30,26 @@ class Issue:
     issue_id: int | None
 
 
+class CachedGitHubIssues:
+    def __init__(self):
+        self.storage = KeyStore()
+
+    @lru_cache()
+    def get_repo_data(self, owner, repo):
+        scsv = self.storage.get_file_contents(f'issues/{owner}_{repo}.csv')
+        # headerless CSV rows: issue number, state, "|"-separated label names
+        return {issue['id']: issue for issue in csv.DictReader(scsv.decode().splitlines(), fieldnames=("id", "state", "labels"))}
+
+    def get_issue(self, owner: str, repo_id: str, issue_id: str | int):
+        repo_issues_mapping = self.get_repo_data(owner, repo_id)
+        return repo_issues_mapping.get(str(issue_id))
+
+
+@lru_cache(maxsize=1)
+def get_issues_cache():
+    return CachedGitHubIssues()
+
+
 class SkipPerIssues:
     """
     instance of this class would return true, if one of the issue on the list is open
@@ -51,6 +73,8 @@ def github(cls):
         return cls._github
 
     def __init__(self, issues: list[str] | str, params: SCTConfiguration | dict):
+        self.cache = get_issues_cache()
+
         self.params = params
         issues = [issues] if isinstance(issues, str) else issues
 
@@ -81,6 +105,18 @@ def get_issue_details(self, issue):
                                severity=Severity.WARNING,
                                trace=sys._getframe().f_back).publish()  # pylint: disable=protected-access
             return None
+        try:
+            if issue_details := self.cache.get_issue(owner=issue_parsed.user_id, repo_id=issue_parsed.repo_id, issue_id=issue_parsed.issue_id):
+                labels = [dict(name=label) for label in issue_details['labels'].strip().rstrip('|').split('|')]
+                return github.Issue.Issue(requester=None, headers={},
+                                          attributes=dict(state=issue_details['state'].lower(), labels=labels),
+                                          completed=True)
+        except ClientError as exc:
+            logging.warning("failed to get issue: %s from s3 cache", issue)
+            TestFrameworkEvent(source=self.__class__.__name__,
+                               message=f"failed to get issue {issue} from s3 cache",
+                               severity=Severity.ERROR,
+                               exception=exc).publish()
         try:
             return self.github.get_repo(f'{issue_parsed.user_id}/{issue_parsed.repo_id}', lazy=True).get_issue(issue_parsed.issue_id)
         except UnknownObjectException:
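
For reference (not part of the patch): a minimal sketch of how the cached data round-trips, assuming the row format produced by the `gh issue list` template in the workflow above and parsing similar to CachedGitHubIssues / get_issue_details. The sample rows and the file name mentioned in the comments are made up for illustration.

import csv

# Hypothetical sample of what the workflow would write to a file such as
# issues/scylladb_scylla-cluster-tests.csv: headerless rows of
# "<number>,<STATE>,<label>|<label>|..." (the labels field may be empty).
sample = """\
1234,OPEN,bug|P1|
5678,CLOSED,
"""

# Index rows by issue number (as a string), as CachedGitHubIssues.get_repo_data() does.
issues = {row["id"]: row
          for row in csv.DictReader(sample.splitlines(), fieldnames=("id", "state", "labels"))}

# Rebuild label names from the "|"-separated field, similarly to get_issue_details()
# (here empty names are filtered out for readability).
row = issues["1234"]
labels = [name for name in row["labels"].strip().rstrip("|").split("|") if name]
print(row["state"].lower(), labels)  # -> open ['bug', 'P1']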