From 1f0feae179a34af79469f4f5eded2d873f97fd20 Mon Sep 17 00:00:00 2001 From: Yustina <62885041+YustinaKvr@users.noreply.github.com> Date: Wed, 17 Jul 2024 12:32:37 +0200 Subject: [PATCH] Add classes (#70) * add classes * linters fix * linters fix * linters fix * Update failed_zuul.py * max pages count * rename files --------- Co-authored-by: YustinaKvr --- 2_gitea_info.py | 55 ++-- 4_failed_zuul.py | 619 ++++++++++++++++++++---------------------- 8_ecosystem_issues.py | 269 ++++++++---------- classes.py | 55 ++++ 4 files changed, 488 insertions(+), 510 deletions(-) create mode 100644 classes.py diff --git a/2_gitea_info.py b/2_gitea_info.py index d59020f..da51061 100644 --- a/2_gitea_info.py +++ b/2_gitea_info.py @@ -5,7 +5,6 @@ import csv import json import logging -import os import pathlib import re import time @@ -14,6 +13,8 @@ import requests from github import Github +from classes import Database, EnvVariables + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') start_time = time.time() @@ -22,26 +23,13 @@ GITEA_API_ENDPOINT = "https://gitea.eco.tsi-dev.otc-service.com/api/v1" session = requests.Session() -gitea_token = os.getenv("GITEA_TOKEN") -github_token = os.getenv("GITHUB_TOKEN") -github_fallback_token = os.getenv("GITHUB_FALLBACK_TOKEN") - -db_host = os.getenv("DB_HOST") -db_port = os.getenv("DB_PORT") -db_csv = os.getenv("DB_CSV") # main postgres db, where open PRs tables for both public and hybrid clouds are stored -db_orph = os.getenv("DB_ORPH") # dedicated db for orphans PRs (for both clouds) tables -db_user = os.getenv("DB_USER") -db_password = os.getenv("DB_PASSWORD") +env_vars = EnvVariables() +database = Database(env_vars) -def check_env_variables(): - required_env_vars = [ - "GITHUB_TOKEN", "DB_HOST", "DB_PORT", - "DB_CSV", "DB_ORPH", "DB_USER", "DB_PASSWORD", "GITEA_TOKEN" - ] - for var in required_env_vars: - if os.getenv(var) is None: - raise Exception(f"Missing environment variable: {var}") +github_token = env_vars.github_token +gitea_token = env_vars.gitea_token +github_fallback_token = env_vars.github_fallback_token def csv_erase(filenames): @@ -54,22 +42,7 @@ def csv_erase(filenames): else: continue except Exception as e: - logging.error("CSV erase: error has been occured: %s", e) - - -def connect_to_db(db_name): - logging.info("Connecting to Postgres (%s)...", db_name) - try: - return psycopg2.connect( - host=db_host, - port=db_port, - dbname=db_name, - user=db_user, - password=db_password - ) - except psycopg2.Error as e: - logging.error("Connecting to Postgres: an error occurred while trying to connect to the database: %s", e) - return None + logging.error("CSV erase: an error occurred: %s", e) def create_prs_table(conn_csv, cur_csv, table_name): @@ -105,6 +78,7 @@ def get_repos(org, cur_csv, gitea_token, rtc_table): logging.error("Fetching exclude repos for internal services: %s", e) return repos + max_pages = 50 page = 1 while True: @@ -126,6 +100,10 @@ def get_repos(org, cur_csv, gitea_token, rtc_table): continue repos.append(repo["name"]) + if page > max_pages: + logging.warning("Reached maximum page limit for %s", org) + break + link_header = repos_resp.headers.get("Link") if link_header is None or "rel=\"next\"" not in link_header: break @@ -408,6 +386,7 @@ def compare_csv_files(conn_csv, cur_csv, conn_orph, cur_orph, opentable): pr1.extend([pr2[3], pr2[4]]) orphaned.append(pr1) try: + # print("ORPHANED----------------------------------------------", pr1, len(pr1)) cur_orph.execute(f""" INSERT INTO
public.{opentable} ("Parent PR Number", "Service Name", "Squad", "Auto PR URL", "Auto PR State", "If merged", @@ -519,12 +498,12 @@ def update_squad_and_title(cursors, conns, rtctable, opentable): def main(org, gh_org, rtctable, opentable, string, token): - check_env_variables() + csv_erase(["proposalbot_prs.csv", "doc_exports_prs.csv", "orphaned_prs.csv"]) - conn_csv = connect_to_db(db_csv) + conn_csv = database.connect_to_db(env_vars.db_csv) cur_csv = conn_csv.cursor() - conn_orph = connect_to_db(db_orph) + conn_orph = database.connect_to_db(env_vars.db_orph) cur_orph = conn_orph.cursor() g = Github(token) github_org = g.get_organization(gh_org) diff --git a/4_failed_zuul.py b/4_failed_zuul.py index aea36fa..b3f886d 100644 --- a/4_failed_zuul.py +++ b/4_failed_zuul.py @@ -1,323 +1,296 @@ -""" -This script gathers info regarding PRs, which check jobs in zuul has been failed -""" - -import json -import logging -import os -import re -import time -from datetime import datetime - -import psycopg2 -import requests - -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -start_time = time.time() - -logging.info("-------------------------FAILED PRS SCRIPT IS RUNNING-------------------------") - -GITEA_API_ENDPOINT = "https://gitea.eco.tsi-dev.otc-service.com/api/v1" -session = requests.Session() -gitea_token = os.getenv("GITEA_TOKEN") -github_token = os.getenv("GITHUB_TOKEN") - -db_host = os.getenv("DB_HOST") -db_port = os.getenv("DB_PORT") -db_name = os.getenv( - "DB_ZUUL") # here we're using dedicated postgres db 'zuul' since Failed Zuul PRs panel should be placed on a same \ -# dashboard such as Open PRs -db_user = os.getenv("DB_USER") -db_password = os.getenv("DB_PASSWORD") - - -def check_env_variables(): - required_env_vars = [ - "GITHUB_TOKEN", "DB_HOST", "DB_PORT", - "DB_NAME", "DB_USER", "DB_PASSWORD", "GITEA_TOKEN" - ] - for var in required_env_vars: - if os.getenv(var) is None: - raise Exception(f"Missing environment variable: {var}") - - -def connect_to_db(db_name): - logging.info("Connecting to Postgres (%s)...", db_name) - try: - return psycopg2.connect( - host=db_host, - port=db_port, - dbname=db_name, - user=db_user, - password=db_password - ) - except psycopg2.Error as e: - logging.error( - "Connecting to Postgres: an error occurred while trying to connect to the database %s: %s", db_name, e) - return None - - -def create_prs_table(conn_zuul, cur_zuul, table_name): - try: - cur_zuul.execute( - f'''CREATE TABLE IF NOT EXISTS {table_name} ( - id SERIAL PRIMARY KEY, - "Service Name" VARCHAR(255), - "Failed PR Title" VARCHAR(255), - "Failed PR URL" VARCHAR(255), - "Squad" VARCHAR(255), - "Failed PR State" VARCHAR(255), - "Zuul URL" VARCHAR(255), - "Zuul Check Status" VARCHAR(255), - "Created at" VARCHAR(255), - "Days Passed" INT, - "Parent PR Number" INT - );''' - ) - conn_zuul.commit() - logging.info("Table %s has been created successfully", table_name) - except psycopg2.Error as e: - logging.error( - "Create table: an error occurred while trying to create a table %s in the database: %s", table_name, e) - - -def is_repo_empty(org, repo, gitea_token): - try: - commits_resp = session.get(f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/commits?token={gitea_token}") - commits_resp.raise_for_status() - - commits_data = json.loads(commits_resp.content.decode()) - if not commits_data: - return True - return False - except requests.exceptions.HTTPError as e: - if e.response.status_code == 409: # Conflict error which might mean empty repo, skip this repo to 
avoid script\ - # hangs - logging.info("Repo %s is empty, skipping", repo) - return True - logging.error("Check repo: an error occurred while trying to get commits for repo %s: %s", repo, e) - return False - except requests.exceptions.RequestException as e: - logging.error("Check repo: an error occurred while trying to get commits for repo %s: %s", repo, e) - return False - - -def get_repos(org, gitea_token): - logging.info("Gathering repos...") - repos = [] - page = 1 - while True: - try: - repos_resp = session.get(f"{GITEA_API_ENDPOINT}/orgs/{org}/repos?page={page}&limit=50&token={gitea_token}") - repos_resp.raise_for_status() - except requests.exceptions.RequestException as e: - logging.error("Get repos: an error occurred while trying to get repos: %s", e) - break - - try: - repos_dict = json.loads(repos_resp.content.decode()) - except json.JSONDecodeError as e: - logging.error("Get repos: an error occurred while trying to decode JSON: %s", e) - break - - for repo in repos_dict: - if not is_repo_empty(org, repo["name"], gitea_token): # Skipping empty repos - repos.append(repo["name"]) - - link_header = repos_resp.headers.get("Link") - if link_header is None or "rel=\"next\"" not in link_header: - break - page += 1 - - logging.info("%s repos have been processed", len(repos)) - - return repos - - -def extract_number_from_body(text): - try: - match = re.search(r"#\d+", str(text)) - if match: - return int(match.group()[1:]) - except ValueError as e: - logging.error("Extract number from body: an error occurred while converting match group to int: %s", e) - return None - return None - - -def get_f_pr_commits(org, repo, f_pr_number, gitea_token): - try: - zuul_url = None - status = None - created_at = None - days_passed = None - - pull_request_resp = session.get( - f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/pulls/{f_pr_number}/commits?token={gitea_token}") - pull_request_resp.raise_for_status() - - f_pr_info = json.loads(pull_request_resp.content.decode("utf-8")) - - if len(f_pr_info) > 0: - f_commit_sha = f_pr_info[0]["sha"] - commit_status_resp = session.get( - f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/statuses/{f_commit_sha}?token={gitea_token}") - commit_status_resp.raise_for_status() - - commit_info = json.loads(commit_status_resp.content.decode("utf-8")) - commit_status = commit_info[0]["status"] - if commit_status == "failure": - status = commit_info[0]["status"] - zuul_url = commit_info[0]["target_url"] - created_at = datetime.strptime(commit_info[0]["created_at"], '%Y-%m-%dT%H:%M:%SZ') - now = datetime.utcnow() - days_passed = (now - created_at).days - - return zuul_url, status, created_at, days_passed - - except requests.exceptions.RequestException as e: - logging.error( - "Get failed PR commits: an error occurred while trying to get pull requests of %s repo for %s org: \ - %s", repo, org, e) - - return None, None, None, None - - -def get_failed_prs(org, repo, gitea_token, conn_zuul, cur_zuul, table_name): - # logging.info(f"Processing {repo}...") # Debug print, uncomment in case of script hangs - try: - if repo != "doc-exports": - page = 1 - while True: - # logging.info(f"Fetching PRs for {repo}, page {page}...") # Debug print, uncomment in case of script\ - # hangs - repo_resp = session.get( - f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/pulls?state=open&page={page}&limit=1000&token=\ - {gitea_token}") - pull_requests = [] - if repo_resp.status_code == 200: - try: - pull_requests = json.loads(repo_resp.content.decode("utf-8")) - except json.JSONDecodeError as e: - logging.error("Get 
parent PR: an error occurred while decoding JSON: %s", e) - if not pull_requests: - break - - for pull_req in pull_requests: - body = pull_req["body"] - if body.startswith("This is an automatically created Pull Request"): - if pull_req["merged"] is True: - continue - f_par_pr_num = extract_number_from_body(body) - f_pr_number = pull_req["number"] - service_name = repo - squad = "" - title = pull_req["title"] - f_pr_url = pull_req["url"] - f_pr_state = pull_req["state"] - zuul_url, status, created_at, days_passed = get_f_pr_commits(org, repo, f_pr_number, - gitea_token) - try: - if all(item is not None for item in [zuul_url, status, created_at, days_passed]): - cur_zuul.execute(f""" - INSERT INTO public.{table_name} - ("Service Name", "Failed PR Title", "Failed PR URL", "Squad", "Failed PR State"\ - , "Zuul URL", "Zuul Check Status", "Days Passed", "Parent PR Number") - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) - """, - ( - service_name, title, f_pr_url, squad, f_pr_state, zuul_url, - status, - days_passed, f_par_pr_num) - ) - conn_zuul.commit() - except Exception as e: - logging.error( - "Failed PRs: an error occurred while inserting into %s table: %s", table_name, e) - else: - continue - elif org in ["docs-swiss", "docs"] and repo_resp.status_code != 200: - break - page += 1 - - except Exception as e: - logging.error('Failed PRs: an error occurred:', e) - - -def update_squad_and_title(conn_zuul, cur_zuul, rtctable, opentable): - logging.info("Updating squads and titles in %s...", opentable) - try: - cur_zuul.execute(f"SELECT * FROM {opentable};") - failed_prs_rows = cur_zuul.fetchall() - - for row in failed_prs_rows: - service_name_index = 1 - id_index = 0 - - cur_zuul.execute( - f"""SELECT "Title", "Squad" - FROM {rtctable} - WHERE "Repository" = %s;""", - (row[service_name_index],) - ) - rtc_row = cur_zuul.fetchone() - - if rtc_row: - cur_zuul.execute( - f"""UPDATE {opentable} - SET "Service Name" = %s, "Squad" = %s - WHERE id = %s;""", - (rtc_row[0], rtc_row[1], row[id_index]) - ) - - if row[service_name_index] in ('doc-exports', 'docs_on_docs', 'docsportal'): - cur_zuul.execute( - f"""UPDATE {opentable} - SET "Squad" = 'Other' - WHERE id = %s;""", - (row[id_index],) - ) - - conn_zuul.commit() - - except Exception as e: - logging.error("Error updating squad and title: %s", e) - conn_zuul.rollback() - - -def main(org, table_name, rtc): - check_env_variables() - - conn_zuul = connect_to_db(db_name) - cur_zuul = conn_zuul.cursor() - - cur_zuul.execute(f"DROP TABLE IF EXISTS {table_name}") - conn_zuul.commit() - - create_prs_table(conn_zuul, cur_zuul, table_name) - - repos = get_repos(org, gitea_token) - - logging.info("Gathering PRs info...") - for repo in repos: - get_failed_prs(org, repo, gitea_token, conn_zuul, cur_zuul, table_name) - - update_squad_and_title(conn_zuul, cur_zuul, rtc, FAILED_TABLE) - - cur_zuul.close() - conn_zuul.close() - - -if __name__ == "__main__": - ORG_STRING = "docs" - FAILED_TABLE = "open_prs" - RTC_TABLE = "repo_title_category" - - main(ORG_STRING, FAILED_TABLE, RTC_TABLE) - main(f"{ORG_STRING}-swiss", f"{FAILED_TABLE}_swiss", f"{RTC_TABLE}_swiss") - - end_time = time.time() - execution_time = end_time - start_time - minutes, seconds = divmod(execution_time, 60) - logging.info("Script executed in %s minutes %s seconds! 
Let's go drink some beer :)", int(minutes), int(seconds)) +""" +This script gathers info regarding PRs, which check jobs in zuul has been failed +""" + +import json +import logging +import re +import time +from datetime import datetime + +import psycopg2 +import requests + +from classes import Database, EnvVariables + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +start_time = time.time() + +logging.info("-------------------------FAILED PRS SCRIPT IS RUNNING-------------------------") + +GITEA_API_ENDPOINT = "https://gitea.eco.tsi-dev.otc-service.com/api/v1" +session = requests.Session() + +env_vars = EnvVariables() +database = Database(env_vars) + +github_token = env_vars.github_token +github_fallback_token = env_vars.github_fallback_token + + +def create_prs_table(conn_zuul, cur_zuul, table_name): + try: + cur_zuul.execute( + f'''CREATE TABLE IF NOT EXISTS {table_name} ( + id SERIAL PRIMARY KEY, + "Service Name" VARCHAR(255), + "Failed PR Title" VARCHAR(255), + "Failed PR URL" VARCHAR(255), + "Squad" VARCHAR(255), + "Failed PR State" VARCHAR(255), + "Zuul URL" VARCHAR(255), + "Zuul Check Status" VARCHAR(255), + "Created at" VARCHAR(255), + "Days Passed" INT, + "Parent PR Number" INT + );''' + ) + conn_zuul.commit() + logging.info("Table %s has been created successfully", table_name) + except psycopg2.Error: + logging.error( + "Create table: an error occurred while trying to create a table %s in the database: %s", table_name, + env_vars.db_zuul) + + +def is_repo_empty(org, repo, gitea_token): + try: + commits_resp = session.get(f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/commits?token={gitea_token}") + commits_resp.raise_for_status() + + commits_data = json.loads(commits_resp.content.decode()) + if not commits_data: + return True + return False + except requests.exceptions.HTTPError as e: + if e.response.status_code == 409: # Conflict error which might mean empty repo, skip this repo to avoid script\ + # hangs + logging.info("Repo %s is empty, skipping", repo) + return True + logging.error("Check repo: an error occurred while trying to get commits for repo %s: %s", repo, e) + return False + except requests.exceptions.RequestException as e: + logging.error("Check repo: an error occurred while trying to get commits for repo %s: %s", repo, e) + return False + + +def get_repos(org, gitea_token): + logging.info("Gathering repos...") + repos = [] + page = 1 + max_pages = 33 + while True: + try: + repos_resp = session.get(f"{GITEA_API_ENDPOINT}/orgs/{org}/repos?page={page}&limit=50&token={gitea_token}") + repos_resp.raise_for_status() + except requests.exceptions.RequestException as e: + logging.error("Get repos: an error occurred while trying to get repos: %s", e) + break + + try: + repos_dict = json.loads(repos_resp.content.decode()) + except json.JSONDecodeError as e: + logging.error("Get repos: an error occurred while trying to decode JSON: %s", e) + break + + for repo in repos_dict: + if not is_repo_empty(org, repo["name"], gitea_token): # Skipping empty repos + repos.append(repo["name"]) + if page > max_pages: + logging.warning(f"Reached maximum page limit for {org}") + break + + link_header = repos_resp.headers.get("Link") + if link_header is None or "rel=\"next\"" not in link_header: + break + page += 1 + + logging.info("%s repos have been processed", len(repos)) + + return repos + + +def extract_number_from_body(text): + try: + match = re.search(r"#\d+", str(text)) + if match: + return int(match.group()[1:]) + except ValueError as e: + 
logging.error("Extract number from body: an error occurred while converting match group to int: %s", e) + return None + return None + + +def get_f_pr_commits(org, repo, f_pr_number, gitea_token): + try: + zuul_url = None + status = None + created_at = None + days_passed = None + + pull_request_resp = session.get( + f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/pulls/{f_pr_number}/commits?token={gitea_token}") + pull_request_resp.raise_for_status() + + f_pr_info = json.loads(pull_request_resp.content.decode("utf-8")) + + if len(f_pr_info) > 0: + f_commit_sha = f_pr_info[0]["sha"] + commit_status_resp = session.get( + f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/statuses/{f_commit_sha}?token={gitea_token}") + commit_status_resp.raise_for_status() + + commit_info = json.loads(commit_status_resp.content.decode("utf-8")) + commit_status = commit_info[0]["status"] + if commit_status == "failure": + status = commit_info[0]["status"] + zuul_url = commit_info[0]["target_url"] + created_at = datetime.strptime(commit_info[0]["created_at"], '%Y-%m-%dT%H:%M:%SZ') + now = datetime.utcnow() + days_passed = (now - created_at).days + + return zuul_url, status, created_at, days_passed + + except requests.exceptions.RequestException as e: + logging.error( + "Get failed PR commits: an error occurred while trying to get pull requests of %s repo for %s org: \ + %s", repo, org, e) + + return None, None, None, None + + +def get_failed_prs(org, repo, gitea_token, conn_zuul, cur_zuul, table_name): + # logging.info(f"Processing {repo}...") # Debug print, uncomment in case of script hangs + try: + if repo != "doc-exports": + page = 1 + while True: + # logging.info(f"Fetching PRs for {org} {repo}, page {page}...") # Debug, uncomment if script hangs + repo_resp = session.get( + f"{GITEA_API_ENDPOINT}/repos/{org}/{repo}/pulls?state=open&page={page}" + f"&token={gitea_token}") + pull_requests = [] + if repo_resp.status_code == 200: + try: + pull_requests = json.loads(repo_resp.content.decode("utf-8")) + except json.JSONDecodeError as e: + logging.error("Get parent PR: an error occurred while decoding JSON: %s", e) + if not pull_requests: + break + + for pull_req in pull_requests: + body = pull_req["body"] + if body.startswith("This is an automatically created Pull Request"): + if pull_req["merged"] is True: + continue + f_par_pr_num = extract_number_from_body(body) + f_pr_number = pull_req["number"] + service_name = repo + squad = "" + title = pull_req["title"] + f_pr_url = pull_req["url"] + f_pr_state = pull_req["state"] + zuul_url, status, created_at, days_passed = get_f_pr_commits(org, repo, f_pr_number, + gitea_token) + try: + if all(item is not None for item in [zuul_url, status, created_at, days_passed]): + cur_zuul.execute(f""" + INSERT INTO public.{table_name} + ("Service Name", "Failed PR Title", "Failed PR URL", "Squad", "Failed PR State"\ + , "Zuul URL", "Zuul Check Status", "Days Passed", "Parent PR Number") + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) + """, + ( + service_name, title, f_pr_url, squad, f_pr_state, zuul_url, + status, + days_passed, f_par_pr_num) + ) + conn_zuul.commit() + except Exception as e: + logging.error( + "Failed PRs: an error occurred while inserting into %s table: %s", table_name, e) + else: + continue + elif org in ["docs-swiss", "docs"] and repo_resp.status_code != 200: + break + page += 1 + except Exception as e: + logging.error('Failed PRs: an error occurred: %s', e) + + +def update_squad_and_title(conn_zuul, cur_zuul, rtctable, opentable): + logging.info("Updating squads and titles in 
%s...", opentable) + try: + cur_zuul.execute(f"SELECT * FROM {opentable};") + failed_prs_rows = cur_zuul.fetchall() + + for row in failed_prs_rows: + service_name_index = 1 + id_index = 0 + + cur_zuul.execute( + f"""SELECT "Title", "Squad" + FROM {rtctable} + WHERE "Repository" = %s;""", + (row[service_name_index],) + ) + rtc_row = cur_zuul.fetchone() + + if rtc_row: + cur_zuul.execute( + f"""UPDATE {opentable} + SET "Service Name" = %s, "Squad" = %s + WHERE id = %s;""", + (rtc_row[0], rtc_row[1], row[id_index]) + ) + + if row[service_name_index] in ('doc-exports', 'docs_on_docs', 'docsportal'): + cur_zuul.execute( + f"""UPDATE {opentable} + SET "Squad" = 'Other' + WHERE id = %s;""", + (row[id_index],) + ) + + conn_zuul.commit() + + except Exception as e: + logging.error("Error updating squad and title: %s", e) + conn_zuul.rollback() + + +def main(org, table_name, rtc): + + conn_zuul = database.connect_to_db(env_vars.db_zuul) + cur_zuul = conn_zuul.cursor() + + cur_zuul.execute(f"DROP TABLE IF EXISTS {table_name}") + conn_zuul.commit() + + create_prs_table(conn_zuul, cur_zuul, table_name) + + repos = get_repos(org, env_vars.gitea_token) + + logging.info("Gathering PRs info...") + for repo in repos: + get_failed_prs(org, repo, env_vars.gitea_token, conn_zuul, cur_zuul, table_name) + + update_squad_and_title(conn_zuul, cur_zuul, rtc, table_name) + + cur_zuul.close() + conn_zuul.close() + + +if __name__ == "__main__": + ORG_STRING = "docs" + FAILED_TABLE = "open_prs" + RTC_TABLE = "repo_title_category" + + main(ORG_STRING, FAILED_TABLE, RTC_TABLE) + main(f"{ORG_STRING}-swiss", f"{FAILED_TABLE}_swiss", f"{RTC_TABLE}_swiss") + + end_time = time.time() + execution_time = end_time - start_time + minutes, seconds = divmod(execution_time, 60) + logging.info("Script executed in %s minutes %s seconds! 
Let's go drink some beer :)", int(minutes), int(seconds)) diff --git a/8_ecosystem_issues.py b/8_ecosystem_issues.py index 6f24a48..5be28d9 100644 --- a/8_ecosystem_issues.py +++ b/8_ecosystem_issues.py @@ -1,149 +1,120 @@ -""" -This script gathers info about github issues in infra repos, for ecosystem squad -""" - -import logging -import os -import time -from datetime import datetime, timedelta - -import psycopg2 -from github import Github - -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -start_time = time.time() - -logging.info("-------------------------ECOSYSTEM ISSUES SCRIPT IS RUNNING-------------------------") - -github_token = os.getenv("GITHUB_TOKEN") -github_fallback_token = os.getenv("GITHUB_FALLBACK_TOKEN") - -db_host = os.getenv("DB_HOST") -db_port = os.getenv("DB_PORT") -db_name = os.getenv("DB_CSV") -db_user = os.getenv("DB_USER") -db_password = os.getenv("DB_PASSWORD") - - -def check_env_variables(): - required_env_vars = [ - "GITHUB_TOKEN", "DB_HOST", "DB_PORT", - "DB_NAME", "DB_USER", "DB_PASSWORD", "GITEA_TOKEN" - ] - for var in required_env_vars: - if os.getenv(var) is None: - raise Exception(f"Missing environment variable: {var}") - - -def connect_to_db(db_name): - logging.info("Connecting to Postgres (%s)...", db_name) - try: - return psycopg2.connect( - host=db_host, - port=db_port, - dbname=db_name, - user=db_user, - password=db_password - ) - except psycopg2.Error as e: - logging.error("Connecting to Postgres: an error occurred while trying to connect to the database: %s", e) - return None - - -def create_open_issues_table(conn, cur, table_name): - try: - cur.execute( - f'''CREATE TABLE IF NOT EXISTS {table_name} ( - id SERIAL PRIMARY KEY, - "Repo Name" VARCHAR(255), - "Issue Number" INT, - "Issue URL" VARCHAR(255), - "Created by" VARCHAR(255), - "Created at" VARCHAR(255), - "Duration" INT, - "Comments" INT, - "Assignees" TEXT - );''' - ) - conn.commit() - logging.info("Table %s has been created successfully", table_name) - except psycopg2.Error as e: - logging.error("Tables creating: an error occurred while trying to create a table %s in the database \ - %s: %s", table_name, db_name, e) - - -def insert_issue_data(conn, cur, table_name, repo, issue): - assignees = ', '.join(assignee.login for assignee in issue.assignees) - created_at = issue.created_at.strftime('%Y-%m-%d') - try: - cur.execute( - f"""INSERT INTO {table_name} ( - "Repo Name", "Issue Number", - "Issue URL", "Created by", "Created at", "Duration", "Comments", "Assignees" - ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s);""", - ( - repo.name, - issue.number, - issue.html_url, - issue.user.login, - created_at, - (datetime.now() - issue.created_at).days, - issue.comments, - assignees - ) - ) - conn.commit() - except psycopg2.Error as e: - logging.error("Error inserting issue data: %s", e) - conn.rollback() - - -def gather_issues(ghorg, conn, cur, table_name): - logging.info("Gathering issues info...") - one_year_ago = datetime.now() - timedelta(days=365) - for repo in ghorg.get_repos(): - if repo.archived or repo.pushed_at < one_year_ago: - continue - issues = repo.get_issues(state="open") - for issue in issues: - insert_issue_data(conn, cur, table_name, repo, issue) - - -def main(gorg, table_name, token): - check_env_variables() - g = Github(token) - - ghorg = g.get_organization(gorg) - conn = connect_to_db(db_name) - cur = conn.cursor() - - cur.execute(f"DROP TABLE IF EXISTS {table_name}") - conn.commit() - - create_open_issues_table(conn, cur, table_name) - 
gather_issues(ghorg, conn, cur, table_name) - - cur.close() - conn.close() - - -if __name__ == "__main__": - GH_ORG_STR = "opentelekomcloud" - ISSUES_TABLE = "open_issues_eco" - - DONE = False - try: - main(GH_ORG_STR, ISSUES_TABLE, github_token) - DONE = True - except Exception as e: - logging.error("Error has been occurred: %s", e) - main(GH_ORG_STR, ISSUES_TABLE, github_fallback_token) - DONE = True - if DONE: - logging.info("Github operations successfully done!") - - end_time = time.time() - execution_time = end_time - start_time - minutes, seconds = divmod(execution_time, 60) - logging.info("Script executed in %s minutes %s seconds! Let's go drink some beer :)", int(minutes), int(seconds)) +""" +This script gathers info about GitHub issues in infra repos for the Ecosystem squad +""" + +import logging +import time +from datetime import datetime, timedelta + +import psycopg2 +from github import Github + +from classes import Database, EnvVariables + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +start_time = time.time() + +logging.info("-------------------------ECOSYSTEM ISSUES SCRIPT IS RUNNING-------------------------") + +env_vars = EnvVariables() +database = Database(env_vars) + +github_token = env_vars.github_token +github_fallback_token = env_vars.github_fallback_token + + +def create_open_issues_table(conn, cur, table_name): + try: + cur.execute( + f'''CREATE TABLE IF NOT EXISTS {table_name} ( + id SERIAL PRIMARY KEY, + "Repo Name" VARCHAR(255), + "Issue Number" INT, + "Issue URL" VARCHAR(255), + "Created by" VARCHAR(255), + "Created at" VARCHAR(255), + "Duration" INT, + "Comments" INT, + "Assignees" TEXT + );''' + ) + conn.commit() + logging.info("Table %s has been created successfully", table_name) + except psycopg2.Error as e: + logging.error("Table creation: an error occurred while trying to create table %s in the database \ + %s: %s", table_name, env_vars.db_csv, e) + + +def insert_issue_data(conn, cur, table_name, repo, issue): + assignees = ', '.join(assignee.login for assignee in issue.assignees) + created_at = issue.created_at.strftime('%Y-%m-%d') + try: + cur.execute( + f"""INSERT INTO {table_name} ( + "Repo Name", "Issue Number", + "Issue URL", "Created by", "Created at", "Duration", "Comments", "Assignees" + ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s);""", + ( + repo.name, + issue.number, + issue.html_url, + issue.user.login, + created_at, + (datetime.now() - issue.created_at).days, + issue.comments, + assignees + ) + ) + conn.commit() + except psycopg2.Error as e: + logging.error("Error inserting issue data: %s", e) + conn.rollback() + + +def gather_issues(ghorg, conn, cur, table_name): + logging.info("Gathering issues info...") + one_year_ago = datetime.now() - timedelta(days=365) + for repo in ghorg.get_repos(): + if repo.archived or repo.pushed_at < one_year_ago: + continue + issues = repo.get_issues(state="open") + for issue in issues: + insert_issue_data(conn, cur, table_name, repo, issue) + + +def main(gorg, table_name, token): + g = Github(token) + ghorg = g.get_organization(gorg) + conn = database.connect_to_db(env_vars.db_csv) + cur = conn.cursor() + + cur.execute(f"DROP TABLE IF EXISTS {table_name}") + conn.commit() + + create_open_issues_table(conn, cur, table_name) + gather_issues(ghorg, conn, cur, table_name) + + cur.close() + conn.close() + + +if __name__ == "__main__": + GH_ORG_STR = "opentelekomcloud" + ISSUES_TABLE = "open_issues_eco" + + DONE = False + try: + main(GH_ORG_STR, ISSUES_TABLE, github_token) 
+ DONE = True + except Exception as e: + logging.error("An error occurred: %s", e) + main(GH_ORG_STR, ISSUES_TABLE, github_fallback_token) + DONE = True + if DONE: + logging.info("GitHub operations completed successfully!") + + end_time = time.time() + execution_time = end_time - start_time + minutes, seconds = divmod(execution_time, 60) + logging.info("Script executed in %s minutes %s seconds! Let's go drink some beer :)", int(minutes), int(seconds)) diff --git a/classes.py b/classes.py new file mode 100644 index 0000000..e521192 --- /dev/null +++ b/classes.py @@ -0,0 +1,55 @@ +""" +This module contains helper classes for code reuse +""" + +import logging +import os + +import psycopg2 + + +class EnvVariables: + required_env_vars = [ + "DB_HOST", "DB_PORT", "DB_CSV", "DB_USER", "DB_ORPH", "DB_ZUUL", "DB_PASSWORD", "GITEA_TOKEN", "GITHUB_TOKEN", + "GITHUB_FALLBACK_TOKEN" + ] + + def __init__(self): + self.db_host = os.getenv("DB_HOST") + self.db_port = os.getenv("DB_PORT") + self.db_csv = os.getenv("DB_CSV") # main postgres db, where open PR tables for public and hybrid clouds are stored + self.db_user = os.getenv("DB_USER") + self.db_orph = os.getenv("DB_ORPH") + self.db_zuul = os.getenv("DB_ZUUL") + self.db_password = os.getenv("DB_PASSWORD") + self.gitea_token = os.getenv("GITEA_TOKEN") + self.github_token = os.getenv("GITHUB_TOKEN") + self.github_fallback_token = os.getenv("GITHUB_FALLBACK_TOKEN") + self.check_env_variables() + + def check_env_variables(self): + for var in self.required_env_vars: + if os.getenv(var) is None: + raise Exception("Missing environment variable: %s" % var) + + +class Database: + def __init__(self, env): + self.db_host = env.db_host + self.db_port = env.db_port + self.db_user = env.db_user + self.db_password = env.db_password + + def connect_to_db(self, db_name): + logging.info("Connecting to Postgres (%s)...", db_name) + try: + return psycopg2.connect( + host=self.db_host, + port=self.db_port, + dbname=db_name, + user=self.db_user, + password=self.db_password + ) + except psycopg2.Error as e: + logging.error("Connecting to Postgres: an error occurred while trying to connect to the database: %s", e) + return None
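Note: the new classes.py replaces the per-script check_env_variables() and connect_to_db() helpers that this patch deletes from 2_gitea_info.py, 4_failed_zuul.py, and 8_ecosystem_issues.py. A minimal usage sketch (illustrative only, not part of the patch; it assumes the variables listed in EnvVariables.required_env_vars are exported and that Postgres is reachable):

    from classes import Database, EnvVariables

    env_vars = EnvVariables()      # raises Exception if any required env var is missing
    database = Database(env_vars)  # captures host/port/user/password from env_vars

    conn = database.connect_to_db(env_vars.db_zuul)  # returns a connection, or None on failure
    if conn is not None:
        cur = conn.cursor()
        cur.execute("SELECT 1;")   # hypothetical smoke-test query
        cur.close()
        conn.close()

Because connect_to_db() logs and returns None instead of raising, callers that immediately invoke conn.cursor() (as the three scripts do) will fail with an AttributeError when the connection cannot be established; checking for None, as above, is the safer pattern.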