Skip to content

Commit

Permalink
Refactor files for reusing code (#77)
Browse files Browse the repository at this point in the history
Refactor files for reusing code

Refactor the rest of the scripts to reuse the shared classes
Add GitHub token variables to classes.py

Reviewed-by: Anton Sidelnikov
  • Loading branch information
YustinaKvr authored Aug 1, 2024
1 parent 9b28f99 commit a48a266
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 277 deletions.
56 changes: 16 additions & 40 deletions 1_otc_services_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@

import base64
import logging
import os
import time

import psycopg2
import requests
import yaml

from classes import Database, EnvVariables

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

start_time = time.time()
Expand All @@ -20,33 +21,8 @@

BASE_URL = "https://gitea.eco.tsi-dev.otc-service.com/api/v1"

# Gitea API auth: token comes from the environment and is sent as a
# request header on every Gitea API call in this script.
gitea_token = os.getenv("GITEA_TOKEN")
headers = {
    "Authorization": f"token {gitea_token}"
}

# Postgres connection settings and target database names, all read from
# the environment. Note: os.getenv returns None for unset variables, so a
# missing variable surfaces later as a failed DB connection, not here.
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_csv = os.getenv("DB_CSV")
db_orph = os.getenv("DB_ORPH")
db_zuul = os.getenv("DB_ZUUL")
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")


def connect_to_db(db):
    """Open a psycopg2 connection to database *db*.

    Connection parameters (host, port, user, password) come from the
    module-level environment-derived settings. Returns the connection
    object, or None when the connection attempt fails — callers must be
    prepared to handle a None return.
    """
    logging.info("Connecting to Postgres (%s)...", db)
    try:
        conn = psycopg2.connect(
            host=db_host,
            port=db_port,
            dbname=db,
            user=db_user,
            password=db_password,
        )
    except psycopg2.Error as e:
        logging.error("Connecting to Postgres: an error occurred while trying to connect to the database: %s", e)
        return None
    return conn
# Shared helpers imported from classes.py: EnvVariables bundles the
# environment-derived settings, Database provides the DB connections.
env_vars = EnvVariables()
database = Database(env_vars)


def create_rtc_table(conn_csv, cur_csv, table_name):
Expand Down Expand Up @@ -86,15 +62,15 @@ def create_doc_table(conn_csv, cur_csv, table_name):


def get_pretty_category_names(base_dir, category_dir):
response = requests.get(f"{BASE_URL}{category_dir}", timeout=10, headers=headers)
response = requests.get(f"{BASE_URL}{category_dir}", timeout=10)
response.raise_for_status()
all_files = [item['path'] for item in response.json() if item['type'] == 'file']

category_mapping = {}

for file_path in all_files:
if file_path.endswith('.yaml'):
response = requests.get(f"{BASE_URL}{base_dir}{file_path}", timeout=10, headers=headers)
response = requests.get(f"{BASE_URL}{base_dir}{file_path}", timeout=10)
response.raise_for_status()

file_content_base64 = response.json()['content']
Expand All @@ -109,15 +85,15 @@ def get_pretty_category_names(base_dir, category_dir):
def get_service_categories(base_dir, category_dir, services_dir):
pretty_names = get_pretty_category_names(base_dir, category_dir)

response = requests.get(f"{BASE_URL}{services_dir}", timeout=10, headers=headers)
response = requests.get(f"{BASE_URL}{services_dir}", timeout=10)
response.raise_for_status()
all_files = [item['path'] for item in response.json() if item['type'] == 'file']

all_data = []

for file_path in all_files:
if file_path.endswith('.yaml'):
response = requests.get(f"{BASE_URL}{base_dir}{file_path}", timeout=10, headers=headers)
response = requests.get(f"{BASE_URL}{base_dir}{file_path}", timeout=10)
response.raise_for_status()

file_content_base64 = response.json()['content']
Expand All @@ -139,15 +115,15 @@ def get_service_categories(base_dir, category_dir, services_dir):


def get_docs_info(base_dir, doc_dir):
response = requests.get(f"{BASE_URL}{doc_dir}", timeout=10, headers=headers)
response = requests.get(f"{BASE_URL}{doc_dir}", timeout=10)
response.raise_for_status()
all_files = [item['path'] for item in response.json() if item['type'] == 'file']

all_data = []

for file_path in all_files:
if file_path.endswith('.yaml'):
response = requests.get(f"{BASE_URL}{base_dir}{file_path}", timeout=10, headers=headers)
response = requests.get(f"{BASE_URL}{base_dir}{file_path}", timeout=10)
response.raise_for_status()

file_content_base64 = response.json()['content']
Expand Down Expand Up @@ -179,7 +155,7 @@ def insert_services_data(item, conn_csv, cur_csv, table_name):


def get_squad_description(styring_url):
response = requests.get(styring_url, timeout=10, headers=headers)
response = requests.get(styring_url, timeout=10)
response.raise_for_status()

file_content_base64 = response.json()['content']
Expand Down Expand Up @@ -266,15 +242,15 @@ def main(base_dir, rtctable, doctable, styring_path):
services_dir = f"{base_dir}otc_metadata/data/services"
category_dir = f"{base_dir}otc_metadata/data/service_categories"
doc_dir = f"{base_dir}otc_metadata/data/documents"
styring_url = f"{BASE_URL}{styring_path}{gitea_token}"
styring_url = f"{BASE_URL}{styring_path}{env_vars.gitea_token}"

conn_orph = connect_to_db(db_orph)
conn_orph = database.connect_to_db(env_vars.db_orph)
cur_orph = conn_orph.cursor()

conn_zuul = connect_to_db(db_zuul)
conn_zuul = database.connect_to_db(env_vars.db_zuul)
cur_zuul = conn_zuul.cursor()

conn_csv = connect_to_db(db_csv)
conn_csv = database.connect_to_db(env_vars.db_csv)
cur_csv = conn_csv.cursor()

conns = [conn_orph, conn_zuul]
Expand Down Expand Up @@ -315,7 +291,7 @@ def main(base_dir, rtctable, doctable, styring_path):

main(BASE_DIR_REGULAR, BASE_RTC_TABLE, BASE_DOC_TABLE, STYRING_URL_REGULAR)
main(BASE_DIR_SWISS, f"{BASE_RTC_TABLE}_swiss", f"{BASE_DOC_TABLE}_swiss", STYRING_URL_SWISS)
conn_csv = connect_to_db(db_csv)
conn_csv = database.connect_to_db(env_vars.db_csv)
cur_csv = conn_csv.cursor()
add_obsolete_services(conn_csv, cur_csv)
conn_csv.commit()
Expand Down
68 changes: 18 additions & 50 deletions 3_github_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,22 @@
"""

import logging
import os
import re
import time

import psycopg2
import requests
from github import Github

from classes import Database, EnvVariables

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

start_time = time.time()

logging.info("-------------------------GITHUB INFO SCRIPT IS RUNNING-------------------------")

github_token = os.getenv("GITHUB_TOKEN")
github_fallback_token = os.getenv("GITHUB_FALLBACK_TOKEN")

db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_ORPH") # Here we're using dedicated postgres db for orphan PRs only
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")


def check_env_variables():
    """Fail fast if any environment variable this script needs is unset.

    Raises:
        Exception: naming the first missing environment variable.
    """
    # Fix: the list previously required "DB_NAME", which this script never
    # reads — it reads DB_ORPH for the orphan-PRs database. Check the
    # variable that is actually used.
    required_env_vars = [
        "GITHUB_TOKEN", "DB_HOST", "DB_PORT",
        "DB_ORPH", "DB_USER", "DB_PASSWORD", "GITEA_TOKEN"
    ]
    for var in required_env_vars:
        if os.getenv(var) is None:
            raise Exception(f"Missing environment variable: {var}")


def connect_to_db(db_name):
    """Open a psycopg2 connection to *db_name* using the module-level settings.

    Returns the connection object, or None when the connection attempt
    fails — callers must be prepared to handle a None return.
    """
    # Fix: use lazy %-style logging args instead of an f-string, matching
    # the logging style used elsewhere in these scripts.
    logging.info("Connecting to Postgres (%s)...", db_name)
    try:
        return psycopg2.connect(
            host=db_host,
            port=db_port,
            dbname=db_name,
            user=db_user,
            password=db_password
        )
    except psycopg2.Error as e:
        # Fix: log the failure at ERROR level (was INFO) so it is visible
        # when logs are filtered to warnings/errors.
        logging.error("Connecting to Postgres: an error occurred while trying to connect to the database: %s", e)
        return None
env_vars = EnvVariables()
database = Database(env_vars)


def extract_pull_links(cur, table_name):
Expand Down Expand Up @@ -129,30 +98,29 @@ def update_orphaned_prs(org_str, cur, conn, rows, auto_prs, table_name):


def main(org, gorg, table_name, token):
    """Collect automated PRs from every repo in GitHub org *gorg* and
    update the orphaned-PRs table *table_name* accordingly.

    NOTE(review): the pasted span interleaved pre-refactor statements
    (connect_to_db(db_name), bare github_token, cur/conn) with their
    post-refactor replacements, duplicating every operation; only the
    refactored statements are kept here.
    """
    g = Github(token)

    ghorg = g.get_organization(gorg)
    repo_names = [repo.name for repo in ghorg.get_repos()]
    # Dedicated orphan-PRs database holds the rows annotated below.
    conn_orph = database.connect_to_db(env_vars.db_orph)
    cur_orph = conn_orph.cursor()

    pull_links = extract_pull_links(cur_orph, table_name)

    auto_prs = []
    logging.info("Gathering PRs info...")
    for repo_name in repo_names:
        auto_prs += get_auto_prs(gorg, repo_name, env_vars.github_token, pull_links)

    add_github_columns(cur_orph, conn_orph, table_name)

    cur_orph.execute(f'SELECT id, "Auto PR URL" FROM {table_name};')
    rows = cur_orph.fetchall()

    update_orphaned_prs(org, cur_orph, conn_orph, rows, auto_prs, table_name)

    cur_orph.close()
    conn_orph.close()


if __name__ == "__main__":
Expand All @@ -162,13 +130,13 @@ def main(org, gorg, table_name, token):

DONE = False
try:
main(ORG_STRING, GH_ORG_STR, ORPH_TABLE, github_token)
main(f"{ORG_STRING}-swiss", f"{GH_ORG_STR}-swiss", f"{ORPH_TABLE}_swiss", github_token)
main(ORG_STRING, GH_ORG_STR, ORPH_TABLE, env_vars.github_token)
main(f"{ORG_STRING}-swiss", f"{GH_ORG_STR}-swiss", f"{ORPH_TABLE}_swiss", env_vars.github_token)
DONE = True
except Exception as e:
logging.info(f"Error has been occurred: {e}")
main(ORG_STRING, GH_ORG_STR, ORPH_TABLE, github_fallback_token)
main(f"{ORG_STRING}-swiss", f"{GH_ORG_STR}-swiss", f"{ORPH_TABLE}_swiss", github_fallback_token)
main(ORG_STRING, GH_ORG_STR, ORPH_TABLE, env_vars.github_fallback_token)
main(f"{ORG_STRING}-swiss", f"{GH_ORG_STR}-swiss", f"{ORPH_TABLE}_swiss", env_vars.github_fallback_token)
DONE = True
if DONE:
logging.info("Github operations successfully done!")
Expand Down
73 changes: 21 additions & 52 deletions 5_open_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
import logging
import os
import re
import time
from datetime import datetime
Expand All @@ -13,6 +12,8 @@
import requests
from github import Github

from classes import Database, EnvVariables

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

start_time = time.time()
Expand All @@ -22,40 +23,8 @@
GITEA_API_ENDPOINT = "https://gitea.eco.tsi-dev.otc-service.com/api/v1"
session = requests.Session()

gitea_token = os.getenv("GITEA_TOKEN")
github_token = os.getenv("GITHUB_TOKEN")
github_fallback_token = os.getenv("GITHUB_FALLBACK_TOKEN")

db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_CSV") # here we're using main postgres table since we don't need orphan PRs
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")


def check_env_variables():
    """Fail fast if any environment variable this script needs is unset.

    Raises:
        Exception: naming the first missing environment variable.
    """
    # Fix: the list previously required "DB_NAME", which this script never
    # reads — it reads DB_CSV for the main database. Check the variable
    # that is actually used.
    required_env_vars = [
        "GITHUB_TOKEN", "DB_HOST", "DB_PORT",
        "DB_CSV", "DB_USER", "DB_PASSWORD", "GITEA_TOKEN"
    ]
    for var in required_env_vars:
        if os.getenv(var) is None:
            raise Exception(f"Missing environment variable: {var}")


def connect_to_db(db_name):
    """Open a psycopg2 connection to *db_name* using the module-level settings.

    Returns the connection object, or None when the connection attempt
    fails — callers must be prepared to handle a None return.
    """
    logging.info("Connecting to Postgres %s...", db_name)
    try:
        return psycopg2.connect(
            host=db_host,
            port=db_port,
            dbname=db_name,
            user=db_user,
            password=db_password
        )
    except psycopg2.Error as e:
        # Fix: the %s placeholder was misplaced ("...database %s:" put the
        # error text mid-sentence with a dangling colon); put the error
        # after the colon, matching the sibling scripts.
        logging.error("Connecting to Postgres: an error occurred while trying to connect to the database: %s", e)
        return None
env_vars = EnvVariables()
database = Database(env_vars)


def create_open_issues_table(conn, cur, table_name):
Expand All @@ -79,7 +48,7 @@ def create_open_issues_table(conn, cur, table_name):
logging.info("Table %s has been created successfully", table_name)
except psycopg2.Error as e:
logging.error("Tables creating: an error occurred while trying to create a table %s in the "
"database %s: %s", table_name, db_name, e)
"database %s: %s", table_name, env_vars.db_csv, e)


def get_gitea_issues(gitea_token, gitea_org):
Expand Down Expand Up @@ -145,7 +114,7 @@ def get_github_issues(github_token, repo_names, gh_org):


def get_issues_table(gh_org, gitea_issues, github_issues, cur, conn, table_name):
logging.info("Posting data to Postgres (%s)...", db_name)
logging.info("Posting data to Postgres (%s)...", env_vars.db_csv)
try:
for tea in gitea_issues:
environment = "Gitea"
Expand Down Expand Up @@ -245,26 +214,25 @@ def update_squad_and_title(conn, cur, table_name, rtc):


def main(org, gh_org, table_name, rtc, token):
    """Rebuild the open-issues table *table_name* from Gitea org *org*
    and GitHub org *gh_org*, then refresh squad/title info from *rtc*.

    NOTE(review): the pasted span interleaved pre-refactor statements
    (connect_to_db(db_name), bare tokens, cur/conn) with their
    post-refactor replacements, leaving it syntactically invalid
    (unbalanced execute calls); only the refactored statements are kept.
    """
    g = Github(token)
    github_org = g.get_organization(gh_org)
    repo_names = [repo.name for repo in github_org.get_repos()]
    logging.info("%s repos have been processed", len(repo_names))

    gitea_issues = get_gitea_issues(env_vars.gitea_token, org)
    github_issues = get_github_issues(env_vars.github_token, repo_names, gh_org)
    conn_csv = database.connect_to_db(env_vars.db_csv)
    cur_csv = conn_csv.cursor()

    # Drop and fully rebuild the table on every run.
    cur_csv.execute(
        f'''DROP TABLE IF EXISTS {table_name}'''
    )
    conn_csv.commit()

    create_open_issues_table(conn_csv, cur_csv, table_name)
    get_issues_table(org, gitea_issues, github_issues, cur_csv, conn_csv, table_name)
    update_squad_and_title(conn_csv, cur_csv, table_name, rtc)
    conn_csv.close()


if __name__ == '__main__':
Expand All @@ -275,14 +243,15 @@ def main(org, gh_org, table_name, rtc, token):

DONE = False
try:
main(ORG_STRING, GH_ORG_STRING, OPEN_TABLE, RTC_TABLE, github_token)
main(f"{ORG_STRING}-swiss", f"{GH_ORG_STRING}-swiss", f"{OPEN_TABLE}_swiss", f"{RTC_TABLE}_swiss", github_token)
main(ORG_STRING, GH_ORG_STRING, OPEN_TABLE, RTC_TABLE, env_vars.github_token)
main(f"{ORG_STRING}-swiss", f"{GH_ORG_STRING}-swiss", f"{OPEN_TABLE}_swiss", f"{RTC_TABLE}_swiss",
env_vars.github_token)
DONE = True
except Exception as e:
logging.error("An error occurred: %s", e)
main(ORG_STRING, GH_ORG_STRING, OPEN_TABLE, RTC_TABLE, github_fallback_token)
main(ORG_STRING, GH_ORG_STRING, OPEN_TABLE, RTC_TABLE, env_vars.github_fallback_token)
main(f"{ORG_STRING}-swiss", f"{GH_ORG_STRING}-swiss", f"{OPEN_TABLE}_swiss", f"{RTC_TABLE}_swiss",
github_fallback_token)
env_vars.github_fallback_token)
DONE = True
if DONE:
logging.info("Github operations successfully done!")
Expand Down
Loading

0 comments on commit a48a266

Please sign in to comment.