Merge pull request #116 from MikroElektronika/improvement/fixed-indexing
Added script to check for index discrepancies.
IvanRuzavin authored Oct 14, 2024
2 parents fc461a7 + 1d690f1 commit 285261c
Showing 5 changed files with 380 additions and 0 deletions.
111 changes: 111 additions & 0 deletions .github/workflows/checkIndexes.yaml
@@ -0,0 +1,111 @@
name: Check Indexed File Links

on:
  workflow_dispatch:
    inputs:
      select_index:
        type: choice
        description: Check Test and/or Live ES indexed items
        options:
          - Test
          - Live
          - Both
      regex:
        type: string
        description: Regex to use when searching for indexed items
        default: "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
      fix:
        type: boolean
        description: Fix the broken links with new ones?
        default: false

  push:
    branches:
      - main # This will trigger on every push (merge) to the 'main' branch

  schedule:
    - cron: "*/30 * * * *" # This will run every 30 minutes

jobs:
  manual_run:
    if: ${{ github.event_name == 'workflow_dispatch' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        if: ${{ github.event.inputs.select_index == 'Live' || github.event.inputs.select_index == 'Both' }}
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "${{ github.event.inputs.regex }}" "--log_only" ${{ !github.event.inputs.fix }}
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        if: ${{ github.event.inputs.select_index == 'Test' || github.event.inputs.select_index == 'Both' }}
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "${{ github.event.inputs.regex }}" "--log_only" ${{ !github.event.inputs.fix }}
        continue-on-error: true # Ensure the workflow continues

  push_to_main_run:
    if: ${{ github.event_name == 'push' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues

  scheduled_run:
    if: ${{ github.event_name == 'schedule' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues
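
For a local dry run of the same check the workflow performs, the script could be launched roughly as follows. This is only a sketch: the repository slug, environment-variable names, and regex below are placeholders, not values taken from this workflow or repository.

import os, subprocess

# Hypothetical local equivalent of the "Check Indexed Links - Test" step;
# credentials come from environment variables instead of GitHub secrets.
cmd = [
    "python", "-u", "scripts/check_indexes.py",
    "owner/repo",                      # placeholder repository slug
    os.environ["GITHUB_TOKEN"],
    os.environ["ES_HOST"],
    os.environ["ES_USER"],
    os.environ["ES_PASSWORD"],
    os.environ["ES_INDEX_TEST"],
    "--es_regex", "clocks|schemas",    # placeholder regex for a quick check
    "--log_only", "true",              # report only, do not re-index anything
]
subprocess.run(cmd, check=False)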
47 changes: 47 additions & 0 deletions scripts/check_indexes.py
@@ -0,0 +1,47 @@
import sys, json, argparse, requests

import classes.class_gh as gh
import classes.class_es as es

if __name__ == "__main__":
    # Get arguments
    parser = argparse.ArgumentParser(description="Check indexed item download links against GitHub release assets.")
    parser.add_argument("gh_repo", help="Github repository name, e.g., 'username/repo'", type=str)
    parser.add_argument("gh_token", help="GitHub Token", type=str)
    parser.add_argument("es_host", help="ES instance host value", type=str)
    parser.add_argument("es_user", help="ES instance user value", type=str)
    parser.add_argument("es_password", help="ES instance password value", type=str)
    parser.add_argument("es_index", help="ES instance index value", type=str)
    parser.add_argument("--es_regex", help="Regex to use to fetch indexed items", type=str, default=".+")
    # The workflow passes a literal "true"/"false" string here, so parse it explicitly;
    # type=bool would treat any non-empty string (including "false") as True.
    parser.add_argument("--log_only", help="Only report broken links, do not re-index fixed ones",
                        type=lambda value: str(value).lower() in ("true", "1", "yes"), default=False)
    args = parser.parse_args()

    es_instance = es.index(
        es_host=args.es_host, es_user=args.es_user, es_password=args.es_password,
        index=args.es_index, token=args.gh_token
    )

    gh_instance = gh.repo(args.gh_repo, args.gh_token)

    es_instance.fetch(regex=args.es_regex)

    headers = {
        'Authorization': f'token {args.gh_token}'
    }

    err = False
    for indexed_item in es_instance.indexed_items:
        asset_status = requests.get(indexed_item['source']['download_link'], headers=headers)
        if es_instance.Status.ERROR.value == asset_status.status_code: ## code 404 - error, reindex with correct download link
            err = True
            print("%sERROR: Asset \"%s\" download link is incorrect. - %s" % (es_instance.Colors.FAIL, indexed_item['source']['name'], indexed_item['source']['download_link']))
            if not args.log_only:
                package_name = (json.loads(asset_status.text))['name']
                url = gh_instance.asset_fetch_url_api(package_name, loose=False)
                indexed_item['source']['download_link'] = url
                es_instance.update(indexed_item['doc']['type'], indexed_item['doc']['id'], indexed_item['source'])
        else: ## code 200 - success, no need to reindex
            print("%sOK: Asset \"%s\" download link is correct. - %s" % (es_instance.Colors.OKBLUE, indexed_item['source']['name'], indexed_item['source']['download_link']))

    if err and args.log_only:
        sys.exit(-1)
138 changes: 138 additions & 0 deletions scripts/classes/class_es.py
@@ -0,0 +1,138 @@
import re, time
from elasticsearch import Elasticsearch
from enum import Enum

class index():
    class Status(Enum):
        SUCCESS = 200
        ERROR = 404

    class Colors:
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKCYAN = '\033[96m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

    @staticmethod
    def init(es_host, es_user, es_password, retry=None):
        retry_check = 10
        if retry:
            retry_check = retry
        num_of_retries = 1
        while True:
            print(f"Trying to connect to ES. Connection retry: {num_of_retries}")
            es = Elasticsearch([es_host], http_auth=(es_user, es_password))
            if es.ping():
                break
            # Wait for 1 second and try again if connection fails
            if retry_check == num_of_retries:
                # Exit if it fails 10 times, something is wrong with the server
                raise ValueError("Connection to ES failed!")
            num_of_retries += 1
            es = None

            time.sleep(1)

        return es

    @staticmethod
    def response(es: Elasticsearch, index, query_size=5000):
        # Search query to use
        query_search = {
            "size": query_size,
            "query": {
                "match_all": {}
            }
        }

        # Search the base with provided query
        response = None
        num_of_retries = 1
        while num_of_retries <= 10:
            try:
                response = es.search(index=index, body=query_search)
                if not response['timed_out']:
                    break
            except Exception:
                print("Executing search query - retry number %i" % num_of_retries)
            num_of_retries += 1

        return response

    @staticmethod
    def find_item(items, check):
        for index in items:
            if 'name' in index:
                if index['name'] == check:
                    return True
        return False

    @staticmethod
    def api_index(es: Elasticsearch, doc_index, doc_type, doc_id, doc_body):
        return es.index(
            index=doc_index,
            doc_type=doc_type,
            id=doc_id,
            body=doc_body
        )

    def __init__(self, es_host, es_user, es_password, index, token, retry=None):
        self.es_instance = self.init(es_host, es_user, es_password, retry)
        self.index = index
        self.token = token
        self.indexed_items = []

    def fetch(self, regex=r'.+', query_size=5000):
        pattern = re.compile(regex)
        response = self.response(self.es_instance, self.index, query_size)

        for eachHit in response['hits']['hits']:
            if 'name' in eachHit['_source']:
                if pattern.match(eachHit['_source']['name']):
                    self.indexed_items.append(
                        {
                            'doc': {
                                'index': eachHit['_index'],
                                'type': eachHit['_type'],
                                'id': eachHit['_id']
                            },
                            'source': eachHit['_source']
                        }
                    )

    def exists(self, check, query_size=5000):
        ## Did not use ES exists function as it requires doc_type and doc_id
        ## For future use it is better to search by name only
        response = self.response(self.es_instance, self.index, query_size)
        return self.find_item([index['_source'] for index in response['hits']['hits']], check)

    def create(self, doc_type, doc_id, doc_body):
        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
        if not 'created' == response['result']:
            raise ValueError("%s%s failed to index to %s!" % (self.Colors.FAIL, doc_id, self.index))
        else:
            print("%sINFO: Asset \"%s\" created. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))

    def update(self, doc_type, doc_id, doc_body):
        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
        # ES 7.x index responses report the outcome in 'result' ('created'/'updated');
        # there is no separate 'created' key to check.
        if 'created' == response['result']:
            print("%sWARNING: Asset \"%s\" created instead of updated. - %s" % (self.Colors.WARNING, doc_body['name'], doc_body['download_link']))
        elif not 'updated' == response['result']:
            raise ValueError("%s%s failed to update on %s!" % (self.Colors.FAIL, doc_id, self.index))
        else:
            print("%sINFO: Asset \"%s\" updated. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))

    def delete(self, doc_type, doc_id):
        response = self.es_instance.delete(
            index=self.index,
            doc_type=doc_type,
            id=doc_id
        )
        if not 'deleted' == response['result']:
            raise ValueError("%s%s failed to delete from %s!" % (self.Colors.FAIL, doc_id, self.index))
        else:
            print("%sINFO: Asset \"%s\" deleted." % (self.Colors.OKGREEN, doc_id))
82 changes: 82 additions & 0 deletions scripts/classes/class_gh.py
@@ -0,0 +1,82 @@
import requests
from enum import Enum

class repo():
    class Status(Enum):
        SUCCESS = 200

    class Colors:
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKCYAN = '\033[96m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

    @staticmethod
    def fetch_all_assets(url, token):
        headers = {
            'Authorization': f'token {token}'
        }

        asset_list = []

        response = requests.get(url, headers=headers)
        response.raise_for_status()
        assets = response.json()

        # Nothing to collect if the release has no data
        if not assets:
            return None

        # The 'latest' release endpoint returns a release object with an 'assets'
        # array, while the '.../assets' endpoint returns the array directly
        if isinstance(assets, dict):
            assets = assets.get('assets', [])

        for asset in assets:
            asset_list.append(asset)

        return asset_list

    @staticmethod
    def fetch_asset(assets, asset_name, loose=False):
        for asset in assets:
            if loose:
                if asset_name in asset['name']:
                    return asset
            else:
                if asset['name'] == asset_name:
                    return asset
        return None

    def __init__(self, repo, token, release_id='latest'):
        self.repo = repo
        self.token = token
        if 'latest' == release_id:
            self.repo_url = f'https://api.github.com/repos/{repo}/releases/latest'
        else:
            self.repo_url = f'https://api.github.com/repos/{repo}/releases/{release_id}/assets'
        self.assets = self.fetch_all_assets(self.repo_url, self.token)

    def asset_exists(self, asset_name, loose=False):
        ## TODO - implement asset checking in the future
        return

    def asset_fetch_url_api(self, asset_name, loose=False):
        asset = self.fetch_asset(self.assets, asset_name, loose)
        if asset:
            return asset['url']
        return None

    def asset_fetch_url_browser(self, asset_name, loose=False):
        asset = self.fetch_asset(self.assets, asset_name, loose)
        if asset:
            return asset['browser_download_url']
        return None

    def asset_upload(self, asset_path, asset_name):
        ## TODO - implement asset upload in the future
        return

    def asset_delete(self, asset_path, asset_name):
        ## TODO - implement asset deletion in the future
        return
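
A corresponding usage sketch for the GitHub side, with a placeholder repository slug, token, and asset name: load the latest release's assets and resolve one asset's API and browser download URLs.

import classes.class_gh as gh

# Placeholder repository and token - substitute real values before running
gh_instance = gh.repo("owner/repo", "ghp_example_token")

# Exact-name lookup against the latest release's assets
api_url = gh_instance.asset_fetch_url_api("clocks.json", loose=False)
# Substring (loose) lookup for the browser download URL
browser_url = gh_instance.asset_fetch_url_browser("clocks", loose=True)

print(api_url, browser_url)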
2 changes: 2 additions & 0 deletions scripts/requirements/check_index.txt
@@ -0,0 +1,2 @@
requests
elasticsearch==7.13.4
