Skip to content

Commit

Permalink
Merge pull request #1 from ModelAtlasofTheEarth/template-update
Browse files Browse the repository at this point in the history
New scripts from template repo
  • Loading branch information
dansand authored May 7, 2024
2 parents 92cbc81 + 4a4d6f0 commit bf57b2e
Show file tree
Hide file tree
Showing 16 changed files with 700 additions and 0 deletions.
16 changes: 16 additions & 0 deletions .github/ISSUE_TEMPLATE/publish_model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# Issue form a model owner fills in to supply the DOI of a published model.
name: Publish model
description: Update model with doi and publish
title: "Publish model"
# update_doi.py removes this exact label again when the supplied DOI is rejected.
labels: ["model published"]

body:

- type: input
id: doi
attributes:
# update_doi.py looks this field up in the rendered issue body by its exact
# label text ("-> doi"), so the label and the script must be changed together.
label: -> doi
placeholder: "https://doi.org/10.47366/sabia.v5n1a3"
description: "Provide the doi of your published model"
validations:
required: true
Empty file added .github/foo.txt
Empty file.
21 changes: 21 additions & 0 deletions .github/scripts/check_published.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
from github import Github, Auth

# Environment variables
token = os.environ.get("GITHUB_TOKEN")
repo_name = os.environ.get("REPO_NAME")

# Get repo
auth = Auth.Token(token)
g = Github(auth=auth)
repo = g.get_repo(repo_name)

# Find if any of the issues has a label containing "published".
# any() stops at the first match, so no further issues (and no further
# paginated API requests) are fetched once the answer is known.
# NOTE(review): get_issues() is called without a state filter, so the
# library default applies - confirm closed issues should not be considered.
published = any(
    'published' in label.name
    for issue in repo.get_issues()
    for label in issue.labels
)

# The workflow reads this single True/False line from stdout.
print(published)
43 changes: 43 additions & 0 deletions .github/scripts/copy_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import base64
from github import Github, Auth

# Configuration is injected by the workflow through environment variables;
# any variable that is not set is read as None.
token = os.getenv("GITHUB_TOKEN")

# Where the files come from
source_repo_owner = os.getenv("SOURCE_REPO_OWNER")
source_repo_name = os.getenv("SOURCE_REPO_NAME")
source_path = os.getenv("SOURCE_PATH")

# Where the files go
target_repo_owner = os.getenv("TARGET_REPO_OWNER")
target_repo_name = os.getenv("TARGET_REPO_NAME")
target_branch_name = os.getenv("TARGET_REPO_BRANCH")
target_path = os.getenv("TARGET_PATH")

# One authenticated client is shared by both repository handles.
auth = Auth.Token(token)
g = Github(auth=auth)
source_repo = g.get_repo("{}/{}".format(source_repo_owner, source_repo_name))
target_repo = g.get_repo("{}/{}".format(target_repo_owner, target_repo_name))

def copy_files(contents, target_path):
    """Recursively copy source-repo entries into the target repo branch.

    Directories are walked recursively. A file is created when it does not
    exist under ``target_path`` on ``target_branch_name``, updated when its
    blob sha differs from the source, and left alone when the shas match.

    Parameters:
    - contents: the result of ``source_repo.get_contents(...)``; either a
      list of content entries or a single entry (returned for a file path).
    - target_path (str): directory in the target repository that receives
      the entries.
    """
    # Imported locally so this function brings its own dependency into scope.
    from github import UnknownObjectException

    # get_contents() yields a single object rather than a list for a file.
    if not isinstance(contents, list):
        contents = [contents]

    for content in contents:
        destination = f"{target_path}/{content.name}"

        if content.type == "dir":
            # Get the contents of the directory and copy recursively
            copy_files(source_repo.get_contents(content.path), destination)
            continue

        # Only a "not found" answer means the file has to be created. Any
        # other failure (authentication, rate limit, a rejected update) must
        # surface instead of silently turning into a create_file() attempt.
        try:
            target_file = target_repo.get_contents(destination, ref=target_branch_name)
        except UnknownObjectException:
            target_file = None

        if target_file is not None and content.sha == target_file.sha:
            # Identical blob already present; nothing to commit.
            continue

        source_file_content = base64.b64decode(source_repo.get_git_blob(content.sha).content)

        if target_file is None:
            # Copy file to target repository
            target_repo.create_file(destination, f"Copying {content.name}", source_file_content, branch=target_branch_name)
        else:
            # Contents differ, update the file
            target_repo.update_file(destination, f"Updating {content.name}", source_file_content, target_file.sha, branch=target_branch_name)


# Get contents of source directory
source_contents = source_repo.get_contents(source_path)

# Start copying files
copy_files(source_contents, target_path)
23 changes: 23 additions & 0 deletions .github/scripts/create_branch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
from github import Github, Auth

from github import UnknownObjectException

# Environment variables
token = os.environ.get("GITHUB_TOKEN")
repo_owner = os.environ.get("REPO_OWNER")
repo_name = os.environ.get("REPO_NAME")
branch_name = os.environ.get("BRANCH_NAME")

auth = Auth.Token(token)
g = Github(auth=auth)
repo = g.get_repo(f"{repo_owner}/{repo_name}")

# Check if the branch name already exists
try:
    repo.get_git_ref(f"heads/{branch_name}")
except UnknownObjectException:
    # Only a "not found" answer means the branch is missing. Other failures
    # (bad credentials, rate limiting, network errors) now propagate instead
    # of being mistaken for a missing branch.
    # Create the new branch from the tip of the default branch.
    base_ref = repo.get_git_ref(f"heads/{repo.default_branch}")
    repo.create_git_ref(f"refs/heads/{branch_name}", base_ref.object.sha)
else:
    print("Branch already exists")
25 changes: 25 additions & 0 deletions .github/scripts/find_repos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
import json
import requests
from github import Github, Auth

# Environment variables
token = os.environ.get("TOKEN")
repo_name = os.environ.get("REPO")
org = os.environ.get("ORG")

repos = []

# Seconds to wait for each repository lookup before giving up.
REQUEST_TIMEOUT = 30

# Get org
auth = Auth.Token(token)
g = Github(auth=auth)
org = g.get_organization(org)

# Find repos created from this template.
# The raw repository JSON is fetched directly because that is where the
# 'template_repository' field is read from. The requests are authenticated
# with the same token so they are not subject to the anonymous rate limit
# and so repositories the token can see are not reported as missing.
with requests.Session() as session:
    session.headers.update({
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
    })

    for repo in org.get_repos():
        repo_json = session.get(repo.url, timeout=REQUEST_TIMEOUT).json()
        # The field may be absent or null; both mean "not from a template".
        template = repo_json.get('template_repository') or {}
        if template.get('name') == repo_name:
            repos.append(repo.name)

# The workflow consumes the list as a JSON array on stdout.
print(json.dumps(repos))
43 changes: 43 additions & 0 deletions .github/scripts/json_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import json

def create_or_update_json_entry(file_path, keys_path, new_value):
    """Load a JSON file and prepend ``new_value`` to the entry at ``keys_path``.

    The file itself is never written; the modified document is returned.

    Parameters:
    - file_path (str): path of the JSON file to read.
    - keys_path (str): dot-separated path to the entry. A segment applied to
      a list selects the item whose "@id" equals that segment. The "@id"
      "./" is supported even though it contains the separator.
    - new_value: value to store at the final key.

    Returns:
    The whole updated document, or None (after printing a message) when any
    part of the path cannot be resolved.

    The final key is always left holding a list with ``new_value`` first:
    an existing list gets it inserted at index 0, an existing scalar becomes
    ``[new_value, old_value]`` and a missing key becomes ``[new_value]``.
    """
    # Read the JSON file
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Traverse the nested structure using the keys path
    keys = keys_path.split('.')
    prefix = ""
    current_data = data

    for key in keys[:-1]:
        # Splitting on "." breaks the "@id" "./" into "" and "/". Remember
        # the empty piece as a "." prefix and glue it onto the next segment.
        key = prefix + key
        if key == "":
            prefix = "."
            continue
        prefix = ""

        if isinstance(current_data, list):
            # Find the item with @id as the key
            for item in current_data:
                if isinstance(item, dict) and item.get("@id") == key:
                    current_data = item
                    break
            else:
                # Previously a missing @id was ignored here and the walk
                # carried on with the list, failing later with a TypeError.
                print(f"Key '{key}' not found.")
                return None
        elif isinstance(current_data, dict) and key in current_data:
            current_data = current_data[key]
        else:
            print(f"Key '{key}' not found.")
            return None

    # Update value of the entry
    last_key = keys[-1]
    if not isinstance(current_data, dict):
        # A list or scalar cannot take a named entry.
        print(f"Cannot set '{last_key}': its parent is not a JSON object.")
        return None

    if last_key in current_data:
        if isinstance(current_data[last_key], list):
            current_data[last_key].insert(0, new_value)
        else:
            current_data[last_key] = [new_value, current_data[last_key]]
    else:
        current_data[last_key] = [new_value]

    return data
64 changes: 64 additions & 0 deletions .github/scripts/pull_request.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
import json
from github import Github, Auth

# Inputs provided by the workflow environment; unset variables read as None.
token = os.getenv("GITHUB_TOKEN")

# Repository the pull request belongs to
repo_owner = os.getenv("REPO_OWNER")
repo_name = os.getenv("REPO_NAME")

# Pull request details
pr_title = os.getenv("PR_TITLE")
event_path = os.getenv("GITHUB_EVENT_PATH")
head_branch = os.getenv("HEAD_BRANCH")
base_branch = os.getenv("BASE_BRANCH")

def get_commit_messages(event_path):
    """Build a bullet list of commit messages from an event JSON file.

    Parameters:
    - event_path (str): path to a JSON file holding a 'commits' list whose
      items each carry a 'message'.

    Returns:
    One "- <message>" line per commit, joined with newlines (an empty string
    when the list is empty).
    """
    with open(event_path) as event_file:
        payload = json.load(event_file)

    # One bullet per commit, in the order the event lists them.
    bullets = []
    for entry in payload['commits']:
        bullets.append('- ' + entry['message'])

    return '\n'.join(bullets)


# Get repo
auth = Auth.Token(token)
g = Github(auth=auth)
repo = g.get_repo(f"{repo_owner}/{repo_name}")

# Generate PR body from commit messages in event json data:
pr_body = get_commit_messages(event_path)

# Existing PRs against the branch a new PR would target. Searching the same
# base that create_pull() uses keeps the two paths consistent; a hard-coded
# base would miss the existing PR whenever BASE_BRANCH is another branch.
existing_prs = repo.get_pulls(state='open', sort='created', base=base_branch)

# First open PR carrying the expected title, if any.
existing_pr = next((pr for pr in existing_prs if pr.title == pr_title), None)
pr_exists = existing_pr is not None

if pr_exists:
    # Edit existing PR: append the new commit messages to its description.
    # A PR opened without a description has body None, hence the fallback.
    existing_pr_body = existing_pr.body or ''
    updated_pr_body = existing_pr_body + '\n' + pr_body
    existing_pr.edit(body=updated_pr_body)

    print(f"Pull request body updated: {existing_pr.html_url}")
else:
    # Make new pull request
    new_pr = repo.create_pull(
        title=pr_title,
        body="*Commits*\n\n" + pr_body,
        head=head_branch,
        base=base_branch,
    )

    print(f"Pull request created: {new_pr.html_url}")


23 changes: 23 additions & 0 deletions .github/scripts/request_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import requests

def check_uri(uri):
    """
    Checks the availability or validity of a URI by making a HTTP GET request
    Parameters:
    - uri (str): The URI to check.
    Returns:
    'OK' if the request is successful, or an error message if not.
    """

    TIMEOUT = 10  # seconds, passed to the GET request

    try:
        # The context manager closes the session, and with it the pooled
        # connections, on every path out of this function.
        with requests.Session() as session:
            response = session.get(uri, timeout=TIMEOUT)
            # Turn 4xx/5xx answers into exceptions so they are reported below.
            response.raise_for_status()
        return "OK"
    except Exception as err:
        # This function reports problems as text instead of raising.
        # Exceptions created without arguments have an empty args tuple, so
        # fall back to the exception text, then to its class name.
        if err.args:
            return str(err.args[0])
        return str(err) or type(err).__name__
3 changes: 3 additions & 0 deletions .github/scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pygithub==2.2.0
ruamel.yaml<0.18.0
ruamel.yaml.string
98 changes: 98 additions & 0 deletions .github/scripts/update_doi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import os
import io
import re
import json
from ruamel.yaml import YAML
from github import Github, Auth
from parse_utils import extract_doi_parts
from yaml_utils import *
from json_utils import *

# Environment variables
token = os.environ.get("GITHUB_TOKEN")
repo_name = os.environ.get("REPO_NAME")
issue_number = int(os.environ.get("ISSUE_NUMBER"))

# Get issue
auth = Auth.Token(token)
g = Github(auth=auth)
repo = g.get_repo(repo_name)
issue = repo.get_issue(number = issue_number)

# Parse issue: every "### heading" becomes a key and the text up to the next
# heading its value. An issue without a body parses to an empty dict.
regex = r"### *(?P<key>.*?)\s*[\r\n]+(?P<value>[\s\S]*?)(?=###|$)"
data = dict(re.findall(regex, issue.body or ""))

# A missing field is treated like an empty DOI instead of raising KeyError.
doi = data.get("-> doi", "").strip()

# Verify doi is valid
# Because we are using reserved DOIs, they can't be verified using http.
# Instead, test if the DOI is in a sensible form.
NO_DOI_MESSAGE = "No valid DOI found in the input string."
response = extract_doi_parts(doi) if doi else NO_DOI_MESSAGE

# Text of the comment to post on the issue when something goes wrong.
failure = None

# JSON
json_file_path = "ro-crate-metadata.json"
json_data = None

if response == NO_DOI_MESSAGE:
    failure = f"An error was encountered trying to access the DOI provided. Please check that it was entered correctly.\n{response}"
else:
    # Insert DOI into metadata: the root entity, then the model_inputs and
    # model_outputs entities.
    json_key_paths = (
        "@graph../.identifier",
        "@graph.model_inputs.identifier",
        "@graph.model_outputs.identifier",
    )

    for key_path in json_key_paths:
        json_data = create_or_update_json_entry(json_file_path, key_path, doi)
        if json_data is None:
            # Never commit the literal "null" that json.dumps(None) would give.
            failure = f"The DOI could not be added to the metadata: entry '{key_path}' was not found in {json_file_path}."
            break

        # create_or_update_json_entry() re-reads the file on every call and
        # never writes it. Persist each step locally so the next call builds
        # on it; otherwise each update would start from the original file
        # and only the last identifier would end up in the committed file.
        with open(json_file_path, 'w') as file:
            json.dump(json_data, file, indent=4)

if failure is None:
    # Commit the metadata once, containing all three identifiers.
    metadata_out = json.dumps(json_data, indent=4)
    file_content = repo.get_contents(json_file_path)
    commit_message = "Update ro-crate with DOI"
    repo.update_file(json_file_path, commit_message, metadata_out, file_content.sha)

    # YAML
    yaml = YAML(typ=['rt', 'string'])
    yaml.preserve_quotes = True
    yaml.indent(mapping=2, sequence=4, offset=2)

    # Read existing file
    yaml_file_path = "website_material/index.md"
    web_yaml_dict = read_yaml_with_header(yaml_file_path)

    # Path to key to update: the DOI is added at the top level only
    key_path = "doi"
    # Update value
    navigate_and_assign(web_yaml_dict, key_path, doi)

    # Use an in-memory text stream to hold the YAML content, wrapped in the
    # "---" front-matter delimiters.
    stream = io.StringIO()
    stream.write("---\n")
    yaml.dump(web_yaml_dict, stream)
    stream.write("---\n")
    yaml_content_with_frontmatter = stream.getvalue()

    file_content = repo.get_contents(yaml_file_path)
    commit_message = "Update YAML file with DOI"
    repo.update_file(yaml_file_path, commit_message, yaml_content_with_frontmatter, file_content.sha)

    # Print True to indicate success so that files may be copied to website repo
    print(True)
else:
    issue.create_comment(failure)
    issue.remove_from_labels("model published")
    # Print False to indicate failure so that files are not copied to website repo
    print(False)
Loading

0 comments on commit bf57b2e

Please sign in to comment.