From bc52402461e6e699eef4190e703118eb973ee38a Mon Sep 17 00:00:00 2001
From: "m-te-bot[bot]" <160700919+m-te-bot[bot]@users.noreply.github.com>
Date: Mon, 28 Oct 2024 04:50:29 +0000
Subject: [PATCH] Initial commit

---
 .github/ISSUE_TEMPLATE/publish_model.yml |  16 +++
 .github/foo.txt                          |   0
 .github/scripts/check_published.py       |  21 ++++
 .github/scripts/copy_files.py            |  43 +++++++
 .github/scripts/create_branch.py         |  23 ++++
 .github/scripts/file_utils.py            | 123 +++++++++++++++++++
 .github/scripts/find_repos.py            |  25 ++++
 .github/scripts/parse_utils.py           | 149 +++++++++++++++++++++++
 .github/scripts/pull_request.py          |  64 ++++++++++
 .github/scripts/request_utils.py         |  24 ++++
 .github/scripts/requirements.txt         |   4 +
 .github/scripts/update_doi.py            | 125 +++++++++++++++++++
 .github/scripts/update_labels.py         |  25 ++++
 .github/workflows/copy-files.yml         | 111 +++++++++++++++++
 .github/workflows/new-actions.yml        | 102 ++++++++++++++++
 .github/workflows/new-files.yml          |  66 ++++++++++
 .gitignore                               |   6 +
 .metadata_trail/.gitkeep                 |   1 +
 .website_material/assets/.gitkeep        |   1 +
 .website_material/graphics/.gitkeep      |   1 +
 CITATION.cff                             |   1 +
 README.md                                |  13 ++
 model_code_inputs/.gitkeep               |   1 +
 model_code_inputs/README.md              |   1 +
 model_output_data/.gitkeep               |   1 +
 model_output_data/README.md              |   6 +
 26 files changed, 953 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/publish_model.yml
 create mode 100644 .github/foo.txt
 create mode 100644 .github/scripts/check_published.py
 create mode 100644 .github/scripts/copy_files.py
 create mode 100644 .github/scripts/create_branch.py
 create mode 100644 .github/scripts/file_utils.py
 create mode 100644 .github/scripts/find_repos.py
 create mode 100644 .github/scripts/parse_utils.py
 create mode 100644 .github/scripts/pull_request.py
 create mode 100644 .github/scripts/request_utils.py
 create mode 100644 .github/scripts/requirements.txt
 create mode 100644 .github/scripts/update_doi.py
 create mode 100644 .github/scripts/update_labels.py
 create mode 100644 .github/workflows/copy-files.yml
 create mode 100644 .github/workflows/new-actions.yml
 create mode 100644 .github/workflows/new-files.yml
 create mode 100644 .gitignore
 create mode 100644 .metadata_trail/.gitkeep
 create mode 100644 .website_material/assets/.gitkeep
 create mode 100644 .website_material/graphics/.gitkeep
 create mode 100644 CITATION.cff
 create mode 100644 README.md
 create mode 100644 model_code_inputs/.gitkeep
 create mode 100644 model_code_inputs/README.md
 create mode 100644 model_output_data/.gitkeep
 create mode 100644 model_output_data/README.md

diff --git a/.github/ISSUE_TEMPLATE/publish_model.yml b/.github/ISSUE_TEMPLATE/publish_model.yml
new file mode 100644
index 0000000..2478483
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/publish_model.yml
@@ -0,0 +1,16 @@
+---
+name: Publish model
+description: Update model with doi and publish
+title: "Publish model"
+labels: ["model published"]  # copy-files.yml runs when an issue receives this label
+
+body:
+
+- type: input
+  id: doi
+  attributes:
+    label: -> doi  # update_doi.py looks up the issue-body section under this exact heading
+    placeholder: "https://doi.org/10.47366/sabia.v5n1a3"
+    description: "Provide the doi of your published model"
+  validations:
+    required: true
\ No newline at end of file
diff --git a/.github/foo.txt b/.github/foo.txt
new file mode 100644
index 0000000..e69de29
diff --git a/.github/scripts/check_published.py b/.github/scripts/check_published.py
new file mode 100644
index 0000000..443eb28
--- /dev/null
+++ b/.github/scripts/check_published.py
@@ -0,0 +1,21 @@
+import os
+from github import Github, Auth
+
+# Environment variables
+token = os.environ.get("GITHUB_TOKEN")
+repo_name = os.environ.get("REPO_NAME")
+
+# Get repo
+auth = Auth.Token(token)
+g = Github(auth=auth)
+repo = g.get_repo(repo_name)
+
+# Find if any of the issues has the published label
+published = False
+
+for issue in repo.get_issues():
+    for label in issue.labels:
+        if 'published' in label.name:
+            published = True
+
+print(published)
diff --git a/.github/scripts/copy_files.py b/.github/scripts/copy_files.py
new file mode 100644
index 0000000..8b46b38
--- /dev/null
+++ b/.github/scripts/copy_files.py
@@ -0,0 +1,43 @@
+import os
+import base64
+from github import Github, Auth
+
+# Environment variables
+token = os.environ.get("GITHUB_TOKEN")
+source_repo_owner = os.environ.get("SOURCE_REPO_OWNER")
+source_repo_name = os.environ.get("SOURCE_REPO_NAME")
+source_path = os.environ.get("SOURCE_PATH")
+target_repo_owner = os.environ.get("TARGET_REPO_OWNER")
+target_repo_name = os.environ.get("TARGET_REPO_NAME")
+target_branch_name = os.environ.get("TARGET_REPO_BRANCH")
+target_path = os.environ.get("TARGET_PATH")
+
+auth = Auth.Token(token)
+g = Github(auth=auth)
+source_repo = g.get_repo(f"{source_repo_owner}/{source_repo_name}")
+target_repo = g.get_repo(f"{target_repo_owner}/{target_repo_name}")
+
+def copy_files(contents, target_path):
+    for content in contents:
+        if content.type == "dir":
+            # Get the contents of the directory and copy recursively
+            copy_files(source_repo.get_contents(content.path), f"{target_path}/{content.name}")
+        else:
+            # Check if the file already exists in the target repo
+            try:
+                target_file = target_repo.get_contents(f"{target_path}/{content.name}", ref=target_branch_name)
+                # File exists, compare contents
+                if content.sha != target_file.sha:
+                    # Contents differ, update the file
+                    source_file_content = base64.b64decode(source_repo.get_git_blob(content.sha).content)
+                    target_repo.update_file(f"{target_path}/{content.name}",f"Updating {content.name}", source_file_content, target_file.sha, branch=target_branch_name)
+            except:
+                # Copy file to target repository
+                source_file_content = base64.b64decode(source_repo.get_git_blob(content.sha).content)
+                target_repo.create_file(f"{target_path}/{content.name}", f"Copying {content.name}", source_file_content, branch=target_branch_name)
+
+# Get contents of source directory
+source_contents = source_repo.get_contents(source_path)
+
+# Start copying files
+copy_files(source_contents, target_path)
diff --git a/.github/scripts/create_branch.py b/.github/scripts/create_branch.py
new file mode 100644
index 0000000..41e0ab8
--- /dev/null
+++ b/.github/scripts/create_branch.py
@@ -0,0 +1,23 @@
+import os
+from github import Github, Auth
+
+# Environment variables
+token = os.environ.get("GITHUB_TOKEN")
+repo_owner = os.environ.get("REPO_OWNER")
+repo_name = os.environ.get("REPO_NAME")
+branch_name = os.environ.get("BRANCH_NAME")
+
+auth = Auth.Token(token)
+g = Github(auth=auth)
+repo = g.get_repo(f"{repo_owner}/{repo_name}")
+
+# Check if the branch name already exists
+try:
+    assert repo.get_git_ref(f"heads/{branch_name}").ref is not None
+    print("Branch already exists")
+
+# Create new branch if it doesn't
+except:
+    base_ref = repo.get_git_ref(f"heads/{repo.default_branch}")
+
+    repo.create_git_ref(f"refs/heads/{branch_name}", base_ref.object.sha)
diff --git a/.github/scripts/file_utils.py b/.github/scripts/file_utils.py
new file mode 100644
index 0000000..0f8b320
--- /dev/null
+++ b/.github/scripts/file_utils.py
@@ -0,0 +1,123 @@
+import json
+from ruamel.yaml import YAML
+import csv
+import os
+from io import StringIO
+
+def create_or_update_json_entry(rocrate, keys_path, new_value):
+    """
+    Create or update a nested JSON entry in a ro-crate structure.
+
+    Args:
+        rocrate (dict): The main ro-crate dictionary.
+        keys_path (str): Dot-separated path to the key that needs updating.
+        new_value (any): New value to be inserted or updated.
+    """
+    # Split the keys path into individual components
+    keys = keys_path.split('.')
+    prefix = ""
+    structure = rocrate
+
+    # Traverse through the nested structure using keys except the last one
+    for key in keys[:-1]:
+        key = prefix + key
+
+        # Handle potential './' prefix logic
+        if key == "":
+            prefix = "."
+            continue
+        else:
+            prefix = ""
+
+        if isinstance(structure, list):
+            # Find the item with matching '@id' key
+            for item in structure:
+                if item.get("@id") == key:
+                    structure = item
+                    break
+            else:
+                print(f"Key '{key}' not found.")
+                return
+        elif key in structure:
+            structure = structure[key]
+        else:
+            print(f"Key '{key}' not found.")
+            return
+
+    # The final key where the new value should be placed
+    last_key = keys[-1]
+
+    # Update the value at the final key
+    if last_key in structure:
+        if isinstance(structure[last_key], list):
+            # Prepend only if the new value is not already in the list
+            if new_value not in structure[last_key]:
+                structure[last_key].insert(0, new_value)
+        else:
+            # Convert existing non-list value to a list if needed
+            structure[last_key] = [new_value, structure[last_key]]
+    else:
+        # If the key doesn't exist, create a new list with the new value
+        structure[last_key] = [new_value]
+
+
+def navigate_and_assign(source, path, value):
+    """Navigate through a nested dictionary and assign a value to the specified path."""
+    keys = path.split('.')
+    for i, key in enumerate(keys[:-1]):
+        if key.isdigit():  # If the key is a digit, it's an index for a list
+            key = int(key)
+            while len(source) <= key:  # Extend the list if necessary
+                source.append({})
+            source = source[key]
+        else:
+            if i < len(keys) - 2 and keys[i + 1].isdigit():  # Next key is a digit, so ensure this key leads to a list
+                source = source.setdefault(key, [])
+            else:  # Otherwise, it leads to a dictionary
+                source = source.setdefault(key, {})
+    # Assign the value to the final key
+    if keys[-1].isdigit():  # If the final key is a digit, it's an index for a list
+        key = int(keys[-1])
+        while len(source) <= key:  # Extend the list if necessary
+            source.append(None)
+        source[key] = value
+    else:
+        source[keys[-1]] = value
+
+
+def read_yaml_with_header(file_path):
+    """
+    Read YAML content inside YAML header delimiters '---'
+    """
+
+    with open(file_path,'r') as file:
+        data = file.read()
+
+    yaml = YAML()
+    yaml_content = yaml.load(data.strip('---\n'))
+
+    return yaml_content
+
+def update_csv_content(file_path, field, value):
+    # Read the CSV file and update the field value
+    updated_rows = []
+    field_exists = False
+    with open(file_path, mode='r', newline='') as file:
+        reader = csv.reader(file)
+        for row in reader:
+            if row and row[0] == field:
+                row[1] = value
+                field_exists = True
+            updated_rows.append(row)
+
+    # If the field does not exist, add a new line
+    if not field_exists:
+        updated_rows.append([field, value])
+
+    # Convert the updated rows back into a CSV-formatted string
+    updated_csv_content = StringIO()
+    writer = csv.writer(updated_csv_content)
+    writer.writerows(updated_rows)
+    updated_csv_string = updated_csv_content.getvalue()
+
+    return updated_csv_string
diff --git a/.github/scripts/find_repos.py b/.github/scripts/find_repos.py
new file mode 100644
index 0000000..d7897cf
--- /dev/null
+++ b/.github/scripts/find_repos.py
@@ -0,0 +1,25 @@
+import os
+import json
+import requests
+from github import Github, Auth
+
+# Environment variables
+token = os.environ.get("TOKEN")
+repo_name = os.environ.get("REPO")
+org = os.environ.get("ORG")
+
+repos = []
+
+# Get org
+auth = Auth.Token(token)
+g = Github(auth=auth)
+org = g.get_organization(org)
+
+# Find repos created from this template
+for repo in org.get_repos():
+    repo_json = requests.get(repo.url).json()
+    if 'template_repository' in repo_json:
+        if repo_json['template_repository']['name'] == repo_name:
+            repos.append(repo.name)
+
+print(json.dumps(repos))
\ No newline at end of file
diff --git a/.github/scripts/parse_utils.py b/.github/scripts/parse_utils.py
new file mode 100644
index 0000000..ff0ebf3
--- /dev/null
+++ b/.github/scripts/parse_utils.py
@@ -0,0 +1,149 @@
+import re
+import yaml
+
+
+def extract_doi_parts(doi_string):
+    # Regular expression to match a DOI within a string or URL
+    # It looks for a string starting with '10.' followed by any non-whitespace characters
+    # and optionally includes common URL prefixes
+    # the DOI
+    doi_pattern = re.compile(r'(10\.[0-9]+/[^ \s]+)')
+
+    # Search for DOI pattern in the input string
+    match = doi_pattern.search(doi_string)
+
+    # If a DOI is found in the string
+    if match:
+        # Extract the DOI
+        doi = match.group(1)
+
+        # Clean up the DOI by removing any trailing characters that are not part of a standard DOI
+        # This includes common punctuation and whitespace that might be accidentally included
+        #doi = re.sub(r'[\s,.:;]+$', '', doi)
+        doi = re.sub(r'[\s,.:;|\/\?:@&=+\$,]+$', '', doi)
+
+        # Split the DOI into prefix and suffix at the first "/"
+        #prefix, suffix = doi.split('/', 1)
+
+        return doi
+    else:
+        # Return an error message if no DOI is found
+        return "No valid DOI found in the input string."
+
+
+def format_citation(ro_crate):
+    # Find the root entity (main dataset)
+    root_entity = next((item for item in ro_crate['@graph'] if item['@id'] == './'), None)
+    if not root_entity:
+        return "Error: Root data entity not found."
+
+    # Extract essential data: title, DOI, publication year
+    title = root_entity.get('name', 'No title available')
+
+    # Handle the case where 'identifier' might be an empty string or empty list
+    identifier = root_entity.get('identifier')
+    if isinstance(identifier, list):
+        doi = identifier[0] if identifier and identifier[0] else 'No DOI available'
+    elif isinstance(identifier, str) and identifier:
+        doi = identifier
+    else:
+        doi = 'No DOI available'
+
+    date_published = root_entity.get('datePublished', '')[:4]  # Extract the first four characters, which represent the year
+
+    # Extract publisher information, handling multiple publishers
+    publisher_ids = root_entity.get('publisher', [])
+    if not isinstance(publisher_ids, list):
+        publisher_ids = [publisher_ids]
+    publishers = []
+    for publisher_id in publisher_ids:
+        publisher_entity = next((item for item in ro_crate['@graph'] if item['@id'] == publisher_id['@id']), None)
+        if publisher_entity:
+            publishers.append(publisher_entity.get('name', 'No publisher available'))
+    publisher_names = ', '.join(publishers) if publishers else "No publisher available"
+
+    # Extract and format author names
+    authors = root_entity.get('creator', [])
+    # If 'authors' is a dictionary (single author), convert it to a list for uniform handling
+    if isinstance(authors, dict):
+        authors = [authors]
+    author_names = []
+    for author_id in authors:
+        author_entity = next((item for item in ro_crate['@graph'] if item['@id'] == author_id['@id']), None)
+        if author_entity:
+            surname = author_entity.get('familyName', '')
+            given_name_initial = author_entity.get('givenName', '')[0] if author_entity.get('givenName', '') else ''
+            author_names.append(f"{surname}, {given_name_initial}.")
+
+    # Join author names with commas, and use '&' before the last author if multiple
+    if len(author_names) > 1:
+        authors_formatted = ', '.join(author_names[:-1]) + f", & {author_names[-1]}"
+    else:
+        authors_formatted = ''.join(author_names)
+
+    # Create formatted citation string
+    citation = f"{authors_formatted} ({date_published}). {title} [Data set]. {publisher_names}. https://doi.org/{doi.split('/')[-1]}"
+    return citation
+
+
+
+
+
+def ro_crate_to_cff(ro_crate):
+    # Find the root entity
+    root_entity = next((item for item in ro_crate['@graph'] if item['@id'] == './'), None)
+    if not root_entity:
+        return "Error: Root data entity not found."
+
+    # Extract necessary fields
+    title = root_entity.get('name', 'No title available')
+    version = root_entity.get('version', '1.0')
+    doi = root_entity.get('identifier', ['No DOI available'])[0]
+    date_released = root_entity.get('datePublished', '').split('T')[0]
+    url = root_entity.get('url', 'No URL provided')
+
+
+    # Extract authors
+    authors = root_entity.get('creator', [])
+    # If 'authors' is a dictionary (single author), convert it to a list for uniform handling
+    if isinstance(authors, dict):
+        authors = [authors]
+
+    author_list = []
+
+    for author in authors:
+        # Ensure we access the correct field and check if author is a dict
+        if isinstance(author, dict):
+            author_id = author.get('@id')
+            
+            # Check if author_id is not None
+            if author_id is not None:
+                author_entity = next((item for item in ro_crate['@graph'] if item['@id'] == author_id), None)
+                
+                if author_entity:
+                    author_list.append({
+                        'family-names': author_entity.get('familyName', ''),
+                        'given-names': author_entity.get('givenName', ''),
+                        'orcid': author_id  # This is now a string
+                    })
+            else:
+                print(f"No '@id' found for author: {author}")
+        else:
+            print(f"Unexpected author format: {author}")
+
+    # Construct the CFF object
+    cff_dict = {
+        'cff-version': '1.2.0',
+        'message': 'If you use this model, please cite it as below.',
+        'authors': author_list,
+        'title': title,
+        'version': version,
+        'doi': doi,  # Assuming DOI is a complete URL, extract just the number
+        'date-released': date_released,
+        'url': url,
+        'type': 'dataset'
+    }
+
+    # Convert dict to YAML format
+    cff_yaml = yaml.dump(cff_dict, sort_keys=False, default_flow_style=False)
+    return cff_yaml
diff --git a/.github/scripts/pull_request.py b/.github/scripts/pull_request.py
new file mode 100644
index 0000000..f29f6fe
--- /dev/null
+++ b/.github/scripts/pull_request.py
@@ -0,0 +1,64 @@
+import os
+import json
+from github import Github, Auth
+
+# Environment variables
+token = os.environ.get("GITHUB_TOKEN")
+repo_owner = os.environ.get("REPO_OWNER")
+repo_name = os.environ.get("REPO_NAME")
+pr_title = os.environ.get("PR_TITLE")
+event_path = os.environ.get("GITHUB_EVENT_PATH")
+head_branch = os.environ.get("HEAD_BRANCH")
+base_branch = os.environ.get("BASE_BRANCH")
+
+def get_commit_messages(event_path):
+    with open(event_path) as f:
+        event_data = json.load(f)
+
+    # Extract commits from event data
+    commits = event_data['commits']
+
+    # Extract commit messages
+    commit_messages = '\n'.join(['- '+ commit['message'] for commit in commits])
+
+    return commit_messages
+
+
+# Get repo
+auth = Auth.Token(token)
+g = Github(auth=auth)
+repo = g.get_repo(f"{repo_owner}/{repo_name}")
+
+# Generate PR body from commit messages in event json data:
+pr_body = get_commit_messages(event_path)
+
+# Existing PRs
+existing_prs = repo.get_pulls(state='open', sort='created', base='main')
+
+pr_exists = False
+
+for pr in existing_prs:
+    if pr.title == pr_title:
+        pr_exists = True
+        existing_pr = pr
+
+        # Edit existing PR
+        existing_pr_body = existing_pr.body
+        updated_pr_body = existing_pr_body + '\n' + pr_body
+        existing_pr.edit(body=updated_pr_body)
+
+        print(f"Pull request body updated: {existing_pr.html_url}")
+        break
+    
+if pr_exists == False:
+    # Make new pull request
+    new_pr = repo.create_pull(
+        title = pr_title,
+        body = "*Commits*\n\n" + pr_body,
+        head = head_branch,
+        base = base_branch
+    )
+
+    print(f"Pull request created: {new_pr.html_url}")
+
+    
\ No newline at end of file
diff --git a/.github/scripts/request_utils.py b/.github/scripts/request_utils.py
new file mode 100644
index 0000000..a107a82
--- /dev/null
+++ b/.github/scripts/request_utils.py
@@ -0,0 +1,24 @@
+import requests
+
+
+def check_uri(uri):
+
+    """
+    Checks the availability or validity of a URI by making a HTTP GET request
+
+    Parameters:
+    - uri (str): The URI to check.
+
+    Returns:
+    'OK' if the request is successful, or an error message if not.
+    """
+
+    session = requests.Session()
+    TIMEOUT = 10
+
+    try:
+        response = session.get(uri, timeout=TIMEOUT)
+        response.raise_for_status()
+        return "OK"
+    except Exception as err:
+        return str(err.args[0])
diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
new file mode 100644
index 0000000..955f8e5
--- /dev/null
+++ b/.github/scripts/requirements.txt
@@ -0,0 +1,4 @@
+pygithub==2.2.0
+ruamel.yaml<0.18.0
+ruamel.yaml.string
+pyyaml
diff --git a/.github/scripts/update_doi.py b/.github/scripts/update_doi.py
new file mode 100644
index 0000000..5d2c4b9
--- /dev/null
+++ b/.github/scripts/update_doi.py
@@ -0,0 +1,125 @@
+import os
+import io
+import re
+import json
+from ruamel.yaml import YAML
+from github import Github, Auth
+from parse_utils import *
+from file_utils import *
+
+# Environment variables
+token = os.environ.get("GITHUB_TOKEN")
+repo_name = os.environ.get("REPO_NAME")
+issue_number = int(os.environ.get("ISSUE_NUMBER"))
+
+# Get issue
+auth = Auth.Token(token)
+g = Github(auth=auth)
+repo = g.get_repo(repo_name)
+issue = repo.get_issue(number = issue_number)
+
+# Parse issue: each "### <heading>" becomes a key and the text below it the value
+regex = r"### *(?P<key>.*?)\s*[\r\n]+(?P<value>[\s\S]*?)(?=###|$)"
+data = dict(re.findall(regex, issue.body))
+
+doi = data["-> doi"].strip()
+
+# Verify doi is valid
+# because we are using reserved DOIs, they can't be verified using http.
+# instead, test if the DOI is in a sensible form
+response = extract_doi_parts(doi)
+if response != "No valid DOI found in the input string.":
+    # Insert DOI into metadata
+
+    # Read the RO-Crate  (JSON file)
+    json_file_path = "ro-crate-metadata.json"
+    with open(json_file_path, 'r') as file:
+            rocrate = json.load(file)
+
+    # add the DOI and any other changes to the ro-crate
+
+    key_path = "@graph../.identifier"
+    create_or_update_json_entry(rocrate, key_path, doi)
+    key_path = "@graph.model_inputs.identifier"
+    create_or_update_json_entry(rocrate, key_path, doi)
+    key_path = "@graph.model_outputs.identifier"
+    create_or_update_json_entry(rocrate, key_path, doi)
+    citation_str = format_citation(rocrate)
+    key_path = "@graph../.creditText"
+    create_or_update_json_entry(rocrate, key_path, citation_str)
+
+    #save the updated crate
+    metadata_out = json.dumps(rocrate, indent=4)
+    file_content = repo.get_contents(json_file_path)
+    commit_message = "Update ro-crate with DOI etc."
+    repo.update_file(json_file_path, commit_message, metadata_out, file_content.sha)
+
+    #add the creditText
+    #json_data should be the updated rocrate dictionary
+    #citation_str = format_citation(json_data)
+    #key_path = "@graph../.creditText"
+    #json_data = create_or_update_json_entry(json_file_path, key_path, citation_str)
+    #metadata_out = json.dumps(json_data, indent=4)
+    #file_content = repo.get_contents(json_file_path)
+    #commit_message = "Update ro-crate with DOI"
+    #repo.update_file(json_file_path, commit_message, metadata_out, file_content.sha)
+
+    #update the github cff file
+    cff_text = ro_crate_to_cff(rocrate)
+    cff_file_path = "CITATION.cff"
+    file_content = repo.get_contents(cff_file_path)
+    commit_message = "Update CITATION.cff"
+    repo.update_file(cff_file_path, commit_message, cff_text, file_content.sha)
+
+
+    #need to copy into the website materials folder
+    web_json_file_path = ".website_material/ro-crate-metadata.json"
+    file_content = repo.get_contents(web_json_file_path)
+    commit_message = "Update Website ro-crate with DOI"
+    repo.update_file(web_json_file_path, commit_message, metadata_out, file_content.sha)
+
+    #update CSV
+    csv_file_path = '.metadata_trail/nci_iso.csv'
+    field = 'DOI (NCI Internal Field)'
+    updated_csv_content = update_csv_content(csv_file_path, field, doi)
+    file_content = repo.get_contents(csv_file_path)
+    commit_message = "Update nci_iso.csv with DOI"
+    repo.update_file(csv_file_path, commit_message, updated_csv_content, file_content.sha)
+
+    # YAML
+    yaml = YAML(typ=['rt', 'string'])
+    yaml.preserve_quotes = True
+    yaml.indent(mapping=2, sequence=4, offset=2)
+
+    # Read existing file
+    yaml_file_path = ".website_material/index.md"
+    web_yaml_dict = read_yaml_with_header(yaml_file_path)
+
+    # Path to key to update
+    #key_path = "dataset.doi"
+    #add doi to the top level only
+    key_path = "doi"
+    # Update value
+    navigate_and_assign(web_yaml_dict, key_path, doi)
+    key_path = "creditText"
+    # Update value
+    navigate_and_assign(web_yaml_dict, key_path, citation_str)
+
+    # Use an in-memory text stream to hold the YAML content
+    stream = io.StringIO()
+    stream.write("---\n")
+    yaml.dump(web_yaml_dict, stream)
+    stream.write("---\n")
+    yaml_content_with_frontmatter = stream.getvalue()
+
+    file_content = repo.get_contents(yaml_file_path)
+    commit_message = "Update YAML file with DOI"
+    repo.update_file(yaml_file_path, commit_message, yaml_content_with_frontmatter, file_content.sha)
+
+    # Print True to indicate success so that files may be copied to website repo
+    print(True)
+else:
+    issue.create_comment(f"An error was encountered trying to access the DOI provided. Please check that it was entered correctly.\n{response}")
+    issue.remove_from_labels("model published")
+    # Print False to indicate failure so that files are not copied to website repo
+    print(False)
diff --git a/.github/scripts/update_labels.py b/.github/scripts/update_labels.py
new file mode 100644
index 0000000..d17242d
--- /dev/null
+++ b/.github/scripts/update_labels.py
@@ -0,0 +1,25 @@
+import os
+from github import Github, Auth
+
+# Environment variables
+token = os.environ.get("GITHUB_TOKEN")
+repo_owner = os.environ.get("REPO_OWNER")
+repo_name = os.environ.get("REPO_NAME")
+slug = os.environ.get("SLUG")
+
+auth = Auth.Token(token)
+g = Github(auth=auth)
+repo = g.get_repo(f"{repo_owner}/{repo_name}")
+
+phrase = 'Model repository created at' # Phrase to find the right comment
+
+
+for issue in repo.get_issues():
+    for comment in issue.get_comments():
+        if phrase in comment.body:
+            comment_slug = comment.body.split('ModelAtlasofTheEarth/')[1] # get slug in issue comment
+            if slug == comment_slug:
+                issue.add_to_labels('model published')
+                break
+
+        
\ No newline at end of file
diff --git a/.github/workflows/copy-files.yml b/.github/workflows/copy-files.yml
new file mode 100644
index 0000000..8b95bdd
--- /dev/null
+++ b/.github/workflows/copy-files.yml
@@ -0,0 +1,111 @@
+name: Copy Files
+on:
+  issues:
+    types:
+      - labeled
+
+jobs:
+  update-doi:
+    if: contains(github.event.label.name, 'model published')
+    runs-on: ubuntu-latest
+    outputs:
+      success: ${{ steps.update-doi.outputs.success }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: "pip"
+      - run: pip install -r .github/scripts/requirements.txt
+
+      - name: create m@te bot token
+        uses: actions/create-github-app-token@v1
+        id: app-token
+        with:
+          app-id: ${{ vars.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - name: update doi  # the script's stdout (True/False) becomes the 'success' output
+        id: update-doi
+        env:
+          GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+          REPO_NAME: ${{ github.repository }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+        run: |
+          SUCCESS=$(python3 .github/scripts/update_doi.py)
+          echo "success=${SUCCESS}" >> $GITHUB_OUTPUT
+
+      - name: Clean gitkeep files from repo  # removes .gitkeep where the directory holds other tracked files
+        run: |
+          git pull
+          for line in $(git ls-files | grep '\.gitkeep$'); do
+            EXTRA_FILES=$(git ls-files $(dirname "$line") | wc -l)
+            if [ $EXTRA_FILES -ne "1" ]; then
+              rm "$line"
+            fi
+          done
+
+      - name: Commit changes
+        uses: stefanzweifel/git-auto-commit-action@v5
+        with:
+          commit_message: Remove unnecessary .gitkeep
+
+
+  copy-files:
+    needs: update-doi
+    if: ${{ needs.update-doi.outputs.success == 'True' }}  # update_doi.py prints True on success
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: "pip"
+      - run: pip install -r .github/scripts/requirements.txt
+
+      - name: create token for website repo  # token is scoped to the two repositories listed below
+        uses: actions/create-github-app-token@v1
+        id: app-token
+        with:
+          app-id: ${{ vars.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+          owner: ${{ github.repository_owner }}
+          repositories: 'website,model_submission'
+
+      - name: create branch  # branch in 'website' named after this repository
+        env:
+          GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+          REPO_OWNER: ${{ github.repository_owner }}
+          REPO_NAME: 'website'
+          BRANCH_NAME: ${{ github.event.repository.name }}
+        run: |
+          python3 .github/scripts/create_branch.py
+
+
+      - name: copy files to website repo  # .website_material -> src/pages/models/<repo name> on that branch
+        env:
+          GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+          SOURCE_REPO_OWNER: ${{ github.repository_owner }}
+          SOURCE_REPO_NAME: ${{ github.event.repository.name }}
+          SOURCE_PATH: '.website_material'
+          TARGET_REPO_OWNER: ${{ github.repository_owner }}
+          TARGET_REPO_NAME: 'website'
+          TARGET_REPO_BRANCH: ${{ github.event.repository.name }}
+          TARGET_PATH: 'src/pages/models/${{ github.event.repository.name }}'
+        run: |
+          python3 .github/scripts/copy_files.py
+
+      - name: update submission issue  # adds 'model published' to the matching model_submission issue
+        env:
+          GITHUB_TOKEN:  ${{ steps.app-token.outputs.token }}
+          REPO_OWNER: ${{ github.repository_owner }}
+          REPO_NAME: 'model_submission'
+          SLUG: ${{ github.event.repository.name }}
+        run: |
+          python3 .github/scripts/update_labels.py
diff --git a/.github/workflows/new-actions.yml b/.github/workflows/new-actions.yml
new file mode 100644
index 0000000..2356214
--- /dev/null
+++ b/.github/workflows/new-actions.yml
@@ -0,0 +1,102 @@
+name: New actions  # propagates .github/ changes to other repos via pull requests
+on:
+    push:
+        branches:
+            - 'main'
+        paths:
+            - '.github/**'  # only pushes to main that touch .github/ start this workflow
+
+jobs:
+    find-repos:  # builds the repository matrix consumed by the copy-PR job
+        if: ${{ github.event.repository.name == 'mate-model-template' }}  # run from the template repo only
+        runs-on: ubuntu-latest
+        outputs:
+            repo_matrix: ${{ steps.found-repos.outputs.repos }}  # JSON object with a single "repo" key
+        steps:
+            - name: Checkout
+              uses: actions/checkout@v4
+
+            - name: Setup python
+              uses: actions/setup-python@v5
+              with:
+                python-version: "3.10"
+                cache: "pip"
+            - run: pip install -r .github/scripts/requirements.txt
+
+            - name: create m@te token
+              uses: actions/create-github-app-token@v1
+              id: app-token
+              with:
+                app-id: ${{ vars.APP_ID }}
+                private-key: ${{ secrets.APP_PRIVATE_KEY }}
+                owner: ${{ github.repository_owner }}
+
+            - name: Find repos
+              id: found-repos
+              env:
+                TOKEN: ${{ steps.app-token.outputs.token }}
+                REPO: ${{ github.event.repository.name }}
+                ORG: ${{ github.repository_owner }}
+              run: |
+                REPOS=$(python3 .github/scripts/find_repos.py)  # stdout must be valid JSON (parsed by --argjson)
+                MATRIX=$(jq -cn --argjson repos "$REPOS" '{repo: $repos}')  # built once, reused below
+                echo "repos=$MATRIX" >> "$GITHUB_OUTPUT" && echo "$MATRIX"  # publish output, then log it
+
+    copy-PR:  # one matrix leg per repo: create branch, copy .github/, open a pull request
+        needs: find-repos
+        runs-on: ubuntu-latest
+        strategy:
+            fail-fast: false  # a failure in one repo must not cancel the updates to the others
+            matrix: ${{ fromJson(needs.find-repos.outputs.repo_matrix) }}
+        steps:
+            - name: Checkout
+              uses: actions/checkout@v4
+
+            - name: Setup python
+              uses: actions/setup-python@v5
+              with:
+                python-version: "3.10"
+                cache: "pip"
+            - run: pip install -r .github/scripts/requirements.txt
+
+            - name: create m@te token for repo
+              uses: actions/create-github-app-token@v1
+              id: app-token
+              with:
+                app-id: ${{ vars.APP_ID }}
+                private-key: ${{ secrets.APP_PRIVATE_KEY }}
+                owner: ${{ github.repository_owner }}
+                repositories: ${{ matrix.repo }}  # token is scoped to the single target repo
+
+            - name: create branch
+              env:
+                GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+                REPO_OWNER: ${{ github.repository_owner }}
+                REPO_NAME: ${{ matrix.repo }}
+                BRANCH_NAME: 'template-update'
+              run: |
+                python3 .github/scripts/create_branch.py
+
+            - name: copy files to repo
+              env:
+                GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+                SOURCE_REPO_OWNER: ${{ github.repository_owner }}
+                SOURCE_REPO_NAME: ${{ github.event.repository.name }}
+                SOURCE_PATH: '.github'
+                TARGET_REPO_OWNER: ${{ github.repository_owner }}
+                TARGET_REPO_NAME: ${{ matrix.repo }}
+                TARGET_REPO_BRANCH: 'template-update'
+                TARGET_PATH: '.github'
+              run: |
+                python3 .github/scripts/copy_files.py
+
+            - name: create pull request
+              env:  # GITHUB_EVENT_PATH is always provided by the runner, so it is not set here
+                GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+                REPO_OWNER: ${{ github.repository_owner }}
+                REPO_NAME: ${{ matrix.repo }}
+                PR_TITLE: "New scripts from template repo"
+                HEAD_BRANCH: 'template-update'
+                BASE_BRANCH: 'main'
+              run: |
+                python3 .github/scripts/pull_request.py
diff --git a/.github/workflows/new-files.yml b/.github/workflows/new-files.yml
new file mode 100644
index 0000000..19c76b1
--- /dev/null
+++ b/.github/workflows/new-files.yml
@@ -0,0 +1,66 @@
+name: New files  # copies website material to the website repo
+on:
+    push:
+        branches:
+            - 'main'
+        paths:
+            - '.website_material/**'  # only pushes to main that touch .website_material/ start this workflow
+
+jobs:
+    # Copy .website_material/ into a branch of the website repo, but only when the published check passes
+    copy-files:
+        runs-on: ubuntu-latest
+        steps:
+          - name: Checkout
+            uses: actions/checkout@v4
+
+          - name: setup python
+            uses: actions/setup-python@v5
+            with:
+              python-version: "3.10"
+              cache: "pip"
+          - run: pip install -r .github/scripts/requirements.txt
+
+          - name: check if published
+            id: check-published
+            env:
+              GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+              REPO_NAME: ${{ github.repository }}  # owner/name form, unlike the bare repo names used below
+            run: |
+              PUBLISHED=$(python3 .github/scripts/check_published.py)  # later steps expect the literal 'True'
+              echo "published=${PUBLISHED}" >> "$GITHUB_OUTPUT"
+
+          - name: create token for website repo
+            if: ${{ steps.check-published.outputs.published == 'True' }}
+            uses: actions/create-github-app-token@v1
+            id: app-token
+            with:
+              app-id: ${{ vars.APP_ID }}
+              private-key: ${{ secrets.APP_PRIVATE_KEY }}
+              owner: ${{ github.repository_owner }}
+              repositories: 'website'
+
+          - name: create branch
+            if: ${{ steps.check-published.outputs.published == 'True' }}
+            env:
+              GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+              REPO_OWNER: ${{ github.repository_owner }}
+              REPO_NAME: 'website'
+              BRANCH_NAME: ${{ github.event.repository.name }}  # branch is named after this repo
+            run: |
+              python3 .github/scripts/create_branch.py
+
+          # Guarded by the same published check as the token and branch steps
+          - name: copy files to website repo
+            if: ${{ steps.check-published.outputs.published == 'True' }}
+            env:
+              GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
+              SOURCE_REPO_OWNER: ${{ github.repository_owner }}
+              SOURCE_REPO_NAME: ${{ github.event.repository.name }}
+              SOURCE_PATH: '.website_material'
+              TARGET_REPO_OWNER: ${{ github.repository_owner }}
+              TARGET_REPO_NAME: 'website'
+              TARGET_REPO_BRANCH: ${{ github.event.repository.name }}
+              TARGET_PATH: 'src/pages/models/${{ github.event.repository.name }}'
+            run: |
+              python3 .github/scripts/copy_files.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd53027
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+# Ignore everything in model_output_data/
+model_output_data/*
+
+# ...except model_output_data/README.md, which stays tracked
+!model_output_data/README.md
+
diff --git a/.metadata_trail/.gitkeep b/.metadata_trail/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/.metadata_trail/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/.website_material/assets/.gitkeep b/.website_material/assets/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/.website_material/assets/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/.website_material/graphics/.gitkeep b/.website_material/graphics/.gitkeep
new file mode 100644
index 0000000..d3f5a12
--- /dev/null
+++ b/.website_material/graphics/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1 @@
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..007b790
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# M@TE_template
+
+_This text is to be deleted when a new model is created._
+
+Template repository for new models. This repository will be used as part of model submission (link) and should not be copied/forked. This repository contains a set of workflows that are used to provide metadata and content for the website. It will be populated via data sources specified through the "submit new model" issue template (link).
+
+# When a new model is created
+
+This model was created using the following sources of metadata:
+
+* link to github issue (link to local .json)
+* link to publication (link to local .json)
+* link to contributor (link to local .json)
diff --git a/model_code_inputs/.gitkeep b/model_code_inputs/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/model_code_inputs/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/model_code_inputs/README.md b/model_code_inputs/README.md
new file mode 100644
index 0000000..a307161
--- /dev/null
+++ b/model_code_inputs/README.md
@@ -0,0 +1 @@
+# Model Code & Inputs
diff --git a/model_output_data/.gitkeep b/model_output_data/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/model_output_data/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/model_output_data/README.md b/model_output_data/README.md
new file mode 100644
index 0000000..9c9f17e
--- /dev/null
+++ b/model_output_data/README.md
@@ -0,0 +1,6 @@
+# Model Output Data
+
+**Note: output data will often be too large to host on GitHub.**
+
+To access output data for this model, check the M@TE collection on NCI (http://dx.doi.org/10.25914/yrzp-g882), or refer to this model on the M@TE website (http://mate.science)
+