From 694d2eaf4db4157be9396126eeb0849bd9f3b0a1 Mon Sep 17 00:00:00 2001
From: Peter Baker <peter.baker122@csiro.au>
Date: Mon, 22 Jul 2024 08:39:09 +1000
Subject: [PATCH] Initial push from working repo

---
 .Dockerignore                     |   0
 .github/workflows/integration.yml |  26 +++
 .github/workflows/python.yml      |  18 ++
 .gitignore                        |   3 +
 .vscode/settings.json             |   9 +
 Dockerfile                        |  18 ++
 LICENSE                           |  22 +++
 README.md                         |   1 +
 action.yml                        |  32 ++++
 config.py                         |  26 +++
 dev-requirements.txt              |   2 +
 logging_setup.py                  |  20 +++
 main.py                           | 284 ++++++++++++++++++++++++++++++
 mypy.ini                          |  14 ++
 requirements.txt                  |   3 +
 test_json_merge.py                |  49 ++++++
 util.py                           |  38 ++++
 17 files changed, 565 insertions(+)
 create mode 100644 .Dockerignore
 create mode 100644 .github/workflows/integration.yml
 create mode 100644 .github/workflows/python.yml
 create mode 100644 .vscode/settings.json
 create mode 100644 Dockerfile
 create mode 100644 LICENSE
 create mode 100644 action.yml
 create mode 100644 config.py
 create mode 100644 dev-requirements.txt
 create mode 100644 logging_setup.py
 create mode 100644 main.py
 create mode 100644 mypy.ini
 create mode 100644 requirements.txt
 create mode 100644 test_json_merge.py
 create mode 100644 util.py

diff --git a/.Dockerignore b/.Dockerignore
new file mode 100644
index 0000000..e69de29
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
new file mode 100644
index 0000000..12d2cc7
--- /dev/null
+++ b/.github/workflows/integration.yml
@@ -0,0 +1,26 @@
+name: Integration Test
+on: [push]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Version and Update Item
+        id: version-update-item
+        uses: provena/version-update-item-action@main
+        with:
+          offline_token: ${{ secrets.PROVENA_OFFLINE_TOKEN }}
+          domain: "dev.rrap-is.com"
+          realm_name: "rrap"
+          item_id: "10378.1/1925572"
+          version_reason: "Testing GitHub Action which can version and update items"
+          update_reason: "Updating git custom attributes, as well as source URL."
+          attribute_updates: |
+            {
+              "user_metadata": {
+                "git_hash": "${{ github.sha }}",
+                "git_branch": "${{ github.ref_name }}",
+                "git_release": "${{ github.ref_type == 'tag' && github.ref_name || '' }}"
+              },
+              "source_url": "https://github.com/${{ github.repository }}"
+            }
\ No newline at end of file
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
new file mode 100644
index 0000000..ce1223a
--- /dev/null
+++ b/.github/workflows/python.yml
@@ -0,0 +1,18 @@
+name: Lint
+on: [push, pull_request]
+jobs:
+  lint:
+    name: Lint
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - uses: actions/checkout@v4
+
+      - name: Lint
+        run: |
+          pip install flake8
+          flake8 main.py
diff --git a/.gitignore b/.gitignore
index 82f9275..38791e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,6 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+.tokens.json
+.offline_token
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..9a23e07
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,9 @@
+{
+    "python.testing.pytestArgs": [
+        "."
+    ],
+    "python.analysis.diagnosticMode": "workspace",
+    "mypy.runUsingActiveInterpreter": true,
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+}
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1a6842b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,18 @@
+FROM python:3.11-slim-bookworm AS builder
+WORKDIR /app
+
+# Just dependencies to minimise rebuilds
+COPY requirements.txt /app/
+
+# We are installing a dependency here directly into our app source dir
+RUN pip install --target=/app -r requirements.txt
+
+COPY . /app
+
+# A distroless container image with Python and some basics like SSL certificates
+# https://github.com/GoogleContainerTools/distroless
+FROM gcr.io/distroless/python3-debian12
+COPY --from=builder /app /app
+WORKDIR /app
+ENV PYTHONPATH=/app
+CMD ["/app/main.py"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a67dca8
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+
+The MIT License (MIT)
+
+Copyright (c) 2018 GitHub, Inc. and contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
index 391343a..41e0b38 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,3 @@
 # version-update-item-action
+
 Docker GitHub action which produces a new version of an existing registry item, and updates metadata attributes if supplied.
diff --git a/action.yml b/action.yml
new file mode 100644
index 0000000..94916cd
--- /dev/null
+++ b/action.yml
@@ -0,0 +1,32 @@
+name: "Update Provena registry item"
+description: "Creates a new version of an existing item in a Provena deployment, providing a reason and an optional set of attribute updates."
+author: "Peter Baker"
+inputs:
+  offline_token:
+    description: "Offline token which is suitable for the specified provena domain, realm and client."
+    required: true
+    secret: true
+  domain:
+    description: "The base domain for the provena deployment"
+    required: true
+  realm_name:
+    description: "The realm name for the keycloak instance for this deployment - contact administrator if unsure."
+    required: true
+  item_id:
+    description: "The id of the item to create a new version for."
+    required: true
+  version_reason:
+    description: "What reason should be provided for the creation of the new version?"
+    required: true
+  update_reason:
+    description: "What reason should be provided for the update of metadata attributes?"
+    required: false
+  attribute_updates:
+    description: "Would you like to apply a set of updates after versioning? If so this JSON will be merged with the updated item domain info and an update applied. Please ensure this is a valid serialised JSON string."
+    required: false
+outputs:
+  new_id: 
+    description: "The ID of the new version of the item"
+runs:
+  using: "docker"
+  image: "Dockerfile"
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..1a8cf28
--- /dev/null
+++ b/config.py
@@ -0,0 +1,26 @@
+from pydantic import BaseSettings
+from logging import WARNING
+
+
+class GithubInputs(BaseSettings):
+    # The offline token to use for the client auth
+    input_offline_token: str
+    # The domain for the provena deployment
+    input_domain: str
+    # The auth realm name
+    input_realm_name: str
+    # The item id to create new version for
+    input_item_id: str
+    # The reason recorded when creating the new version
+    input_version_reason: str
+    # The reason recorded for the metadata update - optional
+    input_update_reason: str | None
+    # The set of attribute updates (serialised JSON) to apply after versioning, if any
+    input_attribute_updates: str | None
+    # The log level to display - defaults to WARNING - see https://docs.python.org/3/library/logging.html#levels
+    input_log_level: int = WARNING
+
+    # use .env file optionally for local testing
+    class Config:
+        env_file = ".env"
+        env_file_encoding = "utf-8"
diff --git a/dev-requirements.txt b/dev-requirements.txt
new file mode 100644
index 0000000..c03c36e
--- /dev/null
+++ b/dev-requirements.txt
@@ -0,0 +1,2 @@
+mypy
+pytest
\ No newline at end of file
diff --git a/logging_setup.py b/logging_setup.py
new file mode 100644
index 0000000..0b2c6ed
--- /dev/null
+++ b/logging_setup.py
@@ -0,0 +1,20 @@
+import logging
+import sys
+
+def setup_logger(name: str, level: int = logging.INFO) -> logging.Logger:
+    """Return the named logger set to ``level``, writing formatted records to stdout."""
+    # Create (or look up) the logger and set its level
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+
+    # getLogger hands back the same object for the same name, so only attach
+    # the console handler once - repeat calls would otherwise duplicate lines.
+    if logger.handlers:
+        return logger
+
+    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    return logger
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..1497b02
--- /dev/null
+++ b/main.py
@@ -0,0 +1,284 @@
+import os
+from config import GithubInputs
+import logging
+from typing import cast, Tuple, Type
+import json
+from logging_setup import setup_logger
+from provenaclient import ProvenaClient, Config
+from provenaclient.auth.manager import LogType, Log
+from provenaclient.auth.implementations import OfflineFlow
+from ProvenaInterfaces.SharedTypes import StatusResponse
+from ProvenaInterfaces.RegistryModels import (
+    DatasetTemplateDomainInfo,
+    ModelDomainInfo,
+    ModelRunWorkflowTemplateDomainInfo,
+    OrganisationDomainInfo,
+    StudyDomainInfo,
+    PersonDomainInfo,
+    RecordInfo,
+)
+from ProvenaInterfaces.RegistryAPI import VersionRequest, DomainInfoBase, ItemSubType
+from asyncio import run
+from util import JSONObject, update_json
+
+
+SUBTYPE_TO_DOMAIN_INFO: dict[ItemSubType, Type[DomainInfoBase]] = {
+    ItemSubType.DATASET_TEMPLATE: DatasetTemplateDomainInfo,
+    ItemSubType.MODEL: ModelDomainInfo,
+    ItemSubType.MODEL_RUN_WORKFLOW_TEMPLATE: ModelRunWorkflowTemplateDomainInfo,
+    ItemSubType.ORGANISATION: OrganisationDomainInfo,
+    ItemSubType.PERSON: PersonDomainInfo,
+    ItemSubType.STUDY: StudyDomainInfo,
+}
+
+
+def get_logger(level: int) -> logging.Logger:
+    return setup_logger(name="update-action-logger", level=level)
+
+
+def int_to_log_level(level: int) -> LogType:
+    # An unresolvable level must not abort the action, so fall back to INFO.
+    try:
+        return Log(level)
+    except Exception as err:
+        msg = f"Failed to resolve log type for provided log level {level}... Reverting to info default. See https://docs.python.org/3/library/logging.html#levels. Exception {err}."
+        print(msg)
+        return Log.INFO
+
+
+def parse_inputs() -> GithubInputs:
+    """Build the action inputs from the environment / optional .env file."""
+    try:
+        return GithubInputs()
+    except Exception as err:
+        # Echo the validation detail into the action log, then abort with
+        # the original error chained as the cause.
+        print(f"Inputs could not be parsed! Validation error. Error: {err}")
+        raise Exception("Failed input pydantic validation") from err
+
+
+def set_github_action_output(output_name: str, output_value: str) -> None:
+    """
+    Appends a `name=value` line to the file named by GITHUB_OUTPUT.
+
+    Args:
+        output_name (str): The output name
+        output_value (str): The output value
+    """
+    gh_out_path = os.environ.get("GITHUB_OUTPUT")
+    if not gh_out_path:
+        print(
+            "Cannot write GitHub output when GITHUB_OUTPUT env variable is not provided."
+        )
+        return
+    # Newline-terminate each entry so successive outputs do not run together.
+    with open(os.path.abspath(gh_out_path), "a", encoding="utf-8") as f:
+        f.write(f"{output_name}={output_value}\n")
+
+
+def setup_provena_client(settings: GithubInputs) -> ProvenaClient:
+    """
+    Builds a provena client which authenticates using the offline token flow.
+
+    NOTE assumes automated-access is the client id to use.
+
+    Args:
+        settings (GithubInputs): The set of parsed inputs from the action.
+
+    Returns:
+        ProvenaClient: The instantiated provena client object ready to use.
+    """
+    # translate the integer level into the LogType passed to the auth flow
+    auth_log_level = int_to_log_level(settings.input_log_level)
+    client_config = Config(domain=settings.input_domain, realm_name=settings.input_realm_name)
+    offline_auth = OfflineFlow(
+        config=client_config,
+        client_id="automated-access",
+        offline_token=settings.input_offline_token,
+        log_level=auth_log_level,
+    )
+    return ProvenaClient(auth=offline_auth, config=client_config)
+
+
+async def find_latest_version_of_item(
+    client: ProvenaClient, log: logging.Logger, settings: GithubInputs
+) -> Tuple[str, ItemSubType]:
+    """
+    Follows the next_version links, starting from the configured item id,
+    until it reaches the record which has no newer version.
+    The subtype of that newest record is returned alongside its id.
+
+    Args:
+        client (ProvenaClient): The client to use
+        log (logging.Logger): The logger
+        settings (GithubInputs): The inputs - supplies the starting item id
+
+    Returns:
+        Tuple[str, ItemSubType]: The ID and subtype of the newest version
+
+    Raises:
+        ValueError: If any fetch along the chain returns no item payload
+    """
+    # start from the item id provided to the action
+    latest_id = settings.input_item_id
+
+    while True:
+        # fetch the item at generic level
+        log.debug(f"Fetching existing item with id {latest_id}.")
+        item = await client.registry.general_fetch_item(id=latest_id)
+
+        # explicit check - a bare assert is stripped when python runs with -O
+        if not item.item:
+            raise ValueError(f"No item returned when fetching id {latest_id}.")
+
+        # parse as base record info
+        record_info = RecordInfo.parse_obj(item.item)
+        versioning_info = record_info.versioning_info
+
+        # no successor recorded, so this record is the newest version
+        if not (versioning_info and versioning_info.next_version):
+            return latest_id, record_info.item_subtype
+
+        # otherwise hop to the successor and check again
+        latest_id = versioning_info.next_version
+
+
+async def produce_new_version_of_item(
+    newest_id: str, client: ProvenaClient, log: logging.Logger, settings: GithubInputs
+) -> str:
+    """
+    Creates a new version of the given item using the input version reason.
+
+    Args:
+        newest_id (str): The id of the item to create a new version from
+        client (ProvenaClient): The client to use
+        log (logging.Logger): The logger
+        settings (GithubInputs): The inputs - supplies the version reason
+
+    Returns:
+        str: The ID of the new version of existing item
+
+    Raises:
+        ValueError: If the fetch of the existing item returns no item payload
+    """
+    # fetch the item at generic level
+    log.debug(f"Fetching existing item with id {newest_id}.")
+    item = await client.registry.general_fetch_item(id=newest_id)
+
+    # explicit check - a bare assert is stripped when python runs with -O
+    if not item.item:
+        raise ValueError(f"No item returned when fetching id {newest_id}.")
+
+    # parse as base record info to discover the subtype
+    subtype = RecordInfo.parse_obj(item.item).item_subtype
+
+    # now use L2 client method to version
+    version_request = VersionRequest(
+        id=newest_id, reason=settings.input_version_reason
+    )
+    response = await client._registry_client.version(
+        version_request=version_request, item_subtype=subtype
+    )
+    return response.new_version_id
+
+
+async def update_details_of_item(
+    new_id: str,
+    subtype: ItemSubType,
+    client: ProvenaClient,
+    log: logging.Logger,
+    settings: GithubInputs,
+) -> None:
+    """
+    Merges the provided attribute updates over the current metadata of the
+    item, validates the result against the subtype's domain info model and
+    submits it as an update.
+
+    Args:
+        new_id (str): The id of the item to update
+        subtype (ItemSubType): The subtype of the item being updated
+        client (ProvenaClient): The client to use
+        log (logging.Logger): The logger
+        settings (GithubInputs): The inputs - supplies updates and update reason
+
+    Raises:
+        ValueError: If no updates were supplied or the subtype is unsupported
+        RuntimeError: If the registry reports that the update failed
+    """
+    # explicit checks - bare asserts are stripped when python runs with -O
+    if settings.input_attribute_updates is None:
+        raise ValueError("No attribute updates were provided.")
+
+    # merge the requested updates over the item's current metadata
+    current_item_metadata = cast(
+        JSONObject, (await client.registry.general_fetch_item(id=new_id)).item
+    )
+    new_metadata = cast(JSONObject, json.loads(settings.input_attribute_updates))
+    merged_metadata = update_json(existing=current_item_metadata, updates=new_metadata)
+    log.debug(f"Merged metadata: {merged_metadata}")
+
+    # Get correct domain info update model
+    domain_info_model = SUBTYPE_TO_DOMAIN_INFO.get(subtype)
+    if domain_info_model is None:
+        raise ValueError(f"Unexpected missing domain info model for {subtype = }.")
+    update_payload = domain_info_model.parse_obj(merged_metadata)
+    log.debug(f"Update payload: {update_payload.dict()}")
+
+    # run update
+    res = await client._registry_client.update_item(
+        id=new_id,
+        reason=settings.input_update_reason
+        or "Updating metadata attributes in Github Action.",
+        item_subtype=subtype,
+        domain_info=update_payload,
+        update_response_model=StatusResponse,
+    )
+    if not res.status.success:
+        raise RuntimeError(f"Update failed with error {res.status.details}")
+
+
+async def main() -> None:
+    """Version the configured item, optionally update it, then emit the new id."""
+    # parse inputs
+    settings = parse_inputs()
+
+    # setup logger
+    log = get_logger(level=settings.input_log_level)
+
+    # setup client
+    client = setup_provena_client(settings=settings)
+
+    # find latest version
+    newest_id, subtype = await find_latest_version_of_item(
+        client=client, log=log, settings=settings
+    )
+
+    # Check subtype is in allowed update list
+    if subtype not in SUBTYPE_TO_DOMAIN_INFO:
+        raise ValueError(
+            f"Subtype requested to be updated is not supported by this action: {subtype}."
+        )
+
+    # perform versioning
+    new_id = await produce_new_version_of_item(
+        newest_id=newest_id,
+        client=client,
+        log=log,
+        settings=settings,
+    )
+
+    # perform update if desired - an omitted action input can arrive as an
+    # empty string rather than None, so test for real content, not just None
+    if settings.input_attribute_updates and settings.input_attribute_updates.strip():
+        await update_details_of_item(
+            client=client, log=log, subtype=subtype, new_id=new_id, settings=settings
+        )
+
+    log.info("Setting github output for new ID.")
+    set_github_action_output("new_id", new_id)
+
+    log.info("Operations complete.")
+
+
+if __name__ == "__main__":
+    run(main())
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..43af2ca
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,14 @@
+[mypy]
+exclude = (?x)(
+    ^\.venv.*
+    | .*__pycache__.*
+    | .*pytest_cache.*
+  )
+disallow_untyped_defs = True 
+disallow_incomplete_defs = True
+plugins = pydantic.mypy
+allow_redefinition = True
+
+[pydantic-mypy]
+init_forbid_extra = True
+warn_untyped_fields = True
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c0efbb0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pydantic[dotenv]==1.10.12
+provenaclient==0.11.2
+requests
\ No newline at end of file
diff --git a/test_json_merge.py b/test_json_merge.py
new file mode 100644
index 0000000..5ea2421
--- /dev/null
+++ b/test_json_merge.py
@@ -0,0 +1,49 @@
+from util import update_json, JSONObject
+
+def test_simple_update() -> None:
+    """An existing top-level key takes the value given in the updates."""
+    base: JSONObject = {"name": "John", "age": 30}
+    changes: JSONObject = {"age": 31}
+    assert update_json(base, changes) == {"name": "John", "age": 31}
+
+
+def test_nested_update() -> None:
+    """A nested object is merged key by key, keeping untouched siblings."""
+    base: JSONObject = {"user": {"name": "John", "age": 30}}
+    changes: JSONObject = {"user": {"age": 31}}
+    assert update_json(base, changes) == {"user": {"name": "John", "age": 31}}
+
+
+def test_new_field() -> None:
+    """A key found only in the updates is added to the result."""
+    base: JSONObject = {"name": "John"}
+    changes: JSONObject = {"age": 30}
+    assert update_json(base, changes) == {"name": "John", "age": 30}
+
+
+def test_deep_new_field() -> None:
+    """A key found only in a nested update object is added at that depth."""
+    base: JSONObject = {"user": {"name": "John"}}
+    changes: JSONObject = {"user": {"age": 30}}
+    assert update_json(base, changes) == {"user": {"name": "John", "age": 30}}
+
+
+def test_list_update() -> None:
+    """A list in the updates replaces the existing list outright."""
+    base: JSONObject = {"scores": [1, 2, 3]}
+    changes: JSONObject = {"scores": [4, 5, 6]}
+    assert update_json(base, changes) == {"scores": [4, 5, 6]}
+
+
+def test_mixed_types() -> None:
+    """An object is replaced when the update supplies a non-object value."""
+    base: JSONObject = {"data": {"value": 10}}
+    changes: JSONObject = {"data": "new value"}
+    assert update_json(base, changes) == {"data": "new value"}
+
+
+def test_null_values() -> None:
+    """An explicit None in the updates overwrites the existing value."""
+    base: JSONObject = {"name": "John", "age": 30}
+    changes: JSONObject = {"age": None}
+    assert update_json(base, changes) == {"name": "John", "age": None}
diff --git a/util.py b/util.py
new file mode 100644
index 0000000..9025a7a
--- /dev/null
+++ b/util.py
@@ -0,0 +1,38 @@
+from typing import Any, Dict, List, Union, cast, Mapping
+
+JSONValue = Union[str, int, float, bool, None, Mapping[str, Any], List[Any]]
+JSONObject = Dict[str, JSONValue]
+
+def update_json(existing: JSONObject, updates: JSONObject) -> JSONObject:
+    """
+    Merges an update json object over an existing one.
+
+    Objects are merged recursively key by key. Any other value in the updates
+    (scalar, list or None) replaces the existing value outright - lists are
+    not concatenated. Neither input is modified, though nested values which
+    are not merged are shared with the result rather than copied. Key order
+    is stable: existing keys first, then keys only present in the updates.
+
+    Args:
+        existing (JSONObject): The existing JSON metadata
+        updates (JSONObject): The set of updates (which can either add or overwrite fields)
+
+    Returns:
+        JSONObject: The updated json object
+    """
+
+    def merge(current: JSONValue, update: JSONValue) -> JSONValue:
+        # only two objects are merged - anything else is replaced by the update
+        if not (isinstance(current, dict) and isinstance(update, dict)):
+            return update
+
+        # start from a shallow copy: the caller's object is left untouched and
+        # existing keys keep their position, so the key order is deterministic
+        merged = dict(current)
+        for key, value in update.items():
+            # recurse for keys present on both sides, otherwise take the new value
+            merged[key] = merge(current[key], value) if key in current else value
+
+        return merged
+
+    return cast(JSONObject, merge(existing, updates))
\ No newline at end of file