From 694d2eaf4db4157be9396126eeb0849bd9f3b0a1 Mon Sep 17 00:00:00 2001 From: Peter Baker Date: Mon, 22 Jul 2024 08:39:09 +1000 Subject: [PATCH] Initial push from working repo --- .Dockerignore | 0 .github/workflows/integration.yml | 26 +++ .github/workflows/python.yml | 18 ++ .gitignore | 3 + .vscode/settings.json | 9 + Dockerfile | 18 ++ LICENSE | 22 +++ README.md | 1 + action.yml | 32 ++++ config.py | 26 +++ dev-requirements.txt | 2 + logging_setup.py | 20 +++ main.py | 284 ++++++++++++++++++++++++++++++ mypy.ini | 14 ++ requirements.txt | 3 + test_json_merge.py | 49 ++++++ util.py | 38 ++++ 17 files changed, 565 insertions(+) create mode 100644 .Dockerignore create mode 100644 .github/workflows/integration.yml create mode 100644 .github/workflows/python.yml create mode 100644 .vscode/settings.json create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 action.yml create mode 100644 config.py create mode 100644 dev-requirements.txt create mode 100644 logging_setup.py create mode 100644 main.py create mode 100644 mypy.ini create mode 100644 requirements.txt create mode 100644 test_json_merge.py create mode 100644 util.py diff --git a/.Dockerignore b/.Dockerignore new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 0000000..12d2cc7 --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,26 @@ +name: Integration Test +on: [push] +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Version and Update Item + id: version-update-item + uses: provena/version-update-item-action@main + with: + offline_token: ${{ secrets.PROVENA_OFFLINE_TOKEN }} + domain: "dev.rrap-is.com" + realm_name: "rrap" + item_id: "10378.1/1925572" + version_reason: "Testing GitHub Action which can version and update items" + update_reason: "Updating git custom attributes, as well as source URL." 
+ attribute_updates: | + { + "user_metadata": { + "git_hash": "${{ github.sha }}", + "git_branch": "${{ github.ref_name }}", + "git_release": "${{ github.ref_type == 'tag' && github.ref_name || '' }}" + }, + "source_url": "https://github.com/${{ github.repository }}" + } \ No newline at end of file diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 0000000..ce1223a --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,18 @@ +name: Lint +on: [push, pull_request] +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: "3.7" + + - uses: actions/checkout@v1 + + - name: Lint + run: | + pip install flake8 + flake8 main.py diff --git a/.gitignore b/.gitignore index 82f9275..38791e3 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,6 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +.tokens.json +.offline_token \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9a23e07 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.analysis.diagnosticMode": "workspace", + "mypy.runUsingActiveInterpreter": true, + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1a6842b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3-slim AS builder + +# Just dependencies to minimise rebuilds +COPY requirements.txt /app + +# We are installing a dependency here directly into our app source dir +RUN pip install --target=/app -r requirements.txt + +ADD . 
/app +WORKDIR /app + +# A distroless container image with Python and some basics like SSL certificates +# https://github.com/GoogleContainerTools/distroless +FROM gcr.io/distroless/python3-debian10 +COPY --from=builder /app /app +WORKDIR /app +ENV PYTHONPATH /app +CMD ["/app/main.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a67dca8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ + +The MIT License (MIT) + +Copyright (c) 2018 GitHub, Inc. and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md index 391343a..41e0b38 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ # version-update-item-action + Docker GitHub action which produces a new version of an existing registry item, and updates metadata attributes if supplied. 
diff --git a/action.yml b/action.yml new file mode 100644 index 0000000..94916cd --- /dev/null +++ b/action.yml @@ -0,0 +1,32 @@ +name: "Update Provena registry item" +description: "Creates a new version of an existing item in a Provena deployment, providing a reason and an optional set of attribute updates." +author: "Peter Baker" +inputs: + offline_token: + description: "Offline token which is suitable for the specified provena domain, realm and client." + required: true + secret: true + domain: + description: "The base domain for the provena deployment" + required: true + realm_name: + description: "The realm name for the keycloak instance for this deployment - contact administrator if unsure." + required: true + item_id: + description: "The id of the item to create a new version for." + required: true + version_reason: + description: "What reason should be provided for the creation of the new version?" + required: true + update_reason: + description: "What reason should be provided for the update of metadata attributes?" + required: false + attribute_updates: + description: "Would you like to apply a set of updates after versioning? If so this JSON will be merged with the updated item domain info and an update applied. Please ensure this is a valid serialised JSON string." 
def setup_logger(name: str, level: int = logging.INFO) -> logging.Logger:
    """
    Returns the named logger, configured to write formatted records to stdout.

    Safe to call more than once for the same name: the stdout handler is only
    attached the first time, so repeated calls do not duplicate log lines.

    Args:
        name (str): The name passed to ``logging.getLogger``.
        level (int): The level applied to the logger. Defaults to INFO.

    Returns:
        logging.Logger: The configured logger.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # logging.getLogger returns the same object for the same name, so a
    # handler added by an earlier call is still attached here.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is sys.stdout
        for handler in logger.handlers
    )

    if not already_attached:
        # Console handler
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(
            logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        )
        logger.addHandler(console_handler)

    return logger
def int_to_log_level(level: int) -> LogType:
    """
    Resolves an integer log level into the client's log type.

    Args:
        level (int): The numeric log level to resolve.

    Returns:
        LogType: ``Log(level)`` when that call succeeds, otherwise ``Log.INFO``.
    """
    try:
        resolved = Log(level)
    except Exception as e:
        # Best effort: report the problem and carry on with the INFO default
        print(
            f"Failed to resolve log type for provided log level {level}... Reverting to info default. See https://docs.python.org/3/library/logging.html#levels. Exception {e}."
        )
        resolved = Log.INFO
    return resolved
def set_github_action_output(output_name: str, output_value: str) -> None:
    """
    Sets a github action output

    Appends a ``name=value`` line to the file named by the GITHUB_OUTPUT
    environment variable. If that variable is unset or empty a message is
    printed and nothing is written.

    Args:
        output_name (str): The output name
        output_value (str): The output value
    """
    gh_out_path = os.environ.get("GITHUB_OUTPUT")
    if not gh_out_path:
        print(
            "Cannot write GitHub output when GITHUB_OUTPUT env variable is not provided."
        )
        return

    # Each entry is terminated with a newline so that consecutive outputs do
    # not run together on one line. The context manager closes the file even
    # if the write fails.
    # NOTE(review): a value containing a newline would need the multi-line
    # delimiter form described in the GitHub workflow command docs.
    with open(os.path.abspath(gh_out_path), "a", encoding="utf-8") as f:
        f.write(f"{output_name}={output_value}\n")
async def produce_new_version_of_item(
    newest_id: str, client: ProvenaClient, log: logging.Logger, settings: GithubInputs
) -> str:
    """

    Creates a new version of the item identified by newest_id.

    The item is fetched generically, its subtype is read from the parsed
    record info, and a version request carrying the configured version reason
    is sent for that subtype.

    Args:
        newest_id (str): The ID of the item to create a new version from
        client (ProvenaClient): The client to use
        log (logging.Logger): The logger
        settings (GithubInputs): The inputs

    Returns:
        str: The ID of the new version of existing item
    """
    log.debug(f"Fetching existing item with id {newest_id}.")

    # fetch at the generic level - the subtype is not known up front
    fetched = await client.registry.general_fetch_item(id=newest_id)

    # the fetch response must actually contain an item
    assert fetched.item

    # only the base record info is needed to discover the subtype
    base_record = RecordInfo.parse_obj(fetched.item)

    request = VersionRequest(id=newest_id, reason=settings.input_version_reason)

    # now use L2 client method to version
    versioned = await client._registry_client.version(
        version_request=request,
        item_subtype=base_record.item_subtype,
    )

    return versioned.new_version_id
async def main() -> None:
    """
    Runs the action end to end.

    Parses the inputs, finds the latest version of the requested item, creates
    a new version of it, optionally applies the attribute updates to that new
    version, and writes the new ID as the ``new_id`` action output.

    The attribute updates are checked before any version is created, so that a
    malformed payload fails the run without leaving a new version behind.

    Raises:
        json.JSONDecodeError: If the attribute updates are not valid JSON.
        ValueError: If the attribute updates are not a JSON object, or the
            item subtype is not supported by this action.
    """
    # parse inputs
    settings = parse_inputs()

    # setup logger
    log = get_logger(level=settings.input_log_level)

    # Validate the attribute updates up front - versioning cannot be undone
    # from here, so fail before it rather than after.
    # NOTE(review): an optional input which is left unset may arrive as an
    # empty string rather than being absent - blank input is treated as
    # "no updates requested". Confirm against the action runner behaviour.
    raw_updates = settings.input_attribute_updates
    has_updates = False
    if raw_updates is not None and raw_updates.strip():
        if not isinstance(json.loads(raw_updates), dict):
            raise ValueError(
                "The attribute_updates input must be a serialised JSON object."
            )
        has_updates = True
    else:
        log.debug("No attribute updates supplied - only versioning the item.")

    # setup client
    client = setup_provena_client(settings=settings)

    # find latest version
    newest_id, subtype = await find_latest_version_of_item(
        client=client,
        log=log,
        settings=settings,
    )

    # Check subtype is in allowed update list
    if subtype not in SUBTYPE_TO_DOMAIN_INFO:
        raise ValueError(
            f"Subtype requested to be updated is not supported by this action: {subtype}."
        )

    # perform versioning
    new_id = await produce_new_version_of_item(
        newest_id=newest_id,
        client=client,
        log=log,
        settings=settings,
    )

    # perform update if desired
    if has_updates:
        await update_details_of_item(
            client=client, log=log, subtype=subtype, new_id=new_id, settings=settings
        )

    log.info("Setting github output for new ID.")
    set_github_action_output("new_id", new_id)

    log.info("Operations complete.")


if __name__ == "__main__":
    run(main())
JSONValue = Union[str, int, float, bool, None, Mapping[str, Any], List[Any]]
JSONObject = Dict[str, JSONValue]


def update_json(existing: JSONObject, updates: JSONObject) -> JSONObject:
    """

    Merges an update json object over an existing one

    Objects are merged recursively, key by key. Any other value in the updates
    (including lists and None) replaces the existing value outright. Neither
    input is modified. Keys keep the order of the existing object, with keys
    that only appear in the updates appended in the order they were supplied,
    so the result is the same from run to run.

    Args:
        existing (JSONObject): The existing JSON metadata
        updates (JSONObject): The set of updates (which can either add or overwrite fields)

    Returns:
        JSONObject: The updated json object
    """
    def merge(current: JSONValue, update: JSONValue) -> JSONValue:
        # Only two objects can be merged - everything else is a replacement
        if not (isinstance(current, dict) and isinstance(update, dict)):
            return update

        # Start from a shallow copy so the caller's object is left untouched
        merged: Dict[str, JSONValue] = dict(current)
        for key, value in update.items():
            merged[key] = merge(current[key], value) if key in current else value
        return merged

    return cast(JSONObject, merge(existing, updates))