Skip to content

Commit

Permalink
OSDF Origin Push/Pull Support added (#182)
Browse files Browse the repository at this point in the history
* Enable Artifact Pushing/Pull from OSDF Origin
- This functions with dvc[http] plugin for push except that it passes a
  bearer token in the header before any push/pull to a OSDF HTTP Remote.
- Introduces new command `cmf init osdfremote` with new
    cmflib/commands/init/osdfremote.py
- Handles artifact push when `core.remote==osdf` through dvc_push except
  that it updates `core.remote.password` in .dvc/config before doing so
- Required OSDF Token is generated in function new generate_osdf_token()
  in utils/helper_functions.py. This uses scitokens and cryptography
packages. This reads supplied private key, gets token from issuer and
formats it as "Bearer + token".
- Handles artifact pull through a new OSDFRemoteArtifacts Class in
`storage_backends/osdf_artifacts.py`. This works with requests package
- `commands/artifacts/pull.py` has new handling for `core.remote==osdf`
  which updates `core.remote.password` in .dvc/config and passes  the
newly generated dynamic password to download_artifacts to use.
- Also updated dvc version to 3.50.2 which is needed for dvc[ttp] to
  function. Also bumped ml-metadata to 1.15.0 which is needed for
compatibility. External packages scitokens and cryptography also added
as dependencies.

* Added init doc for OSDF Remote in CMF-client
  • Loading branch information
atripathy86 authored Jun 18, 2024
1 parent d5acc04 commit e6c8858
Show file tree
Hide file tree
Showing 11 changed files with 457 additions and 9 deletions.
3 changes: 0 additions & 3 deletions cmflib/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import os
import sys


def fix_subparsers(subparsers):
subparsers.required = True
subparsers.dest = "cmd"
Expand Down Expand Up @@ -74,5 +73,3 @@ def check_minio_server(dvc_config_op):
return exception
except S3Error as exception:
return exception


31 changes: 31 additions & 0 deletions cmflib/cmf_commands_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,34 @@ def _init_sshremote(path,user,port,password,git_remote_url,cmf_server_url,neo4j_
msg = cmd.do_run()
print(msg)
return msg

def _init_osdfremote(path,key_id,key_path,key_issuer,git_remote_url,cmf_server_url,neo4j_user,neo4j_password,neo4j_uri):
cli_args = cli.parse_args(
[
"init",
"osdf",
"--path",
path,
"--key-id",
key_id,
"--key-path",
key_path,
"--key-issuer",
key_issuer,
"--git-remote-url",
git_remote_url,
"--cmf-server-url",
cmf_server_url,
"--neo4j-user",
neo4j_user,
"--neo4j-password",
neo4j_password,
"--neo4j-uri",
neo4j_uri
]
)
cmd = cli_args.func(cli_args)
msg = cmd.do_run()
print(msg)
return msg

61 changes: 60 additions & 1 deletion cmflib/commands/artifact/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
local_artifacts,
amazonS3_artifacts,
sshremote_artifacts,
osdf_artifacts,
)
from cmflib.cli.command import CmdBase
from cmflib.utils.dvc_config import DvcConfig
Expand Down Expand Up @@ -120,7 +121,12 @@ def extract_repo_args(self, type: str, name: str, url: str, current_directory: s
current_loc_1 = "/".join(token)
current_loc = f"/{current_loc_1}"
return host, current_loc, name

elif type == "osdf":
token_length = len(token)
download_loc = current_directory + "/" + name if current_directory != "" else name
#current_dvc_loc = (token[(token_length - 2)] + "/" + token[(token_length - 1)])
#return FQDNL of where to download from, where to download to, what the artifact will be named
return s_url, download_loc, name
else:
# sometimes s_url is empty - this shouldn't happen technically
# sometimes s_url is not starting with s3:// - technically this shouldn't happen
Expand Down Expand Up @@ -294,6 +300,59 @@ def run(self):
)
print(stmt)
return "Done"
elif dvc_config_op["core.remote"] == "osdf":
#Regenerate Token for OSDF
from cmflib.utils.helper_functions import generate_osdf_token
from cmflib.utils.helper_functions import is_url
from cmflib.dvc_wrapper import dvc_add_attribute
from cmflib.utils.cmf_config import CmfConfig
#Fetch Config from CMF_Config_File
cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig")
cmf_config={}
cmf_config=CmfConfig.read_config(cmf_config_file)
#Regenerate password
dynamic_password = generate_osdf_token(cmf_config["osdf-key_id"],cmf_config["osdf-key_path"],cmf_config["osdf-key_issuer"])
#cmf_config["password"]=dynamic_password
#Update Password in .dvc/config for future use
dvc_add_attribute(dvc_config_op["core.remote"],"password",dynamic_password)
#Updating dvc_config_op data structure with new password as well since this is used in download_artifacts() below
dvc_config_op["remote.osdf.password"]=dynamic_password
#Need to write to cmfconfig with new credentials
#CmfConfig.write_config(cmf_config, "osdf", attr_dict, True)
#Now Ready to do dvc pull

osdfremote_class_obj = osdf_artifacts.OSDFremoteArtifacts()
if self.args.artifact_name:
output = self.search_artifact(name_url_dict)
# output[0] = name
# output[1] = url
if output is None:
print(f"{self.args.artifact_name} doesn't exist.")
else:
args = self.extract_repo_args("osdf", output[0], output[1], current_directory)
stmt = osdfremote_class_obj.download_artifacts(
dvc_config_op,
args[0], # s_url of the artifact
current_directory,
args[1], # download_loc of the artifact
args[2] # name of the artifact
)
print(stmt)
else:
for name, url in name_url_dict.items():
#print(name, url)
if not isinstance(url, str):
continue
args = self.extract_repo_args("osdf", name, url, current_directory)
stmt = osdfremote_class_obj.download_artifacts(
dvc_config_op,
args[0], # host,
current_directory,
args[1], # remote_loc of the artifact
args[2] # name
)
print(stmt)
return "Done"
elif dvc_config_op["core.remote"] == "amazons3":
amazonS3_class_obj = amazonS3_artifacts.AmazonS3Artifacts()
#print(self.args.artifact_name,"artifact name")
Expand Down
17 changes: 16 additions & 1 deletion cmflib/commands/artifact/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,32 @@

from cmflib.cli.command import CmdBase
from cmflib.cli.utils import check_minio_server
from cmflib.utils.helper_functions import generate_osdf_token
from cmflib.utils.helper_functions import is_url
from cmflib.utils.dvc_config import DvcConfig
from cmflib.dvc_wrapper import dvc_push

from cmflib.dvc_wrapper import dvc_add_attribute
from cmflib.utils.cmf_config import CmfConfig

class CmdArtifactPush(CmdBase):
def run(self):
result = ""
dvc_config_op = DvcConfig.get_dvc_config()
cmf_config_file = os.environ.get("CONFIG_FILE", ".cmfconfig")
cmf_config={}
cmf_config=CmfConfig.read_config(cmf_config_file)
out_msg = check_minio_server(dvc_config_op)
if dvc_config_op["core.remote"] == "minio" and out_msg != "SUCCESS":
return out_msg
elif dvc_config_op["core.remote"] == "osdf":
#print("key_id="+cmf_config["osdf-key_id"])
dynamic_password = generate_osdf_token(cmf_config["osdf-key_id"],cmf_config["osdf-key_path"],cmf_config["osdf-key_issuer"])
#print("Dynamic Password"+dynamic_password)
dvc_add_attribute(dvc_config_op["core.remote"],"password",dynamic_password)
#The Push URL will be something like: https://<Path>/files/md5/[First Two of MD5 Hash]
result = dvc_push()
#print(result)
return result
else:
result = dvc_push()
return result
Expand Down
4 changes: 2 additions & 2 deletions cmflib/commands/init/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

import argparse

from cmflib.commands.init import minioS3, amazonS3, local, sshremote, show
from cmflib.commands.init import minioS3, amazonS3, local, sshremote, osdfremote, show
from cmflib.cli.utils import *

SUB_COMMANDS = [minioS3, amazonS3, local, sshremote, show]
SUB_COMMANDS = [minioS3, amazonS3, local, sshremote, osdfremote, show]

# This parser adds positional arguments to the main parser
def add_parser(subparsers, parent_parser):
Expand Down
192 changes: 192 additions & 0 deletions cmflib/commands/init/osdfremote.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
###
# Copyright (2024) Hewlett Packard Enterprise Development LP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###


#!/usr/bin/env python3
import argparse
import os
import subprocess
import sys

from cmflib.cli.command import CmdBase
from cmflib.dvc_wrapper import (
git_quiet_init,
git_checkout_new_branch,
git_initial_commit,
git_add_remote,
check_git_repo,
dvc_quiet_init,
dvc_add_remote_repo,
dvc_add_attribute,
)
from cmflib.utils.cmf_config import CmfConfig
from cmflib.utils.helper_functions import is_git_repo
from cmflib.utils.helper_functions import generate_osdf_token
from cmflib.utils.helper_functions import is_url

class CmdInitOSDFRemote(CmdBase):
def run(self):
# Reading CONFIG_FILE variable
cmf_config = os.environ.get("CONFIG_FILE", ".cmfconfig")
# checking if config file exists
if not os.path.exists(cmf_config):
# writing default value to config file
attr_dict = {}
attr_dict["server-ip"] = "http://127.0.0.1:80"
CmfConfig.write_config(cmf_config, "cmf", attr_dict)

# if user gave --cmf-server-url, override the config file
if self.args.cmf_server_url:
attr_dict = {}
attr_dict["server-ip"] = self.args.cmf_server_url
CmfConfig.write_config(cmf_config, "cmf", attr_dict, True)

# read --neo4j details and add to the exsting file
if self.args.neo4j_user and self.args.neo4j_password and self.args.neo4j_uri:
attr_dict = {}
attr_dict["user"] = self.args.neo4j_user
attr_dict["password"] = self.args.neo4j_password
attr_dict["uri"] = self.args.neo4j_uri
CmfConfig.write_config(cmf_config, "neo4j", attr_dict, True)
elif (
not self.args.neo4j_user
and not self.args.neo4j_password
and not self.args.neo4j_uri
):
pass
else:
return "ERROR: Provide user, password and uri for neo4j initialization."
output = is_git_repo()
if not output:
branch_name = "master"
print("Starting git init.")
git_quiet_init()
git_checkout_new_branch(branch_name)
git_initial_commit()
git_add_remote(self.args.git_remote_url)
print("git init complete.")

print("Starting cmf init.")
repo_type = "osdf"
dvc_quiet_init()
output = dvc_add_remote_repo(repo_type, self.args.path)
if not output:
return "cmf init failed."
print(output)
#dvc_add_attribute(repo_type, "key_id", self.args.key_id)
#dvc_add_attribute(repo_type, "key_path", self.args.key_path)
#dvc_add_attribute(repo_type, "key_issuer", self.args.key_issuer)
#Writing to an OSDF Remote is based on SSH Remote. With few additions
#In addition to URL (including FQDN, port, path), we need to provide
#method=PUT, ssl_verify=false, ask_password=false, auth=custom, custom_auth-header='Authorization'
#password='Bearer + dynamically generated scitoken' (This token has a timeout of 15 mins so must be generated right before any push/pull)
dvc_add_attribute(repo_type,"method", "PUT")
dvc_add_attribute(repo_type,"ssl_verify", "false")
dvc_add_attribute(repo_type,"ask_password", "false")
dvc_add_attribute(repo_type,"auth", "custom")
dvc_add_attribute(repo_type,"custom_auth_header", "Authorization")
dynamic_password = generate_osdf_token(self.args.key_id,self.args.key_path,self.args.key_issuer)
dvc_add_attribute(repo_type,"password",dynamic_password)

attr_dict = {}
attr_dict["path"] = self.args.path
attr_dict["key_id"] = self.args.key_id
attr_dict["key_path"] = self.args.key_path
attr_dict["key_issuer"] = self.args.key_issuer
CmfConfig.write_config(cmf_config, "osdf", attr_dict, True)

return "cmf init complete."


def add_parser(subparsers, parent_parser):
HELP = "Initialises remote OSDF directory as artifact repository."

parser = subparsers.add_parser(
"osdfremote",
parents=[parent_parser],
description="This command initialises remote OSDF directory as artifact repository for CMF.",
help=HELP,
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
required_arguments = parser.add_argument_group("required arguments")

required_arguments.add_argument(
"--path",
required=True,
help="Specify FQDN for OSDF directory path including port and path",
metavar="<path>",
default=argparse.SUPPRESS,
)

required_arguments.add_argument(
"--key-id",
required=True,
help="Specify key_id for provided private key. eg. b2d3",
metavar="<key_id>",
default=argparse.SUPPRESS,
)

required_arguments.add_argument(
"--key-path",
required=True,
help="Specify path for private key on local filesystem. eg. ~/.ssh/XXX.pem",
metavar="<key_path>",
default=argparse.SUPPRESS,
)

required_arguments.add_argument(
"--key-issuer",
required=True,
help="Specify URL for Key Issuer. eg. https://t.nationalresearchplatform.org/XXX",
metavar="<key_issuer>",
default=argparse.SUPPRESS,
)

required_arguments.add_argument(
"--git-remote-url",
required=True,
help="Specify git repo url. eg: https://github.com/XXX/example.git",
metavar="<git_remote_url>",
default=argparse.SUPPRESS,
)

parser.add_argument(
"--cmf-server-url",
help="Specify cmf-server URL.",
metavar="<cmf_server_url>",
default="http://127.0.0.1:80",
)

parser.add_argument(
"--neo4j-user",
help="Specify neo4j user.",
metavar="<neo4j_user>",
# default=argparse.SUPPRESS,
)
parser.add_argument(
"--neo4j-password",
help="Specify neo4j password.",
metavar="<neo4j_password>",
# default=argparse.SUPPRESS,
)
parser.add_argument(
"--neo4j-uri",
help="Specify neo4j uri.eg bolt://localhost:7687",
metavar="<neo4j_uri>",
# default=argparse.SUPPRESS,
)

parser.set_defaults(func=CmdInitOSDFRemote)
Loading

0 comments on commit e6c8858

Please sign in to comment.