Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add repo provider for MECA Bundles #1824

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions binderhub/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
GitLabRepoProvider,
GitRepoProvider,
HydroshareProvider,
MecaRepoProvider,
RepoProvider,
ZenodoProvider,
)
Expand Down Expand Up @@ -586,6 +587,7 @@ def _default_build_namespace(self):
"figshare": FigshareProvider,
"hydroshare": HydroshareProvider,
"dataverse": DataverseProvider,
"meca": MecaRepoProvider,
},
config=True,
help="""
Expand Down
3 changes: 2 additions & 1 deletion binderhub/event-schemas/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"Zenodo",
"Figshare",
"Hydroshare",
"Dataverse"
"Dataverse",
"MECA"
],
"description": "Provider for the repository being launched"
},
Expand Down
1 change: 1 addition & 0 deletions binderhub/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"figshare": "Figshare",
"hydroshare": "Hydroshare",
"dataverse": "Dataverse",
"meca": "MECA",
}


Expand Down
112 changes: 111 additions & 1 deletion binderhub/repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
import time
import urllib.parse
from datetime import datetime, timedelta, timezone
from urllib.parse import urlparse
from hashlib import md5
from urllib.parse import unquote, urlparse, urlunparse

import escapism
import validators as val
from prometheus_client import Gauge
from tornado.httpclient import AsyncHTTPClient, HTTPError, HTTPRequest
from tornado.httputil import url_concat
Expand Down Expand Up @@ -263,6 +265,114 @@ def get_build_slug(self):
return f"zenodo-{self.record_id}"


class MecaRepoProvider(RepoProvider):
    """BinderHub provider that can handle the contents of a MECA bundle.

    Users must provide a spec consisting of a public URL to the bundle.
    When the ``allowed_origins`` trait is set, the URL's hostname must be
    included in that list.
    """

    # Kept identical to the "MECA" entry of the provider enum in
    # binderhub/event-schemas/launch.json, so the provider name is one of the
    # values that schema accepts.
    name = Unicode("MECA")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per the failing test, either `name` should be just `MECA`, or the provider enum in `launch.json` should be changed to contain `MECA Bundle`.


display_name = "MECA Bundle"

# UI labels for this provider. A bundle is addressed by URL only, so no
# tag/ref is required and the corresponding inputs are flagged as disabled.
labels = {
    "text": "MECA Bundle URL (https://journals.curvenote.com/journal/submissions/12345/meca.zip)",
    "tag_text": "<no tag required>",
    "ref_prop_disabled": True,
    "label_prop_disabled": True,
}

# The default is given as the trait's default value: ``.tag(default=True)``
# only attaches metadata and would leave the real default at False.
validate_bundle = Bool(
    True, config=True, help="Validate the file as MECA Bundle"
)

allowed_origins = List(
    config=True,
    help="""List of allowed origins for the URL

If set, the URL must be on one of these origins.

If not set, the URL can be on any origin.
""",
)


@default("allowed_origins")
def _allowed_origins_default(self):
    """Return the default for ``allowed_origins``: an empty list."""
    return []

def __init__(self, *args, **kwargs):
    """Validate the bundle URL given as ``spec`` and store it as ``self.url``.

    The spec is percent-decoded and must be a valid URL. When
    ``allowed_origins`` is non-empty, the hostname of the decoded URL must
    be one of its entries.

    Raises:
        ValueError: if the decoded spec is not a valid URL, or its hostname
            is not listed in a non-empty ``allowed_origins``.
    """
    super().__init__(*args, **kwargs)

    url = unquote(self.spec)

    if not val.url(url):
        raise ValueError(f"[MecaRepoProvider] Invalid URL {url}")

    # Check the hostname of the decoded URL, i.e. the one stored in
    # ``self.url`` and fetched later. Parsing the raw spec yields no hostname
    # when the spec is percent-encoded (e.g. "https%3A%2F%2F..."), which
    # rejects every such spec and validates a different string than the one
    # that is actually used.
    if self.allowed_origins and urlparse(url).hostname not in self.allowed_origins:
        raise ValueError("URL is not on an allowed origin")

    self.url = url

    self.log.info(f"MECA Bundle URL: {self.url}")
    self.log.info(f"MECA Bundle raw spec: {self.spec}")

def get_hashed_slug(self, url, changes_with_content):
    """Build a ``meca-<md5 hex digest>`` slug for a bundle URL.

    Only the scheme, network location and path of ``url`` are hashed, so the
    slug does not depend on the params, query string or fragment.
    ``changes_with_content`` is mixed into the hash, so a different value
    yields a different slug.
    """
    scheme, netloc, path = urlparse(url)[:3]
    base_url = urlunparse((scheme, netloc, path, "", "", ""))
    digest = md5(f"{base_url}-{changes_with_content}".encode()).hexdigest()
    return f"meca-{digest}"

async def get_resolved_ref(self):
    """Check that the bundle URL is reachable and compute its hashed slug.

    Sends a HEAD request to ``self.url`` and derives the slug from the URL
    plus the response's ``ETag`` header, or its ``Content-Length`` header
    when ``ETag`` is missing or empty. The slug is stored on
    ``self.hashed_slug`` and returned.

    Raises:
        RuntimeError: if the HEAD request fails; the original exception is
            attached as the cause.
    """
    client = AsyncHTTPClient()
    req = HTTPRequest(self.url, method="HEAD", user_agent="BinderHub")
    self.log.info(f"get_resolved_ref() HEAD: {self.url}")
    try:
        # Only the request itself is guarded, so a failure while computing
        # the slug is not misreported as an unreachable URL.
        r = await client.fetch(req)
    except Exception as e:
        raise RuntimeError(f"URL is unreachable ({e})") from e

    self.log.info(f"URL is reachable: {self.url}")
    self.hashed_slug = self.get_hashed_slug(
        self.url, r.headers.get("ETag") or r.headers.get("Content-Length")
    )

    self.log.info(f"hashed_slug: {self.hashed_slug}")
    return self.hashed_slug

async def get_resolved_spec(self):
    """Return the original spec, resolving the ref first when needed.

    ``get_resolved_ref()`` is awaited only when ``hashed_slug`` has not been
    set on this instance yet.
    """
    needs_resolution = not hasattr(self, "hashed_slug")
    if needs_resolution:
        await self.get_resolved_ref()
    self.log.info(f"get_resolved_spec(): {self.hashed_slug}")
    return self.spec

async def get_resolved_ref_url(self):
    """Log and return the bundle URL stored on ``self.url``, unchanged."""
    ref_url = self.url
    self.log.info(f"get_resolved_ref_url(): {ref_url}")
    return ref_url

def get_repo_url(self):
    """Return the bundle URL with ``+meca`` appended to its scheme.

    This is the URL passed to repo2docker. The ``http[s]+meca`` scheme is a
    convention that lets the MecaContentProvider detect MECA URLs.
    """
    parts = urlparse(self.url)
    # Rebuild the URL from its six components, swapping only the scheme.
    meca_url = urlunparse((f"{parts.scheme}+meca", *parts[1:]))
    self.log.info(f"get_repo_url(): {meca_url}")
    return meca_url

def get_build_slug(self):
    """Return the build slug: the hashed slug stored by ``get_resolved_ref()``."""
    build_slug = self.hashed_slug
    return build_slug


class FigshareProvider(RepoProvider):
"""Provide contents of a Figshare article

Expand Down
5 changes: 3 additions & 2 deletions binderhub/static/js/src/form.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export function getBuildFormValues() {
repo = repo.replace(/^(https?:\/\/)?github.com\//, "");
repo = repo.replace(/^(https?:\/\/)?gitlab.com\//, "");
}
// trim trailing or leading '/' on repo
// trim trailing or leading "/" on repo
repo = repo.replace(/(^\/)|(\/?$)/g, "");
// git providers encode the URL of the git repository as the repo
// argument.
Expand All @@ -31,7 +31,8 @@ export function getBuildFormValues() {
providerPrefix === "zenodo" ||
providerPrefix === "figshare" ||
providerPrefix === "dataverse" ||
providerPrefix === "hydroshare"
providerPrefix === "hydroshare" ||
providerPrefix === "meca"
) {
ref = "";
}
Expand Down
6 changes: 6 additions & 0 deletions docs/source/reference/repoproviders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ Module: :mod:`binderhub.repoproviders`
.. autoconfigurable:: DataverseProvider
:members:

:class:`MecaRepoProvider`
---------------------------

.. autoconfigurable:: MecaRepoProvider
:members:


:class:`GitRepoProvider`
---------------------------
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ pyjwt>=2
python-json-logger
tornado>=5.1
traitlets
validators
Loading