Skip to content

Commit

Permalink
implement pygeoapi datasets serve for dwca
Browse files Browse the repository at this point in the history
  • Loading branch information
nicokant committed Nov 17, 2023
1 parent 6f53fcd commit 1d6f55a
Show file tree
Hide file tree
Showing 17 changed files with 212 additions and 11 deletions.
1 change: 1 addition & 0 deletions config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
path("ht/", include("health_check.urls")),
path("csw/", include("metadata_catalogue.datasets.csw.urls")),
path("geoapi/", include("metadata_catalogue.datasets.geoapi.urls")),
path("datasets/", include("metadata_catalogue.datasets.urls")),
] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)


Expand Down
1 change: 1 addition & 0 deletions metadata_catalogue/datasets/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


admin.site.register(models.Dataset)
admin.site.register(models.Content)
admin.site.register(models.Person)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 4.2.7 on 2023-11-15 13:46

from django.db import migrations


class Migration(migrations.Migration):
    """Remove the ``language`` field from the ``cswconfig`` model."""

    # Runs after the previous migration of the "csw" app.
    dependencies = [
        ("csw", "0006_remove_cswconfig_contact_address_and_more"),
    ]

    operations = [
        migrations.RemoveField(
            model_name="cswconfig",
            name="language",
        ),
    ]
6 changes: 4 additions & 2 deletions metadata_catalogue/datasets/geoapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ def get_config(self, url=""):
Dataset = apps.get_model("datasets", "Dataset")
info = ServiceInfo.objects.select_related("contact", "license", "provider").get()

resources = {id: value for id, value in Dataset.objects.select_related("metadata").all().as_geoapi_resource()}
resources = {
id: value for id, value in Dataset.objects.select_related("metadata").all().as_geoapi_resource(url)
}

return {
"server": {
Expand All @@ -29,7 +31,7 @@ def get_config(self, url=""):
"language": info.language or "en-US",
"limit": str(self.max_records),
"pretty_print": self.pretty_print,
"url": url,
"url": url + "/geoapi",
"templates": {"static": str(Path(__name__).parent / "statics" / "geoapi")},
"map": {
"url": self.map_url,
Expand Down
4 changes: 2 additions & 2 deletions metadata_catalogue/datasets/geoapi/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from django.http import HttpRequest, HttpResponse
from pygeoapi.api import API

from ..libs.utils import req_to_base
from .models import GeoAPIConfig


Expand Down Expand Up @@ -452,8 +453,7 @@ def _feed_response(request: HttpRequest, api_definition: str, *args, **kwargs) -
"""Use pygeoapi api to process the input request"""

config = GeoAPIConfig.get_solo()
as_dict = config.get_config(request.scheme + "://" + request.get_host() + "/geoapi/")

as_dict = config.get_config(req_to_base(request))
api_ = API(as_dict, {})
api = getattr(api_, api_definition)
return api(request, *args, **kwargs)
Expand Down
1 change: 1 addition & 0 deletions metadata_catalogue/datasets/libs/darwincore/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .deserializer import to_metadata
from .mapping import to_content
65 changes: 65 additions & 0 deletions metadata_catalogue/datasets/libs/darwincore/mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# import csv
import xml.etree.ElementTree as ET

from bs4 import BeautifulSoup
from django.db import transaction


def to_vrt(layername):
    """Build an OGR VRT document describing a single point layer.

    The layer is named *layername*. Its ``SrcDataSource`` is the literal
    placeholder ``{{SOURCE}}``, its SRS is ``WGS84`` and its point geometry
    is read from the ``decimalLongitude`` (x) and ``decimalLatitude`` (y)
    columns.

    Returns:
        The serialized XML document as a ``str``.
    """
    datasource = ET.Element("OGRVRTDataSource")
    vrt_layer = ET.SubElement(datasource, "OGRVRTLayer", {"name": layername})

    ET.SubElement(vrt_layer, "SrcDataSource").text = "{{SOURCE}}"
    # NOTE: no explicit <Field> mappings are emitted for the layer.
    ET.SubElement(vrt_layer, "LayerSRS").text = "WGS84"

    point_columns = {
        "encoding": "PointFromColumns",
        "x": "decimalLongitude",
        "y": "decimalLatitude",
    }
    geometry = ET.SubElement(vrt_layer, "GeometryField", point_columns)
    ET.SubElement(geometry, "GeometryType").text = "wkbPoint"

    serialized = ET.tostring(datasource, encoding="utf-8")
    return serialized.decode("utf-8")


def to_content(xml_path, dataset):
    """Record on ``dataset.content`` how the archive's data file can be read.

    Parses ``meta.xml`` inside *xml_path*, takes the first ``<location>``
    element as the data file name, checks that the file exists, then stores
    a VRT definition (layer named after the file stem) and the file name on
    ``dataset.content``.

    Args:
        xml_path: ``pathlib.Path``-like directory of the extracted archive.
        dataset: object whose ``content`` attribute exposes
            ``gdal_vrt_definition``, ``remote_source`` and ``save()``.

    Raises:
        Exception: if ``meta.xml`` is missing, declares no ``<location>``,
            or points at a data file that does not exist.
    """
    meta_path = xml_path / "meta.xml"
    if not meta_path.is_file():
        raise Exception("There should be one meta.xml file")

    # Binary mode lets the XML parser honour the encoding declared in the
    # document instead of the process locale; the handle is released before
    # any database work starts.
    with meta_path.open("rb") as meta:
        soup = BeautifulSoup(meta, features="lxml-xml")

    location = soup.find("location")
    # Pretty-printed XML may wrap the file name in whitespace/newlines.
    location_name = location.text.strip() if location is not None else ""
    if not location_name:
        raise Exception("meta.xml does not declare a data file <location>")

    data_file_path = xml_path / location_name
    if not data_file_path.is_file():
        raise Exception(f"Missing file {data_file_path} declared in meta.xml")

    # NOTE: the <field> term mappings declared in meta.xml are not applied yet;
    # the VRT exposes the data file's columns as they are.
    with transaction.atomic():
        content = dataset.content
        content.gdal_vrt_definition = to_vrt(data_file_path.stem)
        content.remote_source = data_file_path.name
        content.save()
18 changes: 15 additions & 3 deletions metadata_catalogue/datasets/libs/geoapi/mapping.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from django.urls import reverse_lazy


class ResourceMapping:
def __init__(self, dataset) -> None:
def __init__(self, dataset, base_name) -> None:
self.id = dataset.uuid
self.dataset = dataset
self.base_name = base_name

def as_resource(self):
return self.id, {
return str(self.id), {
"type": "collection",
"visibility": "default",
"title": self.dataset.metadata.title,
Expand All @@ -24,8 +28,16 @@ def as_resource(self):
{
"type": "feature",
"default": True,
"name": "CSV",
"name": "OGR",
"editable": False,
"id_field": "id",
"layer": "occurrence",
"data": {
"source_type": "VRT",
"source": "/vsicurl/"
+ self.base_name
+ reverse_lazy("get-dataset-vrt", kwargs={"dataset_uuid": self.dataset.uuid}),
},
}
],
}
1 change: 1 addition & 0 deletions metadata_catalogue/datasets/libs/harvesters.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def handle_file_as_darwincore_zip(file: NamedTemporaryFile, dataset: Dataset):
raise Exception("There should be one eml.xml file")

darwincore.to_metadata(eml, dataset)
darwincore.to_content(path, dataset)


def harvest_dataset(dataset_id: int):
Expand Down
4 changes: 4 additions & 0 deletions metadata_catalogue/datasets/libs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@ def safe_get(element, attribute):
if (v := element.__getattribute__(attribute)) is not None:
return v
return ""


def req_to_base(request):
    """Return the ``scheme://host`` base URL of *request*, without a trailing slash."""
    return "://".join((request.scheme, request.get_host()))
4 changes: 2 additions & 2 deletions metadata_catalogue/datasets/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def csw_sort(self, sort):
logger.warn(f"Not implemented! {sort}")
return self

def as_geoapi_resource(self, *args, warn=True, **kwargs):
def as_geoapi_resource(self, base_url, *args, warn=True, **kwargs):
logger.warn("DANGER: This method consumes the queryset and returns and array of items")
return [ResourceMapping(instance).as_resource() for instance in self]
return [ResourceMapping(instance, base_url).as_resource() for instance in self]


class DatasetManager(models.Manager):
Expand Down
27 changes: 27 additions & 0 deletions metadata_catalogue/datasets/migrations/0009_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 4.2.7 on 2023-11-15 13:46

from django.db import migrations, models
import django.db.models.deletion
import metadata_catalogue.core.fields


class Migration(migrations.Migration):
    """Create the ``Content`` model, linked one-to-one to ``datasets.dataset``."""

    dependencies = [
        ("datasets", "0008_serviceinfo_language"),
    ]

    operations = [
        migrations.CreateModel(
            name="Content",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("gdal_vrt_definition", models.TextField(blank=True, null=True)),
                (
                    "dataset",
                    # Deleting the dataset cascades to its content row.
                    metadata_catalogue.core.fields.AutoOneToOneField(
                        on_delete=django.db.models.deletion.CASCADE, related_name="content", to="datasets.dataset"
                    ),
                ),
            ],
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2.7 on 2023-11-15 14:02

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``remote_source`` text field to the ``content`` model."""

    dependencies = [
        ("datasets", "0009_content"),
    ]

    operations = [
        migrations.AddField(
            model_name="content",
            name="remote_source",
            field=models.TextField(blank=True, null=True),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.7 on 2023-11-17 09:04

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``dataset.fetch_url`` and ``dataset.source`` to nullable ``TextField`` columns."""

    dependencies = [
        ("datasets", "0010_content_remote_source"),
    ]

    operations = [
        migrations.AlterField(
            model_name="dataset",
            name="fetch_url",
            field=models.TextField(blank=True, null=True, verbose_name="URL of the resource to fetch"),
        ),
        migrations.AlterField(
            model_name="dataset",
            name="source",
            field=models.TextField(blank=True, null=True),
        ),
    ]
17 changes: 15 additions & 2 deletions metadata_catalogue/datasets/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.contrib.gis.db import models
from django.db.models import Value
from django.db.models.functions import Coalesce
from django.urls import reverse_lazy
from django.utils.timezone import now
from django.utils.translation import gettext_lazy as _
from django_lifecycle import AFTER_CREATE, AFTER_DELETE, BEFORE_SAVE, LifecycleModel, hook
Expand All @@ -20,8 +21,8 @@ class FetchType(models.IntegerChoices):

name = models.CharField(max_length=250, verbose_name=_("Internal name"))
uuid = models.UUIDField(default=uuid.uuid4)
source = models.URLField(null=True, blank=True)
fetch_url = models.URLField(verbose_name=_("URL of the resource to fetch"), null=True, blank=True)
source = models.TextField(null=True, blank=True)
fetch_url = models.TextField(verbose_name=_("URL of the resource to fetch"), null=True, blank=True)
fetch_type = models.IntegerField(choices=FetchType.choices, null=True, blank=True)
created_at = models.DateTimeField(auto_now_add=True, verbose_name=_("Created at"))
last_modified_at = models.DateTimeField(auto_now=True, verbose_name=_("Last modified at"))
Expand Down Expand Up @@ -389,3 +390,15 @@ class ServiceInfo(SingletonModel):
provider = models.ForeignKey("datasets.Organization", blank=True, null=True, on_delete=models.SET_NULL)
license = models.ForeignKey("datasets.License", on_delete=models.SET_NULL, null=True, blank=True)
language = models.CharField(max_length=7, null=True, blank=True)


class Content(models.Model):
    """Per-dataset record holding a GDAL VRT definition and its remote source name."""

    dataset = AutoOneToOneField("datasets.Dataset", on_delete=models.CASCADE, related_name="content")
    gdal_vrt_definition = models.TextField(null=True, blank=True)
    remote_source = models.TextField(null=True, blank=True)

    def get_gdal_vrt_source(self):
        """Return the GDAL source string for this content.

        For Darwin Core datasets this is a CSV source that reads
        ``remote_source`` from inside the zip fetched from the dataset's
        ``fetch_url``. Any other fetch type yields an empty string.
        """
        if self.dataset.fetch_type != Dataset.FetchType.DARWINCORE:
            return ""

        # Doubled braces emit the literal "{" / "}" that wrap the nested /vsicurl/ path.
        return f"CSV:/vsizip/{{/vsicurl/{self.dataset.fetch_url}}}/{self.remote_source}"
7 changes: 7 additions & 0 deletions metadata_catalogue/datasets/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from django.urls import path, re_path

from .views import get_dataset_vrt_view

# URL routes exposed by the datasets app.
urlpatterns = [
    # VRT definition of a single dataset, addressed by its UUID.
    path("<uuid:dataset_uuid>/definition.vrt", get_dataset_vrt_view, name="get-dataset-vrt"),
]
12 changes: 12 additions & 0 deletions metadata_catalogue/datasets/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from django.http import HttpResponse, HttpResponseNotFound

from .models import Dataset


def get_dataset_vrt_view(request, dataset_uuid):
    """Serve the GDAL VRT definition of the dataset identified by *dataset_uuid*.

    The stored definition has its ``{{SOURCE}}`` placeholder replaced with
    the value of ``content.get_gdal_vrt_source()`` before being returned.

    Returns:
        ``HttpResponse`` with the VRT XML, or ``HttpResponseNotFound`` when
        no dataset matches or the dataset has no VRT definition stored.
    """
    # Only the lookup can raise DoesNotExist, so keep the try body to that line.
    try:
        dataset = Dataset.objects.select_related("content").get(uuid=dataset_uuid)
    except Dataset.DoesNotExist:
        return HttpResponseNotFound()

    dataset_content = dataset.content
    definition = dataset_content.gdal_vrt_definition
    if not definition:
        # The definition column is nullable; without this guard a dataset
        # with no definition yet would fail on None.replace(...).
        return HttpResponseNotFound()

    content = definition.replace("{{SOURCE}}", dataset_content.get_gdal_vrt_source())
    # "text" alone is not a valid media type; the payload is an XML document.
    return HttpResponse(content, content_type="application/xml")

0 comments on commit 1d6f55a

Please sign in to comment.