Skip to content

Commit

Permalink
Merge pull request #61 from kartoza/feature/504-default-layer
Browse files Browse the repository at this point in the history
Add default_layer_sync
  • Loading branch information
danangmassandy committed Aug 29, 2024
2 parents da233cd + 6f3364f commit 9871c92
Show file tree
Hide file tree
Showing 9 changed files with 280 additions and 1 deletion.
3 changes: 3 additions & 0 deletions deployment/docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@ minio==7.2.5

flower==1.2.0
django-revproxy @ https://github.com/jazzband/django-revproxy/archive/refs/tags/0.11.0.zip

# For getting raster metadata
rasterio==1.3.10
4 changes: 4 additions & 0 deletions django_project/core/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@
'clean-multipart-upload': {
'task': 'clean_multipart_upload',
'schedule': crontab(minute='0', hour='2'), # Run everyday at 2am
},
'sync-default-layers': {
'task': 'sync_default_layers',
'schedule': crontab(minute='0', hour='1'), # Run everyday at 1am
}
}

Expand Down
2 changes: 1 addition & 1 deletion django_project/cplus_api/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class InputLayerAdmin(admin.ModelAdmin):
'size', 'component_type', 'privacy_type')
search_fields = ['name', 'uuid']
list_filter = ["layer_type", "owner", "component_type", "privacy_type"]
readonly_fields = ['layer_type', 'component_type', 'uuid']
readonly_fields = ['layer_type', 'component_type', 'uuid', 'modified_on']
actions = [trigger_verify_input_layer]


Expand Down
18 changes: 18 additions & 0 deletions django_project/cplus_api/migrations/0011_inputlayer_modified_on.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2024-08-28 12:55

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('cplus_api', '0010_inputlayer_metadata'),
]

operations = [
migrations.AddField(
model_name='inputlayer',
name='modified_on',
field=models.DateTimeField(auto_now=True),
),
]
5 changes: 5 additions & 0 deletions django_project/cplus_api/models/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ class PrivacyTypes(models.TextChoices):
help_text='Layer Metadata.'
)

modified_on = models.DateTimeField(auto_now=True)

def __str__(self):
return f"{self.name} | {self.component_type}"

def download_to_working_directory(self, base_dir: str):
if not self.is_available():
return None
Expand Down
1 change: 1 addition & 0 deletions django_project/cplus_api/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .runner import * # noqa
from .remove_layers import * # noqa
from .verify_input_layer import * # noqa
from .sync_default_layers import *
169 changes: 169 additions & 0 deletions django_project/cplus_api/tasks/sync_default_layers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import logging
import os
import typing
import tempfile

import rasterio
from datetime import datetime
from django.utils import timezone

from celery import shared_task
from django.contrib.auth.models import User
from storages.backends.s3 import S3Storage
from django.conf import settings
from django.core.files.storage import FileSystemStorage

from cplus_api.models import (
select_input_layer_storage,
InputLayer,
COMMON_LAYERS_DIR
)
from cplus_api.utils.api_helper import get_layer_type

logger = logging.getLogger(__name__)


class ProcessFile:
"""
Class to process a file dictionary into an Input Layer
:param storage: Django storage instance
:type storage: FileSystemStorage or S3Storage
:param owner: Owner of the input layer
:type owner: User
:param component_type: Component type of the input layer e.g. ncs_pathway
:type component_type: str
:param file: Dictionary of the file info to be processed
:type file: dict
:return: None
:rtype: None
"""
def __init__(
self,
storage: typing.Union[FileSystemStorage, S3Storage],
owner: User,
component_type: str,
file: dict
):
self.storage = storage
self.owner = owner
self.component_type = component_type
self.file = file
self.input_layer, self.created = InputLayer.objects.get_or_create(
name=os.path.basename(file['Key']),
owner=owner,
privacy_type=InputLayer.PrivacyTypes.COMMON,
component_type=component_type,
defaults={
'created_on': timezone.now(),
'layer_type': get_layer_type(file['Key'])
}
)

def read_metadata(
self,
file_path: str
):
"""
Read metadata from layer file and save it to InputLayer object
:param file_path: path for input file
:type file_path: str
:return: None
:rtype: None
"""
with rasterio.open(file_path) as dataset:
transform = dataset.transform
res_x = abs(transform[0])
res_y = abs(transform[4])
crs = dataset.crs
nodata = dataset.nodata

metadata = {
"name": os.path.basename(file_path),
"is_raster": get_layer_type(file_path) == 0,
"description": os.path.basename(file_path),
"crs": str(crs),
"resolution": [res_x, res_y],
"no_data": nodata
}
self.input_layer.metadata = metadata
self.input_layer.file.name = self.file['Key']
self.input_layer.save()

def run(self):
"""
Function to trigger file processing
:return: None
:rtype: None
"""
# Save layer if the file is modified after input layers last saved OR
# if input layer is a new record
if (
self.file['LastModified'] > self.input_layer.modified_on or
self.created
):
media_root = self.storage.location or settings.MEDIA_ROOT
if isinstance(self.storage, FileSystemStorage):
download_path = os.path.join(media_root, self.file['Key'])
os.makedirs(os.path.dirname(download_path), exist_ok=True)
self.read_metadata(download_path)
os.remove(download_path)
else:
with tempfile.NamedTemporaryFile() as tmpfile:
boto3_client = self.storage.connection.meta.client
boto3_client.download_file(
self.storage.bucket_name,
self.file['Key'],
tmpfile.name,
Config=settings.AWS_TRANSFER_CONFIG
)
self.read_metadata(tmpfile.name)


@shared_task(name="sync_default_layers")
def sync_default_layers():
"""
Create Input Layers from default layers copied to S3/local directory
"""

storage = select_input_layer_storage()
component_types = [c[0] for c in InputLayer.ComponentTypes.choices]
admin_username = os.getenv('ADMIN_USERNAME')
owner = User.objects.get(username=admin_username)
if isinstance(storage, FileSystemStorage):
media_root = storage.location or settings.MEDIA_ROOT
for component_type in component_types:
component_path = os.path.join(
media_root, COMMON_LAYERS_DIR, component_type
)
os.makedirs(component_path, exist_ok=True)
layers = os.listdir(component_path)
for layer in layers:
key = f"{COMMON_LAYERS_DIR}/{component_type}/{layer}"
download_path = os.path.join(media_root, key)
last_modified = datetime.fromtimestamp(
os.path.getmtime(download_path),
tz=timezone.now().tzinfo
)
file = {
"Key": key,
"LastModified": last_modified,
"Size": os.path.getsize(download_path)
}
ProcessFile(storage, owner, component_type, file).run()
else:
boto3_client = storage.connection.meta.client
for component_type in component_types:
response = boto3_client.list_objects(
Bucket=storage.bucket_name,
Prefix=f"{COMMON_LAYERS_DIR}/{component_type}"
)
for file in response.get('Contents', []):
ProcessFile(storage, owner, component_type, file).run()
Binary file not shown.
79 changes: 79 additions & 0 deletions django_project/cplus_api/tests/test_sync_default_layers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
from shutil import copyfile

from core.settings.utils import absolute_path
from cplus_api.models.layer import (
InputLayer,
COMMON_LAYERS_DIR
)
from cplus_api.tasks.sync_default_layers import sync_default_layers
from cplus_api.tests.common import BaseAPIViewTransactionTest


class TestSyncDefaultLayer(BaseAPIViewTransactionTest):
def setUp(self, *args, **kwargs):
super().setUp(*args, **kwargs)
# print(help(self))
# breakpoint()
self.superuser.username = os.getenv('ADMIN_USERNAME')
self.superuser.save()

def base_run(self):
# Check Input Layer before test
input_layers = InputLayer.objects.filter(
name='test_pathway_2.tif',
owner=self.superuser,
privacy_type=InputLayer.PrivacyTypes.COMMON,
component_type=InputLayer.ComponentTypes.NCS_PATHWAY
)
self.assertFalse(input_layers.exists())

source_path = absolute_path(
'cplus_api', 'tests', 'data',
'pathways', 'test_pathway_2.tif'
)
dest_path = (
f'/home/web/media/minio_test/{COMMON_LAYERS_DIR}/'
f'{InputLayer.ComponentTypes.NCS_PATHWAY}/test_pathway_2.tif'
)
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
copyfile(source_path, dest_path)
sync_default_layers()
input_layers = input_layers.all()
input_layer = input_layers[0]
first_modified_on = input_layer.modified_on
self.assertTrue(input_layers.exists())
metadata = {
'crs': 'EPSG:32735',
'name': 'test_pathway_2.tif',
'no_data': -9999.0,
'is_raster': True,
'resolution': [19.676449999999022, 19.676448888890445],
'description': 'test_pathway_2.tif'
}
self.assertEqual(input_layer.metadata, metadata)

# Rerun sync default layers
sync_default_layers()
# Check modified time is not changing, because the file is not updated
input_layer.refresh_from_db()
self.assertEqual(input_layer.modified_on, first_modified_on)
return input_layer, source_path, dest_path

def test_new_layer(self):
"""
Test when a new file is added to the common layers directory
:return:
:rtype:
"""
self.base_run()

def test_file_updated(self):
input_layer, source_path, dest_path = self.base_run()
first_modified_on = input_layer.modified_on
copyfile(source_path, dest_path)
sync_default_layers()

# Check modified_on is updated
input_layer.refresh_from_db()
self.assertNotEquals(input_layer.modified_on, first_modified_on)

0 comments on commit 9871c92

Please sign in to comment.