diff --git a/deployment/docker/requirements.txt b/deployment/docker/requirements.txt index 29d309b..6b73c4d 100644 --- a/deployment/docker/requirements.txt +++ b/deployment/docker/requirements.txt @@ -58,3 +58,6 @@ minio==7.2.5 flower==1.2.0 django-revproxy @ https://github.com/jazzband/django-revproxy/archive/refs/tags/0.11.0.zip + +# For getting raster metadata +rasterio==1.3.10 \ No newline at end of file diff --git a/django_project/core/celery.py b/django_project/core/celery.py index 1fc56d7..c81ddf1 100644 --- a/django_project/core/celery.py +++ b/django_project/core/celery.py @@ -55,6 +55,10 @@ 'clean-multipart-upload': { 'task': 'clean_multipart_upload', 'schedule': crontab(minute='0', hour='2'), # Run everyday at 2am + }, + 'sync-default-layers': { + 'task': 'sync_default_layers', + 'schedule': crontab(minute='0', hour='1'), # Run everyday at 1am } } diff --git a/django_project/cplus_api/admin.py b/django_project/cplus_api/admin.py index d9fab4b..4bf8b57 100644 --- a/django_project/cplus_api/admin.py +++ b/django_project/cplus_api/admin.py @@ -49,7 +49,7 @@ class InputLayerAdmin(admin.ModelAdmin): 'size', 'component_type', 'privacy_type') search_fields = ['name', 'uuid'] list_filter = ["layer_type", "owner", "component_type", "privacy_type"] - readonly_fields = ['layer_type', 'component_type', 'uuid'] + readonly_fields = ['layer_type', 'component_type', 'uuid', 'modified_on'] actions = [trigger_verify_input_layer] diff --git a/django_project/cplus_api/migrations/0011_inputlayer_modified_on.py b/django_project/cplus_api/migrations/0011_inputlayer_modified_on.py new file mode 100644 index 0000000..5a658a4 --- /dev/null +++ b/django_project/cplus_api/migrations/0011_inputlayer_modified_on.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.7 on 2024-08-28 12:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('cplus_api', '0010_inputlayer_metadata'), + ] + + operations = [ + migrations.AddField( + model_name='inputlayer', + name='modified_on', + field=models.DateTimeField(auto_now=True), + ), + ] diff --git a/django_project/cplus_api/models/layer.py b/django_project/cplus_api/models/layer.py index c8e8b7f..4bcb691 100644 --- a/django_project/cplus_api/models/layer.py +++ b/django_project/cplus_api/models/layer.py @@ -125,6 +125,11 @@ class PrivacyTypes(models.TextChoices): help_text='Layer Metadata.' ) + modified_on = models.DateTimeField(auto_now=True) + + def __str__(self): + return f"{self.name} | {self.component_type}" + def download_to_working_directory(self, base_dir: str): if not self.is_available(): return None diff --git a/django_project/cplus_api/tasks/__init__.py b/django_project/cplus_api/tasks/__init__.py index 7323674..e9f406c 100644 --- a/django_project/cplus_api/tasks/__init__.py +++ b/django_project/cplus_api/tasks/__init__.py @@ -2,3 +2,4 @@ from .runner import * # noqa from .remove_layers import * # noqa from .verify_input_layer import * # noqa +from .sync_default_layers import * diff --git a/django_project/cplus_api/tasks/sync_default_layers.py b/django_project/cplus_api/tasks/sync_default_layers.py new file mode 100644 index 0000000..f6ec620 --- /dev/null +++ b/django_project/cplus_api/tasks/sync_default_layers.py @@ -0,0 +1,169 @@ +import logging +import os +import typing +import tempfile + +import rasterio +from datetime import datetime +from django.utils import timezone + +from celery import shared_task +from django.contrib.auth.models import User +from storages.backends.s3 import S3Storage +from django.conf import settings +from django.core.files.storage import FileSystemStorage + +from cplus_api.models import ( + select_input_layer_storage, + InputLayer, + COMMON_LAYERS_DIR +) +from cplus_api.utils.api_helper import get_layer_type + +logger = logging.getLogger(__name__) + + +class ProcessFile: + """ + Class to process a file dictionary into an Input Layer + + :param storage: Django storage instance + :type storage: FileSystemStorage or S3Storage + + :param owner: Owner of the input layer + :type owner: User + + :param component_type: Component type of the input layer e.g. ncs_pathway + :type component_type: str + + :param file: Dictionary of the file info to be processed + :type file: dict + + :return: None + :rtype: None + """ + def __init__( + self, + storage: typing.Union[FileSystemStorage, S3Storage], + owner: User, + component_type: str, + file: dict + ): + self.storage = storage + self.owner = owner + self.component_type = component_type + self.file = file + self.input_layer, self.created = InputLayer.objects.get_or_create( + name=os.path.basename(file['Key']), + owner=owner, + privacy_type=InputLayer.PrivacyTypes.COMMON, + component_type=component_type, + defaults={ + 'created_on': timezone.now(), + 'layer_type': get_layer_type(file['Key']) + } + ) + + def read_metadata( + self, + file_path: str + ): + """ + Read metadata from layer file and save it to InputLayer object + + :param file_path: path for input file + :type file_path: str + + :return: None + :rtype: None + """ + with rasterio.open(file_path) as dataset: + transform = dataset.transform + res_x = abs(transform[0]) + res_y = abs(transform[4]) + crs = dataset.crs + nodata = dataset.nodata + + metadata = { + "name": os.path.basename(file_path), + "is_raster": get_layer_type(file_path) == 0, + "description": os.path.basename(file_path), + "crs": str(crs), + "resolution": [res_x, res_y], + "no_data": nodata + } + self.input_layer.metadata = metadata + self.input_layer.file.name = self.file['Key'] + self.input_layer.save() + + def run(self): + """ + Function to trigger file processing + + :return: None + :rtype: None + """ + # Save layer if the file is modified after input layers last saved OR + # if input layer is a new record + if ( + self.file['LastModified'] > self.input_layer.modified_on or + self.created + ): + media_root = self.storage.location or settings.MEDIA_ROOT + if isinstance(self.storage, FileSystemStorage): + download_path = os.path.join(media_root, self.file['Key']) + os.makedirs(os.path.dirname(download_path), exist_ok=True) + self.read_metadata(download_path) + os.remove(download_path) + else: + with tempfile.NamedTemporaryFile() as tmpfile: + boto3_client = self.storage.connection.meta.client + boto3_client.download_file( + self.storage.bucket_name, + self.file['Key'], + tmpfile.name, + Config=settings.AWS_TRANSFER_CONFIG + ) + self.read_metadata(tmpfile.name) + + +@shared_task(name="sync_default_layers") +def sync_default_layers(): + """ + Create Input Layers from default layers copied to S3/local directory + """ + + storage = select_input_layer_storage() + component_types = [c[0] for c in InputLayer.ComponentTypes.choices] + admin_username = os.getenv('ADMIN_USERNAME') + owner = User.objects.get(username=admin_username) + if isinstance(storage, FileSystemStorage): + media_root = storage.location or settings.MEDIA_ROOT + for component_type in component_types: + component_path = os.path.join( + media_root, COMMON_LAYERS_DIR, component_type + ) + os.makedirs(component_path, exist_ok=True) + layers = os.listdir(component_path) + for layer in layers: + key = f"{COMMON_LAYERS_DIR}/{component_type}/{layer}" + download_path = os.path.join(media_root, key) + last_modified = datetime.fromtimestamp( + os.path.getmtime(download_path), + tz=timezone.now().tzinfo + ) + file = { + "Key": key, + "LastModified": last_modified, + "Size": os.path.getsize(download_path) + } + ProcessFile(storage, owner, component_type, file).run() + else: + boto3_client = storage.connection.meta.client + for component_type in component_types: + response = boto3_client.list_objects( + Bucket=storage.bucket_name, + Prefix=f"{COMMON_LAYERS_DIR}/{component_type}" + ) + for file in response.get('Contents', []): + ProcessFile(storage, owner, component_type, file).run() diff --git a/django_project/cplus_api/tests/data/pathways/test_pathway_2.tif b/django_project/cplus_api/tests/data/pathways/test_pathway_2.tif new file mode 100644 index 0000000..956e04f Binary files /dev/null and b/django_project/cplus_api/tests/data/pathways/test_pathway_2.tif differ diff --git a/django_project/cplus_api/tests/test_sync_default_layers.py b/django_project/cplus_api/tests/test_sync_default_layers.py new file mode 100644 index 0000000..99bc44c --- /dev/null +++ b/django_project/cplus_api/tests/test_sync_default_layers.py @@ -0,0 +1,79 @@ +import os +from shutil import copyfile + +from core.settings.utils import absolute_path +from cplus_api.models.layer import ( + InputLayer, + COMMON_LAYERS_DIR +) +from cplus_api.tasks.sync_default_layers import sync_default_layers +from cplus_api.tests.common import BaseAPIViewTransactionTest + + +class TestSyncDefaultLayer(BaseAPIViewTransactionTest): + def setUp(self, *args, **kwargs): + super().setUp(*args, **kwargs) + # print(help(self)) + # breakpoint() + self.superuser.username = os.getenv('ADMIN_USERNAME') + self.superuser.save() + + def base_run(self): + # Check Input Layer before test + input_layers = InputLayer.objects.filter( + name='test_pathway_2.tif', + owner=self.superuser, + privacy_type=InputLayer.PrivacyTypes.COMMON, + component_type=InputLayer.ComponentTypes.NCS_PATHWAY + ) + self.assertFalse(input_layers.exists()) + + source_path = absolute_path( + 'cplus_api', 'tests', 'data', + 'pathways', 'test_pathway_2.tif' + ) + dest_path = ( + f'/home/web/media/minio_test/{COMMON_LAYERS_DIR}/' + f'{InputLayer.ComponentTypes.NCS_PATHWAY}/test_pathway_2.tif' + ) + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + copyfile(source_path, dest_path) + sync_default_layers() + input_layers = input_layers.all() + input_layer = input_layers[0] + first_modified_on = input_layer.modified_on + self.assertTrue(input_layers.exists()) + metadata = { + 'crs': 'EPSG:32735', + 'name': 'test_pathway_2.tif', + 'no_data': -9999.0, + 'is_raster': True, + 'resolution': [19.676449999999022, 19.676448888890445], + 'description': 'test_pathway_2.tif' + } + self.assertEqual(input_layer.metadata, metadata) + + # Rerun sync default layers + sync_default_layers() + # Check modified time is not changing, because the file is not updated + input_layer.refresh_from_db() + self.assertEqual(input_layer.modified_on, first_modified_on) + return input_layer, source_path, dest_path + + def test_new_layer(self): + """ + Test when a new file is added to the common layers directory + :return: + :rtype: + """ + self.base_run() + + def test_file_updated(self): + input_layer, source_path, dest_path = self.base_run() + first_modified_on = input_layer.modified_on + copyfile(source_path, dest_path) + sync_default_layers() + + # Check modified_on is updated + input_layer.refresh_from_db() + self.assertNotEquals(input_layer.modified_on, first_modified_on)