diff --git a/per/cache.py b/per/cache.py
new file mode 100644
index 000000000..3d97cfe40
--- /dev/null
+++ b/per/cache.py
@@ -0,0 +1,55 @@
+import hashlib
+import json
+import typing
+
+import django_filters
+from django.core.serializers.json import DjangoJSONEncoder
+
+from per.models import OpsLearningCacheResponse
+
+
+class OpsLearningSummaryCacheHelper:
+    @staticmethod
+    def calculate_md5_str(string):
+        hash_md5 = hashlib.md5()
+        hash_md5.update(string)
+        return hash_md5.hexdigest()
+
+    @classmethod
+    def generate_hash(cls, value: typing.Union[None, str, dict]) -> str:
+        # TODO: Use OrderedDict
+        if value is None:
+            return ""
+        hashable = None
+        if isinstance(value, str):
+            hashable = value.encode("utf-8")
+        elif isinstance(value, dict):
+            hashable = json.dumps(
+                value,
+                sort_keys=True,
+                indent=2,
+                cls=DjangoJSONEncoder,
+            ).encode("utf-8")
+        else:
+            raise TypeError(f"Invalid type: {type(value)}")
+        return cls.calculate_md5_str(hashable)
+
+    @classmethod
+    def get_or_create(
+        cls,
+        request,
+        filter_sets: typing.List[django_filters.FilterSet],
+    ):
+        filter_data = {
+            key: value
+            for key, value in request.query_params.items()
+            if key in [field for filter_set in filter_sets for field in filter_set.get_filters()]
+        }
+        hash_value = cls.generate_hash(filter_data)
+        # Check if the summary is already cached
+        ops_learning_summary = OpsLearningCacheResponse.objects.filter(used_filters_hash=hash_value).first()
+        if ops_learning_summary:
+            return ops_learning_summary
+        # TODO: Create a new summary based on the filters
+        # returning a dummy object for now
+        return OpsLearningCacheResponse.objects.first()
diff --git a/per/drf_views.py b/per/drf_views.py
index 46e860e36..d288cdbdd 100644
--- a/per/drf_views.py
+++ b/per/drf_views.py
@@ -21,6 +21,7 @@
 from deployments.models import SectorTag
 from main.permissions import DenyGuestUserMutationPermission, DenyGuestUserPermission
 from main.utils import SpreadSheetContentNegotiation
+from per.cache import OpsLearningSummaryCacheHelper
 from per.filter_set import (
     PerDocumentFilter,
     PerOverviewFilter,
@@ -809,6 +810,20 @@ def get_renderer_context(self):
 
         return context
 
+    @extend_schema(filters=True)
+    @action(
+        detail=False,
+        methods=["GET"],
+        permission_classes=[permissions.IsAuthenticated],
+        url_path="summary",
+    )
+    def summary(self, request):
+        """
+        Return a cached summary of the Ops Learning data for the given filters.
+        """
+        ops_learning_summary_instance = OpsLearningSummaryCacheHelper.get_or_create(request, [self.filterset_class])
+        return response.Response(OpsLearningSummarySerializer(ops_learning_summary_instance).data)
+
 
 class PerDocumentUploadViewSet(viewsets.ModelViewSet):
     queryset = PerDocumentUpload.objects.all()
diff --git a/per/management/commands/create_dummy_opslearningsummary.py b/per/management/commands/create_dummy_opslearningsummary.py
index 192f2d007..9e5877ade 100644
--- a/per/management/commands/create_dummy_opslearningsummary.py
+++ b/per/management/commands/create_dummy_opslearningsummary.py
@@ -1,5 +1,5 @@
-from django.core.management.base import BaseCommand
 from django.conf import settings
+from django.core.management.base import BaseCommand
 from django.db import transaction
 
 from per.factories import (
@@ -18,26 +18,20 @@ def generate_sector_response(self, ops_learnings: list, ops_learning_cache_respo
         Generate dummy OpsLearningSectorCacheResponse
         """
         dummy_ops_learning_sector_cache_response = OpsLearningSectorCacheResponseFactory.create_batch(
-            5,
-            filter_response=ops_learning_cache_response
+            5, filter_response=ops_learning_cache_response
         )
         for ops_learning_sector_cache in dummy_ops_learning_sector_cache_response:
-            ops_learning_sector_cache.used_ops_learning.add(
-                *ops_learnings
-            )
+            ops_learning_sector_cache.used_ops_learning.add(*ops_learnings)
 
     def generate_component_response(self, ops_learnings: list, ops_learning_cache_response: list):
         """
         Generate dummy OpsLearningComponentCacheResponse
         """
         dummy_ops_learning_component_cache_response = OpsLearningComponentCacheResponseFactory.create_batch(
-            5,
-            filter_response=ops_learning_cache_response
+            5, filter_response=ops_learning_cache_response
         )
         for ops_learning_component_cache in dummy_ops_learning_component_cache_response:
-            ops_learning_component_cache.used_ops_learning.add(
-                *ops_learnings
-            )
+            ops_learning_component_cache.used_ops_learning.add(*ops_learnings)
 
     def generate_ops_learning_summary(self):
         selected_ops_learning = OpsLearningFactory.create_batch(50, is_validated=True)
@@ -45,20 +39,18 @@ def generate_ops_learning_summary(self):
         # Generating dummy OpsLearningCacheResponse
         dummy_ops_learning_cache_responses = OpsLearningCacheResponseFactory.create_batch(5)
         for ops_learning_cache in dummy_ops_learning_cache_responses:
-            ops_learning_cache.used_ops_learning.add(
-                *selected_ops_learning[:10]
-            )
+            ops_learning_cache.used_ops_learning.add(*selected_ops_learning[:10])
             self.generate_sector_response(selected_ops_learning[11:20], ops_learning_cache)
             self.generate_component_response(selected_ops_learning[21:50], ops_learning_cache)
 
-        self.stdout.write(self.style.SUCCESS('Successfully created dummy OpsLearningSummary'))
+        self.stdout.write(self.style.SUCCESS("Successfully created dummy OpsLearningSummary"))
 
     @transaction.atomic
     def handle(self, *args, **options):
-        if not settings.DEBUG and not settings.GO_ENVIRONMENT == 'development':
+        if not settings.DEBUG and settings.GO_ENVIRONMENT != "development":
             self.stderr.write(
-                'Dummy data generation is not allowed for this instance.'
-                ' Use environment variable DEBUG set to True and GO_ENVIRONMENT to development'
+                "Dummy data generation is not allowed on this instance."
+ " Use environment variable DEBUG set to True and GO_ENVIRONMENT to development" ) return self.generate_ops_learning_summary() diff --git a/per/serializers.py b/per/serializers.py index bbba4b64c..d4e58ff78 100644 --- a/per/serializers.py +++ b/per/serializers.py @@ -1148,7 +1148,7 @@ class OpsLearningComponentCacheResponseSerializer(serializers.ModelSerializer): class Meta: model = OpsLearningComponentCacheResponse - fields = ["summary", "title",] + fields = ["summary", "title"] class OpsLearningSummarySerializer(serializers.ModelSerializer): diff --git a/per/task.py b/per/task.py new file mode 100644 index 000000000..8d4bdf99c --- /dev/null +++ b/per/task.py @@ -0,0 +1,116 @@ +from ast import literal_eval + +import pandas as pd +from celery import shared_task +from django.db.models import Exists, F, OuterRef + +from api.models import Country +from country_plan.models import CountryPlan +from per.models import FormPrioritization, Overview + + +class OpsLearningSummaryTask: + + @classmethod + def generate_regional_prioritization_list(self, df: pd.DataFrame): + """Generates a list of regional prioritizations from the given data.""" + df_exploded = df.explode("components") + regional_df = df_exploded.groupby(["region", "components"]).size().reset_index(name="count") + regional_df = regional_df[regional_df["count"] > 2] + regional_list = regional_df.groupby("region")["components"].apply(list).reset_index() + return regional_list + + @classmethod + def generate_global_prioritization_list(self, regional_df: pd.DataFrame): + """Generates a global prioritization list from regional data.""" + global_df = regional_df.explode("components").groupby("components").size().reset_index(name="count") + global_components = global_df[global_df["count"] > 2]["components"].tolist() + global_list = {"global": global_components} + return global_list + + @classmethod + def generate_country_prioritization_list( + self, regional_df: pd.DataFrame, global_components: list, prioritization_df: pd.DataFrame, country_df: pd.DataFrame + ): + """Generates a country-level prioritization list.""" + regional_dict = dict(zip(regional_df["region"], regional_df["components"])) + merged_df = country_df[["country", "region"]].merge(prioritization_df, on=["country", "region"], how="left") + no_prioritization_df = merged_df[merged_df["components"].isna()] + + for index, row in no_prioritization_df.iterrows(): + region_id = row["region"] + components = regional_dict.get(region_id, global_components["global"]) + no_prioritization_df.at[index, "components"] = components + + final_df = pd.concat([merged_df.dropna(subset=["components"]), no_prioritization_df]) + final_df["components"] = final_df["components"].apply(lambda x: literal_eval(str(x))) + final_df = final_df[["country", "components"]] + return final_df + + @classmethod + def generate_priotization_list(self): + exclusion_list = [ + "IFRC Africa", + "IFRC Americas", + "IFRC Asia-Pacific", + "IFRC Europe", + "IFRC Geneva", + "IFRC MENA", + "Benelux ERU", + "ICRC", + ] + + # Get all countries + country_qs = ( + Country.objects.filter(is_deprecated=False, society_name__isnull=False) + .exclude(name__in=exclusion_list) + .annotate(has_country_plan=Exists(CountryPlan.objects.filter(country=OuterRef("pk"), is_publish=True))) + .values("id") + ) + country_df = pd.DataFrame(list(country_qs)) + country_df = country_df.rename(columns={"id": "country"}, inplace=True) + + # Get all PER Overview + per_overview_qs = Overview.objects.select_related("country").values( + "id", + "country_id", + 
"country__region", + "assessment_number", + ) + per_overview_df = pd.DataFrame(list(per_overview_qs)) + per_overview_df = per_overview_df.rename( + columns={"id": "overview", "country_id": "country", "country__region": "region"}, inplace=True + ) + + # Get all PER Prioritization + per_priotization_qs = ( + FormPrioritization.objects.filter( + is_draft=False, + prioritized_action_responses__isnull=False, + ) + .annotate( + components=F("prioritized_action_responses__component"), + ) + .values( + "overview__country__id", + "components", + ) + ) + per_priotization_df = pd.DataFrame(list(per_priotization_qs)) + per_priotization_df = per_priotization_df.merge( + per_overview_df[["overview", "country", "region", "assessment_number"]], on="overview", how="left" + ) + per_priotization_df = per_priotization_df.sort_values("assessment_number").drop_duplicates(subset="country", keep="last") + per_priotization_df = per_priotization_df[["region", "country", "components"]] + + # Generate the prioritization list + regional_list = self.generate_regional_prioritization_list(per_priotization_df) + global_list = self.generate_global_prioritization_list(regional_list) + country_list = self.generate_country_prioritization_list(regional_list, global_list, per_priotization_df, country_df) + + return regional_list, global_list, country_list + + +@shared_task +def generate_summary(filter_data, hash_value): + regional_list, global_list, country_list = OpsLearningSummaryTask.generate_priotization_list() diff --git a/per/utils.py b/per/utils.py index 28da04666..9392d2f7a 100644 --- a/per/utils.py +++ b/per/utils.py @@ -1,9 +1,4 @@ -import hashlib -import json -import typing - from django.contrib.auth.models import Permission -from django.core.serializers.json import DjangoJSONEncoder from django.db.models import Q @@ -31,29 +26,3 @@ def filter_per_queryset_by_user_access(user, queryset): ).distinct() # Normal access return queryset.filter(created_by=user) - - -class CacheHelper: - @staticmethod - def calculate_md5_str(string): - hash_md5 = hashlib.md5() - hash_md5.update(string) - return hash_md5.hexdigest() - - @classmethod - def generate_hash(cls, value: typing.Union[None, str, dict]) -> str: - if value is None: - return "" - hashable = None - if isinstance(value, str): - hashable = value - elif isinstance(value, dict): - hashable = json.dumps( - value, - sort_keys=True, - indent=2, - cls=DjangoJSONEncoder, - ).encode("utf-8") - else: - raise Exception(f"Invalid Type: {type(value)}") - return cls.calculate_md5_str(hashable)