Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Operational learning stats API #2335

Merged
75 changes: 73 additions & 2 deletions per/drf_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytz
from django.conf import settings
from django.db import transaction
from django.db.models import Prefetch, Q
from django.db.models import Count, F, Prefetch, Q
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from django.utils.translation import get_language as django_get_language
Expand All @@ -20,7 +20,7 @@
from rest_framework.response import Response
from rest_framework.settings import api_settings

from api.models import Country
from api.models import Appeal, Country, Region
from deployments.models import SectorTag
from main.permissions import DenyGuestUserMutationPermission, DenyGuestUserPermission
from main.utils import SpreadSheetContentNegotiation
Expand Down Expand Up @@ -82,6 +82,7 @@
OpsLearningInSerializer,
OpsLearningOrganizationTypeSerializer,
OpsLearningSerializer,
OpsLearningStatSerializer,
OpsLearningSummarySerializer,
PerAssessmentSerializer,
PerDocumentUploadSerializer,
Expand Down Expand Up @@ -921,6 +922,76 @@ def summary(self, request):
)
return response.Response(OpsLearningSummarySerializer(ops_learning_summary_instance).data)

@extend_schema(
request=None,
filters=True,
responses=OpsLearningStatSerializer,
)
@action(
detail=False,
methods=["GET"],
permission_classes=[DenyGuestUserMutationPermission, OpsLearningPermission],
url_path="stats",
)
def stats(self, request):
"""
Get the Ops Learning stats based on the filters
"""
queryset = self.filter_queryset(self.get_queryset()).filter(is_validated=True)
ops_data = queryset.aggregate(
operations_included=Count("appeal_code", distinct=True),
learning_extracts=Count("id", distinct=True),
sector_covered=Count("sector_validated", distinct=True),
source_used=Count("appeal_code__appealdocument", distinct=True),
)

learning_by_sector = (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add _qs as postfix (learning_by_sector_qs) for all querysets variables

SectorTag.objects.filter(validated_sectors__in=queryset, title__isnull=False)
.annotate(count=Count("validated_sectors", distinct=True))
susilnem marked this conversation as resolved.
Show resolved Hide resolved
.values("title", "count")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to add 'id', this will be used by client as unique key

)

sources_overtime = (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a NOTE that this queryset is unbounded, and we may need to add some start_date filter in the future.

Appeal.objects.filter(opslearning__in=queryset)
.annotate(
type=F("atype"),
date=F("start_date"),
count=Count("appealdocument", distinct=True),
)
.values("type", "date", "count")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to add 'id', this will be used by client as unique key

)

learning_by_region = (
Region.objects.filter(appeal__opslearning__in=queryset)
.annotate(
region_name=F("label"),
count=Count("appeal__opslearning", distinct=True),
)
.values("region_name", "count")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to add 'id', this will be used by client as unique key

)

learning_by_country = (
Country.objects.filter(appeal__opslearning__in=queryset)
.annotate(
country_id=F("id"),
country_name=F("name"),
count=Count("appeal__opslearning", distinct=True),
)
.values("country_id", "country_name", "count")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to add 'id', this will be used by client as unique key

)

data = {
"operations_included": ops_data["operations_included"],
"learning_extracts": ops_data["learning_extracts"],
"sectors_covered": ops_data["sector_covered"],
"sources_used": ops_data["source_used"],
"learning_by_region": learning_by_region,
"learning_by_sector": learning_by_sector,
"sources_overtime": sources_overtime,
"learning_by_country": learning_by_country,
}
return response.Response(OpsLearningStatSerializer(data).data)


class PerDocumentUploadViewSet(viewsets.ModelViewSet):
queryset = PerDocumentUpload.objects.all()
Expand Down
19 changes: 19 additions & 0 deletions per/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import factory
from factory import fuzzy

from api.factories.country import CountryFactory
from api.models import Appeal, AppealDocument
from deployments.factories.project import SectorTagFactory
from per.models import (
AssessmentType,
Expand Down Expand Up @@ -105,12 +107,22 @@ class Meta:
model = FormPrioritization


class AppealFactory(factory.django.DjangoModelFactory):
class Meta:
model = Appeal

country = factory.SubFactory(CountryFactory)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not use factory.SubFactory, this will create objects without developer knowledge which can have weird confusing related to data generation, and it's effect.



class OpsLearningFactory(factory.django.DjangoModelFactory):
learning = fuzzy.FuzzyText(length=50)

class Meta:
model = OpsLearning

appeal_code = factory.SubFactory(AppealFactory)
is_validated = fuzzy.FuzzyChoice([True, False])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not use fuzzy choice for is_validated, we should manually define in the test cases as lots of endpoints depends on this



class OpsLearningCacheResponseFactory(factory.django.DjangoModelFactory):
used_filters_hash = fuzzy.FuzzyText(length=20)
Expand Down Expand Up @@ -141,3 +153,10 @@ class OpsLearningComponentCacheResponseFactory(factory.django.DjangoModelFactory

class Meta:
model = OpsLearningComponentCacheResponse


class AppealDocumentFactory(factory.django.DjangoModelFactory):
class Meta:
model = AppealDocument

appeal = factory.SubFactory(AppealFactory)
34 changes: 34 additions & 0 deletions per/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,7 @@ class Meta:
"atype",
"event_details",
"country",
"start_date",
)


Expand Down Expand Up @@ -1253,3 +1254,36 @@ class Meta:
"id",
"title",
]


class LearningByRegionSerializer(serializers.Serializer):
region_name = serializers.CharField()
count = serializers.IntegerField()


class LearningByCountrySerializer(serializers.Serializer):
country_id = serializers.IntegerField()
country_name = serializers.CharField()
count = serializers.IntegerField()


class LearningBySectorSerializer(serializers.Serializer):
title = serializers.CharField()
count = serializers.IntegerField()


class LearningSourcesOvertimeSerializer(serializers.Serializer):
type = serializers.IntegerField()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@susilnem Do we return type as integer or get display name for type? If no may be we need to do type = serializers.CharField(source='get_type_display', read_only=True) to get the type name.

Copy link
Member

@susilnem susilnem Dec 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, for now it's not required as it should be managed by the client side

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure about this? It will be complicated for the client side to do this mapping.

source='get_type_display' can't be used here, as the object will be a dict instead of a proper object. We could use serializer_method or do the enum->label in the views.

date = serializers.DateTimeField()
count = serializers.IntegerField()


class OpsLearningStatSerializer(serializers.Serializer):
operations_included = serializers.IntegerField()
learning_extracts = serializers.IntegerField()
sectors_covered = serializers.IntegerField()
sources_used = serializers.IntegerField()
learning_by_region = LearningByRegionSerializer(many=True)
learning_by_country = LearningByCountrySerializer(many=True)
learning_by_sector = LearningBySectorSerializer(many=True)
sources_overtime = LearningSourcesOvertimeSerializer(many=True)
Comment on lines +1293 to +1296
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's explicitly define required=True for all fields

74 changes: 74 additions & 0 deletions per/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,19 @@
from unittest import mock

from api.factories.country import CountryFactory
from api.factories.region import RegionFactory
from api.models import AppealType
from main.test_case import APITestCase
from per.factories import (
AppealDocumentFactory,
AppealFactory,
FormAreaFactory,
FormComponentFactory,
FormPrioritizationFactory,
OpsLearningFactory,
OverviewFactory,
PerWorkPlanFactory,
SectorTagFactory,
)

from .models import WorkPlanStatus
Expand Down Expand Up @@ -224,3 +229,72 @@ def test_summary_generation(self, generate_summary):
}
self.check_response_id(url=url, data=filters)
self.assertTrue(generate_summary.assert_called)


class OpsLearningStatsTestCase(APITestCase):

def setUp(self):
super().setUp()
self.region = RegionFactory.create(label="Region A")
self.country = CountryFactory.create(region=self.region, name="Country A")

self.sector1 = SectorTagFactory.create(title="Sector 1")
self.sector2 = SectorTagFactory.create(title="Sector 2")

self.appeal1 = AppealFactory.create(
region=self.region, country=self.country, code="APP001", atype=0, start_date="2023-01-01"
)
self.appeal2 = AppealFactory.create(
region=self.region, country=self.country, code="APP002", atype=1, start_date="2023-02-01"
)

AppealDocumentFactory.create(appeal=self.appeal1)
AppealDocumentFactory.create(appeal=self.appeal2)

self.ops_learning1 = OpsLearningFactory.create(is_validated=True, appeal_code=self.appeal1)
self.ops_learning1.sector_validated.set([self.sector1])

self.ops_learning2 = OpsLearningFactory.create(is_validated=True, appeal_code=self.appeal2)
self.ops_learning2.sector_validated.set([self.sector2])

self.ops_learning3 = OpsLearningFactory.create(is_validated=False, appeal_code=self.appeal2)
self.ops_learning3.sector_validated.set([self.sector2])

def test_ops_learning_stats(self):
url = "/api/v2/ops-learning/stats/"
response = self.client.get(url)

self.assert_200(response)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are comparing all fields data, it will be better to just use basic dict assert instead.

assert response.data == expected_data

expected_keys = [
"operations_included",
"sources_used",
"learning_extracts",
"sectors_covered",
"sources_overtime",
"learning_by_region",
"learning_by_sector",
"learning_by_country",
]
for key in expected_keys:
self.assertIn(key, response.data)

# Updated counts based on validated entries
self.assertEqual(response.data["operations_included"], 2)
self.assertEqual(response.data["sources_used"], 2)
self.assertEqual(response.data["learning_extracts"], 2)
self.assertEqual(response.data["sectors_covered"], 2)

# Validate learning by region
region_data = response.data["learning_by_region"]
self.assertEqual(region_data[0]["count"], 2)

# Validate learning by sector
sector_data = response.data["learning_by_sector"]
self.assertEqual(len(sector_data), 2)

# Validate learning by country
country_data = response.data["learning_by_country"]
self.assertEqual(len(country_data), 1)

sources_overtime = response.data["sources_overtime"]
self.assertEqual(len(sources_overtime), 2)
Loading