Skip to content

Operational learning enhancements and methodologies #2369

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 29, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion per/admin.py
Original file line number Diff line number Diff line change
@@ -68,7 +68,11 @@ def area_number(self, obj):

def get_queryset(self, request):
return (
super().get_queryset(request).order_by("area__area_num", "component_num", "component_letter").select_related("area")
super()
.get_queryset(request)
.exclude(component_num=14, is_parent__isnull=True)
.order_by("area__area_num", "component_num", "component_letter")
.select_related("area")
)


85 changes: 83 additions & 2 deletions per/drf_views.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
import pytz
from django.conf import settings
from django.db import transaction
from django.db.models import Prefetch, Q
from django.db.models import Count, F, Prefetch, Q
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from django.utils.translation import get_language as django_get_language
@@ -20,7 +20,7 @@
from rest_framework.response import Response
from rest_framework.settings import api_settings

from api.models import Country
from api.models import Country, Region
from deployments.models import SectorTag
from main.permissions import DenyGuestUserMutationPermission, DenyGuestUserPermission
from main.utils import SpreadSheetContentNegotiation
@@ -82,6 +82,7 @@
OpsLearningInSerializer,
OpsLearningOrganizationTypeSerializer,
OpsLearningSerializer,
OpsLearningStatSerializer,
OpsLearningSummarySerializer,
PerAssessmentSerializer,
PerDocumentUploadSerializer,
@@ -169,11 +170,19 @@ class FormAreaViewset(viewsets.ReadOnlyModelViewSet):

class FormComponentFilter(filters.FilterSet):
    """Filterset for PER form components.

    Supports an exact ``area`` / ``area_id`` filter and an optional
    ``exclude_subcomponents`` flag that hides the sub-components of
    component 14 (rows with ``component_num=14`` and ``is_parent`` unset).
    """

    area_id = filters.NumberFilter(field_name="area__id", lookup_expr="exact")
    exclude_subcomponents = filters.BooleanFilter(
        method="get_exclude_subcomponents",
    )

    class Meta:
        model = FormComponent
        fields = {"area": ("exact",)}

    def get_exclude_subcomponents(self, queryset, name, value):
        # Flag absent or falsy: leave the queryset untouched.
        if not value:
            return queryset
        # Drop component-14 sub-components (is_parent is NULL on sub-components).
        return queryset.exclude(component_num=14, is_parent__isnull=True)


class FormComponentViewset(viewsets.ReadOnlyModelViewSet):
"""PER Form Components Viewset"""
@@ -921,6 +930,78 @@ def summary(self, request):
)
return response.Response(OpsLearningSummarySerializer(ops_learning_summary_instance).data)

@extend_schema(
    request=None,
    filters=True,
    responses=OpsLearningStatSerializer,
)
@action(
    detail=False,
    methods=["GET"],
    # NOTE(review): a mutation-deny permission on a GET-only action reads as a
    # guest-blocking measure -- confirm against DenyGuestUserMutationPermission's
    # actual semantics.
    permission_classes=[DenyGuestUserMutationPermission, OpsLearningPermission],
    url_path="stats",
)
def stats(self, request):
    """
    Get the Ops Learning stats based on the filters.

    Applies the viewset's standard filters, restricts to validated
    entries, and returns headline counts plus per-region, per-country,
    per-sector and over-time breakdowns serialized by
    OpsLearningStatSerializer.
    """
    # Base queryset: request filters applied, unvalidated learnings excluded.
    queryset = self.filter_queryset(self.get_queryset()).filter(is_validated=True)
    # Headline counts; distinct=True so joins through M2M relations
    # (e.g. sector_validated) do not inflate the numbers.
    ops_data = queryset.aggregate(
        operations_included=Count("appeal_code", distinct=True),
        learning_extracts=Count("id", distinct=True),
        sector_covered=Count("sector_validated", distinct=True),
        source_used=Count("appeal_document_id", distinct=True),
    )

    # Learnings grouped from the SectorTag side of the validated-sector M2M.
    learning_by_sector_qs = (
        SectorTag.objects.filter(validated_sectors__in=queryset, title__isnull=False)
        .annotate(sector_id=F("id"), count=Count("validated_sectors", distinct=True))
        .values("sector_id", "title", "count")
    )

    # NOTE: Queryset is unbounded, we may need to add some start_date filter.
    sources_overtime_qs = (
        queryset.filter(appeal_document_id__isnull=False)
        .annotate(
            atype=F("appeal_code__atype"),
            date=F("appeal_code__start_date"),
            count=Count("appeal_document_id", distinct=True),
        )
        .values("atype", "date", "count")
    )

    # Learnings grouped from the Region side via appeal -> opslearning.
    learning_by_region_qs = (
        Region.objects.filter(appeal__opslearning__in=queryset)
        .annotate(
            region_id=F("id"),
            region_name=F("label"),
            count=Count("appeal__opslearning", distinct=True),
        )
        .values("region_id", "region_name", "count")
    )

    # Learnings grouped from the Country side via appeal -> opslearning.
    learning_by_country_qs = (
        Country.objects.filter(appeal__opslearning__in=queryset)
        .annotate(
            country_id=F("id"),
            country_name=F("name"),
            count=Count("appeal__opslearning", distinct=True),
        )
        .values("country_id", "country_name", "count")
    )

    # Aggregate keys (sector_covered / source_used) are mapped onto the
    # serializer's plural field names here.
    data = {
        "operations_included": ops_data["operations_included"],
        "learning_extracts": ops_data["learning_extracts"],
        "sectors_covered": ops_data["sector_covered"],
        "sources_used": ops_data["source_used"],
        "learning_by_region": learning_by_region_qs,
        "learning_by_sector": learning_by_sector_qs,
        "sources_overtime": sources_overtime_qs,
        "learning_by_country": learning_by_country_qs,
    }
    return response.Response(OpsLearningStatSerializer(data).data)


class PerDocumentUploadViewSet(viewsets.ModelViewSet):
queryset = PerDocumentUpload.objects.all()
13 changes: 13 additions & 0 deletions per/factories.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
import factory
from factory import fuzzy

from api.models import Appeal, AppealDocument
from deployments.factories.project import SectorTagFactory
from per.models import (
AssessmentType,
@@ -105,6 +106,11 @@ class Meta:
model = FormPrioritization


class AppealFactory(factory.django.DjangoModelFactory):
    # Minimal factory for api.models.Appeal; all field values are supplied by callers.
    class Meta:
        model = Appeal


class OpsLearningFactory(factory.django.DjangoModelFactory):
learning = fuzzy.FuzzyText(length=50)

@@ -141,3 +147,10 @@ class OpsLearningComponentCacheResponseFactory(factory.django.DjangoModelFactory

class Meta:
model = OpsLearningComponentCacheResponse


class AppealDocumentFactory(factory.django.DjangoModelFactory):
    # Factory for api.models.AppealDocument; links to a freshly generated
    # Appeal unless one is passed in.
    class Meta:
        model = AppealDocument

    appeal = factory.SubFactory(AppealFactory)
73 changes: 73 additions & 0 deletions per/management/commands/migrate_sub_components_to_component14.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from django.core.management.base import BaseCommand

from per.models import FormComponent, OpsLearning


class Command(BaseCommand):
    help = "Migration of sub components of component 14 to component 14"

    def handle(self, *args, **kwargs):
        """Fold every sub-component of PER component 14 into the parent component.

        For each of the OpsLearning M2M relations (``per_component`` and
        ``per_component_validated``) the same three steps are applied:
          1. delete sub-component links on rows that already link the parent,
          2. delete all but one sub-component link per OpsLearning row,
          3. repoint the surviving link at the parent component.

        TODO(review): consider wrapping the whole run in transaction.atomic()
        so a failure part-way through cannot leave the links half-migrated.
        """
        parent_component_14 = FormComponent.objects.filter(component_num=14, is_parent=True).first()

        if not parent_component_14:
            self.stdout.write(self.style.ERROR("No parent component found for component 14"))
            return

        # Sub-components share component_num=14 but have is_parent unset (NULL).
        sub_components_14_ids = FormComponent.objects.filter(component_num=14, is_parent__isnull=True).values_list(
            "id", flat=True
        )

        if not sub_components_14_ids.exists():
            self.stdout.write(self.style.ERROR("No sub components found for component 14"))
            return

        # Get OpsLearning IDs that already have parent component
        with_parent_component_ops_learning_qs = OpsLearning.objects.filter(per_component=parent_component_14).values_list(
            "id", flat=True
        )

        # For per_component
        # Removing sub-component links where the parent link already exists.
        # Use self.stdout (not print) so output is captured by call_command tests.
        deleted = OpsLearning.per_component.through.objects.filter(
            formcomponent_id__in=sub_components_14_ids, opslearning_id__in=with_parent_component_ops_learning_qs
        ).delete()
        self.stdout.write(f"per_component: removed duplicate links {deleted}")

        # Removing all Sub-Components except one per OpsLearning row.
        # NOTE: .distinct("opslearning_id") is DISTINCT ON -- PostgreSQL-specific.
        OpsLearning.per_component.through.objects.filter(formcomponent_id__in=sub_components_14_ids).exclude(
            id__in=OpsLearning.per_component.through.objects.filter(formcomponent_id__in=sub_components_14_ids).distinct(
                "opslearning_id"
            )
        ).delete()

        # Repoint the surviving sub-component link at the parent component.
        OpsLearning.per_component.through.objects.filter(formcomponent_id__in=sub_components_14_ids).update(
            formcomponent_id=parent_component_14.id
        )

        # For per_component_validated: same three steps.
        with_parent_component_validated_ops_learning_qs = OpsLearning.objects.filter(
            per_component_validated=parent_component_14
        ).values_list("id", flat=True)

        deleted_validated = OpsLearning.per_component_validated.through.objects.filter(
            formcomponent_id__in=sub_components_14_ids, opslearning_id__in=with_parent_component_validated_ops_learning_qs
        ).delete()
        self.stdout.write(f"per_component_validated: removed duplicate links {deleted_validated}")

        OpsLearning.per_component_validated.through.objects.filter(formcomponent_id__in=sub_components_14_ids).exclude(
            id__in=OpsLearning.per_component_validated.through.objects.filter(
                formcomponent_id__in=sub_components_14_ids
            ).distinct("opslearning_id")
        ).delete()

        OpsLearning.per_component_validated.through.objects.filter(formcomponent_id__in=sub_components_14_ids).update(
            formcomponent_id=parent_component_14.id
        )

        self.stdout.write(self.style.SUCCESS("Successfully migrated sub-components of component-14 to component-14"))
20 changes: 20 additions & 0 deletions per/migrations/0124_alter_opslearningpromptresponsecache_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 4.2.17 on 2025-01-03 03:07

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated (Django 4.2.17, 2025-01-03): extends the `type` choices on
    # OpsLearningPromptResponseCache with Sector (3) and Component (4).
    # Choice changes on an IntegerField generally emit no SQL -- this keeps
    # migration state in sync with per/models.py.

    dependencies = [
        ("per", "0123_alter_perdocumentupload_file_alter_perfile_file"),
    ]

    operations = [
        migrations.AlterField(
            model_name="opslearningpromptresponsecache",
            name="type",
            field=models.IntegerField(
                choices=[(1, "Primary"), (2, "Secondary"), (3, "Sector"), (4, "Component")], verbose_name="type"
            ),
        ),
    ]
2 changes: 2 additions & 0 deletions per/models.py
Original file line number Diff line number Diff line change
@@ -761,6 +761,8 @@ class OpsLearningPromptResponseCache(models.Model):
class PromptType(models.IntegerChoices):
PRIMARY = 1, _("Primary")
SECONDARY = 2, _("Secondary")
SECTOR = 3, _("Sector")
COMPONENT = 4, _("Component")

prompt_hash = models.CharField(verbose_name=_("used prompt hash"), max_length=32)
prompt = models.TextField(verbose_name=_("used prompt"), null=True, blank=True)
156 changes: 103 additions & 53 deletions per/ops_learning_summary.py
Original file line number Diff line number Diff line change
@@ -57,7 +57,7 @@ class OpsLearningSummaryTask:
MIN_DIF_EXCERPTS = 3

primary_prompt = (
"Please aggregate and summarize the provided data into UP TO THREE structured paragraphs.\n"
"\n Please aggregate and summarize the provided data into UP TO THREE structured paragraphs.\n"
"The output MUST strictly adhere to the format below:\n"
"- *Title*: Each finding should begin with the main finding TITLE in bold.\n"
"Should be a high level summary of the finding below. "
@@ -87,11 +87,32 @@ class OpsLearningSummaryTask:
'"contradictory reports": "..."}'
)

secondary_prompt = (
"Please aggregate and summarize this data into structured paragraphs (as few as possible, as many as necessary). \n "
component_prompt = (
"\n Please aggregate and summarize this data into structured paragraphs (as few as possible, as many as necessary). \n "
"The output SHOULD ALWAYS follow the format below:\n"
"- *Type*: Whether the paragraph is related to a 'sector' or a 'component'\n"
"- *Subtype*: Provides the name of the sector or of the component to which the paragraph refers.\n"
"- *Type*: 'component'\n"
"- *Subtype*: Provides the name of the component to which the paragraph refers.\n"
"- *Excerpts ID*: Identify the ids of the excerpts you took into account for creating the summary.\n"
"*Content*: A short summary aggregating findings related to the Subtype, "
"so that they are supported by evidence coming from more than one report, "
"and there is ONLY ONE entry per subtype. Always integrate in the paragraph evidence that supports "
"it from the data available from multiples reports or items, include year and country of the evidence. "
"The length of each paragraph MUST be between 20 and 30 words.\n"
" Important:\n\n"
"- ONLY create one summary per subtype\n"
"- DO NOT mention the ids of the excerpts in the content of the summary.\n"
"- DO NOT use data from any source other than the one provided.\n\n"
"Output Format:\n"
"Provide your answer in valid JSON form. Reply with ONLY the answer in JSON form and include NO OTHER COMMENTARY.\n"
'{"0": {"type": "component", "subtype": "Information Management", "excerpts id":"23, 235", "content": "lorem ipsum"}, '
'"1": {"type": "component", "subtype": "Logistics", "excerpts id":"45, 678", "content": "lorem ipsum"}}'
)

sector_prompt = (
"\n Please aggregate and summarize this data into structured paragraphs (as few as possible, as many as necessary). \n "
"The output SHOULD ALWAYS follow the format below:\n"
"- *Type*: 'sector'\n"
"- *Subtype*: Provides the name of the sector to which the paragraph refers.\n"
"- *Excerpts ID*: Identify the ids of the excerpts you took into account for creating the summary.\n"
"*Content*: A short summary aggregating findings related to the Subtype, "
"so that they are supported by evidence coming from more than one report, "
@@ -105,8 +126,7 @@ class OpsLearningSummaryTask:
"Output Format:\n"
"Provide your answer in valid JSON form. Reply with ONLY the answer in JSON form and include NO OTHER COMMENTARY.\n"
'{"0": {"type": "sector", "subtype": "shelter", "excerpts id":"43, 1375, 14543", "content": "lorem ipsum"}, '
'"1": {"type": "component", "subtype": "Information Management", "excerpts id":"23, 235", "content": "lorem ipsum"}, '
'"2": {"type": "sector", "subtype": "WASH", "excerpts id":"30, 40", "content": "lorem ipsum"}}'
'"1": {"type": "sector", "subtype": "WASH", "excerpts id":"30, 40", "content": "lorem ipsum"}}'
)

system_message = (
@@ -466,8 +486,6 @@ def _contextualize_learnings(df):
df.at[index, "learning"] = (
f"{row['excerpts_id']}. In {row['appeal_year']} in {row['appeal_name']}: {row['learning']}"
)

df = df.drop(columns=["appeal_name"])
logger.info("Contextualization added to DataFrame.")
return df

@@ -512,13 +530,23 @@ def primary_prioritize_excerpts(cls, df: pd.DataFrame):
"""Prioritize the most recent excerpts within the token limit for primary insights."""
logger.info("Prioritizing primary excerpts within token limit.")

# Droping duplicates based on 'learning' column for primary DataFrame
primary_learning_df = (
df.drop_duplicates(subset="learning").sort_values(by="appeal_year", ascending=False).reset_index(drop=True)
primary_learning_df = df.drop_duplicates(subset="learning")

# Sort by 'appeal_name' and 'appeal_year' (descending for recency)
primary_learning_df = primary_learning_df.sort_values(by=["appeal_name", "appeal_year"], ascending=[True, False])

grouped = primary_learning_df.groupby("appeal_name")

# Interleaved list of rows
interleaved = list(chain(*zip_longest(*[group[1].itertuples(index=False) for group in grouped], fillvalue=None)))

# Convert back to a DataFrame, removing any placeholder rows
result = (
pd.DataFrame(interleaved, columns=primary_learning_df.columns).dropna(subset=["appeal_name"]).reset_index(drop=True)
)

# Slice the Primary DataFrame
sliced_primary_learning_df = cls.slice_dataframe(primary_learning_df, cls.PROMPT_DATA_LENGTH_LIMIT, cls.ENCODING_NAME)
sliced_primary_learning_df = cls.slice_dataframe(result, cls.PROMPT_DATA_LENGTH_LIMIT, cls.ENCODING_NAME)
logger.info("Primary excerpts prioritized within token limit.")
return sliced_primary_learning_df

@@ -527,11 +555,11 @@ def seconday_prioritize_excerpts(cls, df: pd.DataFrame):
"""Prioritize the most recent excerpts within the token limit for secondary insights."""
logger.info("Prioritizing secondary excerpts within token limit.")

# Droping duplicates based on 'learning' and 'component' columns for secondary DataFrame
# Droping duplicates based on 'appeal_name' 'learning' and 'component' columns for secondary DataFrame
secondary_learning_df = df.drop_duplicates(subset=["learning", "component", "sector"]).sort_values(
by=["component", "appeal_year"], ascending=[True, False]
by=["appeal_name", "component", "appeal_year"], ascending=[True, True, False]
)
grouped = secondary_learning_df.groupby("component")
grouped = secondary_learning_df.groupby(["component", "appeal_name"])

# Create an interleaved list of rows
interleaved = list(chain(*zip_longest(*[group[1].itertuples(index=False) for group in grouped], fillvalue=None)))
@@ -686,44 +714,61 @@ def process_learnings_component(component, df, max_length_per_section):
)
return learnings_component

def _build_data_section(secondary_df: pd.DataFrame):
# Secondary learnings section
sectors = get_main_sectors(secondary_df)
def _build_component_data_section(secondary_df: pd.DataFrame):
# Component learnings section
components = get_main_components(secondary_df)
max_length_per_section = cls.PROMPT_DATA_LENGTH_LIMIT

if (len(sectors) + len(components)) > 0:
max_length_per_section = cls.PROMPT_DATA_LENGTH_LIMIT / (len(components) + len(sectors))
if len(components) > 0:
max_length_per_section = cls.PROMPT_DATA_LENGTH_LIMIT / len(components)

learnings_sectors = (
learnings_components = (
"\n----------------\n\n"
+ "TYPE: SECTORS"
+ "TYPE: COMPONENT"
+ "\n----------------\n".join(
[process_learnings_sector(x, secondary_df, max_length_per_section) for x in sectors if pd.notna(x)]
[process_learnings_component(x, secondary_df, max_length_per_section) for x in components if pd.notna(x)]
)
)
learnings_components = (
secondary_learnings_data = learnings_components
return secondary_learnings_data

def _build_sector_data_section(secondary_df: pd.DataFrame):
# Sector learnings section
sectors = get_main_sectors(secondary_df)
max_length_per_section = cls.PROMPT_DATA_LENGTH_LIMIT

if len(sectors) > 0:
max_length_per_section = cls.PROMPT_DATA_LENGTH_LIMIT / len(sectors)

learnings_sectors = (
"\n----------------\n\n"
+ "TYPE: COMPONENT"
+ "TYPE: SECTORS"
+ "\n----------------\n".join(
[process_learnings_component(x, secondary_df, max_length_per_section) for x in components if pd.notna(x)]
[process_learnings_sector(x, secondary_df, max_length_per_section) for x in sectors if pd.notna(x)]
)
)
secondary_learnings_data = learnings_sectors + learnings_components
secondary_learnings_data = learnings_sectors
return secondary_learnings_data

# Prompt intro section
prompt_intro = cls._build_intro_section()
secondary_prompt_instruction = cls._build_instruction_section(
filter_data, secondary_learning_df, cls.secondary_instruction_prompt
)
secondary_learnings_data = _build_data_section(secondary_learning_df)

# Sector Prompt and Data
sector_prompt_instruction = cls._build_instruction_section(filter_data, secondary_learning_df, cls.sector_prompt)
sector_learning_data = _build_sector_data_section(secondary_learning_df)

# Components Prompt and Data
component_prompt_instruction = cls._build_instruction_section(filter_data, secondary_learning_df, cls.component_prompt)
component_learning_data = _build_component_data_section(secondary_learning_df)

# format the prompts
secondary_learning_prompt = "".join(
[prompt_intro, secondary_prompt_instruction, secondary_learnings_data, cls.secondary_prompt]
sector_learning_prompt = "".join([prompt_intro, sector_prompt_instruction, sector_learning_data, cls.sector_prompt])
component_learning_prompt = "".join(
[prompt_intro, component_prompt_instruction, component_learning_data, cls.sector_prompt]
)

logger.info("Secondary Prompt formatted.")
return secondary_learning_prompt
return sector_learning_prompt, component_learning_prompt

@classmethod
def generate_summary(cls, prompt, type: OpsLearningPromptResponseCache.PromptType) -> dict:
@@ -849,8 +894,10 @@ def _modify_summary(summary: dict) -> dict:

@classmethod
def _get_or_create_summary(
cls, prompt: str, prompt_hash: str, type: OpsLearningPromptResponseCache.PromptType, overwrite_prompt_cache: bool = False
cls, prompt: str, type: OpsLearningPromptResponseCache.PromptType, overwrite_prompt_cache: bool = False
) -> dict:
"""Retrieves or Generates the summary based on the provided prompt."""
prompt_hash = OpslearningSummaryCacheHelper.generate_hash(prompt)
instance, created = OpsLearningPromptResponseCache.objects.update_or_create(
prompt_hash=prompt_hash,
type=type,
@@ -952,13 +999,9 @@ def get_or_create_primary_summary(
"""Retrieves or Generates the primary summary based on the provided prompt."""
logger.info("Retrieving or generating primary summary.")

# generating hash for primary prompt
primary_prompt_hash = OpslearningSummaryCacheHelper.generate_hash(primary_learning_prompt)

# Checking the response for primary prompt
primary_summary = cls._get_or_create_summary(
prompt=primary_learning_prompt,
prompt_hash=primary_prompt_hash,
type=OpsLearningPromptResponseCache.PromptType.PRIMARY,
overwrite_prompt_cache=overwrite_prompt_cache,
)
@@ -981,30 +1024,37 @@ def get_or_create_primary_summary(
def get_or_create_secondary_summary(
cls,
ops_learning_summary_instance: OpsLearningCacheResponse,
secondary_learning_prompt: str,
sector_learning_prompt: str,
component_learning_prompt: str,
overwrite_prompt_cache: bool = False,
):
"""Retrieves or Generates the summary based on the provided prompts."""
logger.info("Retrieving or generating secondary summary.")

# generating hash for secondary prompt
secondary_prompt_hash = OpslearningSummaryCacheHelper.generate_hash(secondary_learning_prompt)

# Checking the response for secondary prompt
secondary_summary = cls._get_or_create_summary(
prompt=secondary_learning_prompt,
prompt_hash=secondary_prompt_hash,
type=OpsLearningPromptResponseCache.PromptType.SECONDARY,
overwrite_prompt_cache=overwrite_prompt_cache,
)
if overwrite_prompt_cache:
logger.info("Clearing the cache for secondary summary.")
# NOTE: find a better way to update the cache
OpsLearningComponentCacheResponse.objects.filter(filter_response=ops_learning_summary_instance).delete()
OpsLearningSectorCacheResponse.objects.filter(filter_response=ops_learning_summary_instance).delete()

# Saving into the database
# Checking the response for sector prompt
sector_summary = cls._get_or_create_summary(
prompt=sector_learning_prompt,
type=OpsLearningPromptResponseCache.PromptType.SECTOR,
overwrite_prompt_cache=overwrite_prompt_cache,
)
cls.secondary_response_save_to_db(
ops_learning_summary_instance=ops_learning_summary_instance,
secondary_summary=sector_summary,
)

# Checking the response for component prompt
component_summary = cls._get_or_create_summary(
prompt=component_learning_prompt,
type=OpsLearningPromptResponseCache.PromptType.COMPONENT,
overwrite_prompt_cache=overwrite_prompt_cache,
)
cls.secondary_response_save_to_db(
ops_learning_summary_instance=ops_learning_summary_instance,
secondary_summary=secondary_summary,
secondary_summary=component_summary,
)
41 changes: 41 additions & 0 deletions per/serializers.py
Original file line number Diff line number Diff line change
@@ -969,6 +969,7 @@ class Meta:
"atype",
"event_details",
"country",
"start_date",
)


@@ -1253,3 +1254,43 @@ class Meta:
"id",
"title",
]


class LearningByRegionSerializer(serializers.Serializer):
    # Read-only row: count of validated learnings attributed to one region.
    region_id = serializers.IntegerField(required=True)
    region_name = serializers.CharField(required=True)
    count = serializers.IntegerField(required=True)


class LearningByCountrySerializer(serializers.Serializer):
    # Read-only row: count of validated learnings attributed to one country.
    country_id = serializers.IntegerField(required=True)
    country_name = serializers.CharField(required=True)
    count = serializers.IntegerField(required=True)


class LearningBySectorSerializer(serializers.Serializer):
    # Read-only row: count of validated learnings tagged with one sector.
    sector_id = serializers.IntegerField(required=True)
    title = serializers.CharField(required=True)
    count = serializers.IntegerField(required=True)


class LearningSourcesOvertimeSerializer(serializers.Serializer):
    """One (appeal type, start date) bucket of appeal-document source counts."""

    atype = serializers.IntegerField(required=True)
    atype_display = serializers.SerializerMethodField(read_only=True)
    date = serializers.DateTimeField(required=True)
    count = serializers.IntegerField(required=True)

    def get_atype_display(self, obj):
        """Return the human-readable label for the integer appeal type."""
        # Renamed local from `type` to `atype` -- the original shadowed the builtin.
        atype = obj.get("atype")
        return AppealType(atype).label


class OpsLearningStatSerializer(serializers.Serializer):
    # Top-level payload for the ops-learning /stats/ endpoint: four headline
    # counts plus the per-region / per-country / per-sector / over-time breakdowns.
    operations_included = serializers.IntegerField(required=True)
    learning_extracts = serializers.IntegerField(required=True)
    sectors_covered = serializers.IntegerField(required=True)
    sources_used = serializers.IntegerField(required=True)
    learning_by_region = LearningByRegionSerializer(many=True)
    learning_by_country = LearningByCountrySerializer(many=True)
    learning_by_sector = LearningBySectorSerializer(many=True)
    sources_overtime = LearningSourcesOvertimeSerializer(many=True)
9 changes: 7 additions & 2 deletions per/task.py
Original file line number Diff line number Diff line change
@@ -49,12 +49,17 @@ def generate_ops_learning_summary(ops_learning_summary_id: int, filter_data: dic

# Prioritize excerpts for secondary insights
secondary_learning_df = OpsLearningSummaryTask.seconday_prioritize_excerpts(prioritized_learnings)

# Format secondary prompt
secondary_learning_prompt = OpsLearningSummaryTask.format_secondary_prompt(secondary_learning_df, filter_data)
sector_learning_prompt, component_learning_prompt = OpsLearningSummaryTask.format_secondary_prompt(
secondary_learning_df=secondary_learning_df, filter_data=filter_data
)

# Generate secondary summary
OpsLearningSummaryTask.get_or_create_secondary_summary(
ops_learning_summary_instance=ops_learning_summary_instance,
secondary_learning_prompt=secondary_learning_prompt,
sector_learning_prompt=sector_learning_prompt,
component_learning_prompt=component_learning_prompt,
overwrite_prompt_cache=overwrite_prompt_cache,
)

130 changes: 130 additions & 0 deletions per/test_views.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
import json
from unittest import mock

from django.core import management

from api.factories.country import CountryFactory
from api.factories.region import RegionFactory
from api.models import AppealType
from main.test_case import APITestCase
from per.factories import (
AppealDocumentFactory,
AppealFactory,
FormAreaFactory,
FormComponentFactory,
FormPrioritizationFactory,
OpsLearningFactory,
OverviewFactory,
PerWorkPlanFactory,
SectorTagFactory,
)

from .models import WorkPlanStatus
@@ -224,3 +231,126 @@ def test_summary_generation(self, generate_summary):
}
self.check_response_id(url=url, data=filters)
self.assertTrue(generate_summary.assert_called)


class OpsLearningStatsTestCase(APITestCase):
    """Tests for the ops-learning /stats/ endpoint and the
    migrate_sub_components_to_component14 management command.

    NOTE(review): the migration test shares this TestCase only for its
    factories; it does not depend on the stats fixtures built in setUp.
    """

    def setUp(self):
        super().setUp()
        # One region/country pair shared by both appeals.
        self.region = RegionFactory.create(label="Region A")
        self.country = CountryFactory.create(region=self.region, name="Country A")

        self.sector1 = SectorTagFactory.create(title="Sector 1")
        self.sector2 = SectorTagFactory.create(title="Sector 2")

        # Two appeals with distinct types (atype 0 and 1) and start dates,
        # so sources_overtime yields two buckets.
        self.appeal1 = AppealFactory.create(
            region=self.region, country=self.country, code="APP001", atype=0, start_date="2023-01-01"
        )
        self.appeal2 = AppealFactory.create(
            region=self.region, country=self.country, code="APP002", atype=1, start_date="2023-02-01"
        )

        appeal_document_1 = AppealDocumentFactory.create(appeal=self.appeal1)
        appeal_document_2 = AppealDocumentFactory.create(appeal=self.appeal2)

        # Two validated learnings (counted by /stats/)...
        self.ops_learning1 = OpsLearningFactory.create(
            is_validated=True, appeal_code=self.appeal1, appeal_document_id=appeal_document_1.id
        )
        self.ops_learning1.sector_validated.set([self.sector1])

        self.ops_learning2 = OpsLearningFactory.create(
            is_validated=True, appeal_code=self.appeal2, appeal_document_id=appeal_document_2.id
        )
        self.ops_learning2.sector_validated.set([self.sector2])

        # ...and one unvalidated learning that must be excluded from all counts.
        self.ops_learning3 = OpsLearningFactory.create(
            is_validated=False, appeal_code=self.appeal2, appeal_document_id=appeal_document_2.id
        )
        self.ops_learning3.sector_validated.set([self.sector2])

    def test_ops_learning_stats(self):
        """The stats endpoint counts only validated OpsLearning rows."""
        url = "/api/v2/ops-learning/stats/"
        response = self.client.get(url)

        self.assert_200(response)

        # Updated counts based on validated entries
        self.assertEqual(response.data["operations_included"], 2)
        self.assertEqual(response.data["sources_used"], 2)
        self.assertEqual(response.data["learning_extracts"], 2)
        self.assertEqual(response.data["sectors_covered"], 2)

        # Validate learning by region: single region with both validated learnings.
        region_data = response.data["learning_by_region"]
        self.assertEqual(region_data[0]["count"], 2)

        # Validate learning by sector: one row per validated sector.
        sector_data = response.data["learning_by_sector"]
        self.assertEqual(len(sector_data), 2)

        # Validate learning by country: both appeals share one country.
        country_data = response.data["learning_by_country"]
        self.assertEqual(len(country_data), 1)

        # One sources bucket per appeal type.
        sources_overtime = response.data["sources_overtime"]
        self.assertEqual(len(sources_overtime), 2)

    def test_migrate_subcomponents(self):
        """After the command runs, each OpsLearning that linked any component-14
        sub-component ends up linked to the parent component exactly once."""
        parent_component_14 = FormComponentFactory.create(component_num=14, is_parent=True)

        # is_parent left unset -> these are sub-components of component 14.
        sub_components_14 = FormComponentFactory.create_batch(3, component_num=14)
        other_components = FormComponentFactory.create_batch(2, component_num=1)

        # OpsLearning with only parent component and no sub components of component 14
        ops_learning_with_only_parent_component = OpsLearningFactory.create()
        ops_learning_with_only_parent_component.per_component.add(parent_component_14)
        ops_learning_with_only_parent_component.per_component.add(*other_components)

        ops_learning_with_only_parent_component.per_component_validated.add(parent_component_14)
        ops_learning_with_only_parent_component.per_component_validated.add(*other_components)

        # OpsLearning with parent component and sub components
        ops_learning_with_parent_component = OpsLearningFactory.create()

        ops_learning_with_parent_component.per_component.add(parent_component_14)
        ops_learning_with_parent_component.per_component.add(*sub_components_14)
        ops_learning_with_parent_component.per_component.add(*other_components)

        ops_learning_with_parent_component.per_component_validated.add(parent_component_14)
        ops_learning_with_parent_component.per_component_validated.add(*sub_components_14)
        ops_learning_with_parent_component.per_component_validated.add(*other_components)

        # OpsLearning without parent component but with sub components
        ops_learning_without_parent_component = OpsLearningFactory.create()
        ops_learning_without_parent_component.per_component.add(*sub_components_14)
        ops_learning_without_parent_component.per_component.add(*other_components)

        ops_learning_without_parent_component.per_component_validated.add(*sub_components_14)
        ops_learning_without_parent_component.per_component_validated.add(*other_components)

        # Operational learning with one sub component without parent component
        ops_learning = OpsLearningFactory.create()
        ops_learning.per_component.add(sub_components_14[0])
        ops_learning.per_component_validated.add(sub_components_14[0])
        ops_learning.per_component_validated.add(sub_components_14[1])
        ops_learning.per_component.add(other_components[0])
        ops_learning.per_component_validated.add(other_components[0])

        # Run the management command
        management.call_command("migrate_sub_components_to_component14")

        # Each case collapses to: parent (if any sub/parent link existed) + other components.
        ops_learning_with_only_parent_component.refresh_from_db()
        self.assertEqual(ops_learning_with_only_parent_component.per_component.count(), 3)
        self.assertEqual(ops_learning_with_only_parent_component.per_component_validated.count(), 3)

        ops_learning_with_parent_component.refresh_from_db()
        self.assertEqual(ops_learning_with_parent_component.per_component.count(), 3)
        self.assertEqual(ops_learning_with_parent_component.per_component_validated.count(), 3)

        ops_learning_without_parent_component.refresh_from_db()
        self.assertEqual(ops_learning_without_parent_component.per_component.count(), 3)
        self.assertEqual(ops_learning_without_parent_component.per_component_validated.count(), 3)

        ops_learning.refresh_from_db()
        self.assertEqual(ops_learning.per_component.count(), 2)
        self.assertEqual(ops_learning.per_component_validated.count(), 2)