diff --git a/main/settings.py b/main/settings.py index f040a1143..12de49711 100644 --- a/main/settings.py +++ b/main/settings.py @@ -113,6 +113,10 @@ NS_DOCUMENT_API_KEY=(str, None), NS_INITIATIVES_API_KEY=(str, None), NS_INITIATIVES_API_TOKEN=(str, None), + # OpenAi Azure + AZURE_OPENAI_ENDPOINT=(str, None), + AZURE_OPENAI_KEY=(str, None), + AZURE_OPENAI_DEPLOYMENT_NAME=(str, None), ) diff --git a/per/cache.py b/per/cache.py index ba0403f4c..4181c9064 100644 --- a/per/cache.py +++ b/per/cache.py @@ -4,6 +4,7 @@ import django_filters from django.core.serializers.json import DjangoJSONEncoder +from django.db import transaction from per.models import OpsLearningCacheResponse from per.task import generate_summary @@ -51,7 +52,5 @@ def get_or_create( ops_learning_summary = OpsLearningCacheResponse.objects.filter(used_filters_hash=hash_value).first() if ops_learning_summary: return ops_learning_summary - # TODO: Create a new summary based on the filters - # returning a dummy object for now - # return OpsLearningCacheResponse.objects.first() - return generate_summary(filter_data, hash_value) + # Create a new summary and cache it + return transaction.on_commit(lambda: generate_summary.delay(filter_data, hash_value)) diff --git a/per/drf_views.py b/per/drf_views.py index d288cdbdd..8676d2b9f 100644 --- a/per/drf_views.py +++ b/per/drf_views.py @@ -810,16 +810,16 @@ def get_renderer_context(self): return context - @extend_schema(filters=True) + @extend_schema(response=OpsLearningSummarySerializer, filters=True) @action( detail=False, methods=["GET"], - permission_classes=[permissions.IsAuthenticated], + permission_classes=[permissions.AllowAny], url_path="summary", ) def summary(self, request): """ - Returns a summary of the OpsLearning data + Get the Ops Learning Summary based on the filters """ ops_learning_summary_instance = OpslearningSummaryCacheHelper.get_or_create(request, [self.filterset_class]) return response.Response(OpsLearningSummarySerializer(ops_learning_summary_instance).data) diff --git a/per/ops_learning_summary.py b/per/ops_learning_summary.py new file mode 100644 index 000000000..4c7ad38d2 --- /dev/null +++ b/per/ops_learning_summary.py @@ -0,0 +1,591 @@ +import ast +import os +import typing +from itertools import chain + +import pandas as pd +import tiktoken +from django.db.models import F +from openai import AzureOpenAI + +from api.logger import logger +from api.models import Country +from per.models import FormPrioritization, OpsLearning, Overview + + +class OpsLearningSummaryTask: + + PROMPT_DATA_LENGTH_LIMIT = 5000 + PROMPT_LENGTH_LIMIT = 7500 + ENCODING_NAME = "cl100k_base" + + MIN_DIF_COMPONENTS = 3 + MIN_DIF_EXCERPTS = 3 + + primary_prompt = ( + "Please aggregate and summarize the provided data into UP TO THREE structured paragraphs. " + "The output MUST strictly adhere to the format below: " + "Title: Each finding should begin with the main finding TITLE in bold. " + "Content: Aggregate findings so that they are supported by evidence from more than one report. " + "Always integrate evidence from multiple reports or items into the paragraph, and " + "include the year and country of the evidence. " + "Confidence Level: For each finding, based on the number of items/reports connected to the finding, " + "assign a score from 1 to 5 where 1 is the lowest and 5 is the highest. " + "The format should be 'Confidence level: #/5' (e.g., 'Confidence level: 4/5'). " + "At the end of the summary, please highlight any contradictory country reports. 
" + "DO NOT use data from any source other than the one provided. Provide your answer in JSON form. " + "Reply with only the answer in valid JSON form and include no other commentary: " + "Example: " + '{"0": {"title": "Flexible and Adaptive Response Planning", ' + '"content": "Responses in Honduras, Peru, Ecuador, and Panama highlight the importance of adaptable strategies. ' + "The shift from youth-focused MHPSS to inclusive care in Peru in 2021, the pivot from sanitation infrastructure " + "to direct aid in Ecuador in 2022, and the responsive livelihood support in Panama in 2020, " + "all underscore the need for continuous reassessment and agile adaptation to the complex, " + 'changing needs of disaster-affected communities.", "confidence level": "xxx"}, ' + '"1": {"title": "xxx", "content": "xxx", "confidence level": "xxx"}, ' + '"2": {"title": "xxx", "content": "xxx", "confidence level": "xxx"}, ' + '"contradictory reports": "xxx"}' + ) + + secondary_prompt = ( + "Please aggregate and summarize this data into structured paragraphs (as few as possible, as many as necessary). " + "The output SHOULD ALWAYS follow the format below: " + "Type: Whether the paragraph is related to a 'sector' or a 'component'. " + "Subtype: Provides the name of the sector or of the component to which the paragraph refers. " + "Content: A short summary aggregating findings related to the Subtype, so that they are supported by " + "evidence coming from more than one report, " + "and there is ONLY ONE entry per subtype. Always integrate in the paragraph evidence that supports it " + "from the data available from multiple reports or items, " + "include year and country of the evidence. DO NOT use data from any source other than the " + "one provided. Provide your answer in JSON form. " + "Reply with ONLY the answer in valid JSON form and include NO OTHER COMMENTARY: " + '{"0": {"type": "sector", "subtype": "shelter", "content": "lorem ipsum"}, ' + '"1": {"type": "component", "subtype": "Information Management (IM)", "content": "lorem ipsum"}, ' + '"2": {"type": "sector", "subtype": "WASH", "content": "lorem ipsum"}}' + ) + + system_message = ( + "# CONTEXT # I want to summarize a set of lessons learned from a set of past emergency response operations " + "to extract the most useful and actionable insights." + "# STYLE # Use a writing style that is professional but informal." + "# TONE # Encouraging and motivating." + "# AUDIENCE # The audience is emergency response personnel from the Red Cross and Red Crescent. " + "They are action-oriented people who have very little time so they need concise, " + "not obvious information that can be easily consumed and acted upon in the time of a response." + ) + + primary_instruction_prompt = ( + "You should:" + "1. Describe, Summarize and Compare: Identify and detail the who, what, where, when and how many." + "2. Explain and Connect: Analyze why events happened and how they are related" + "3. Identify gaps: Assess what data is available, what is missing and potential biases" + "4. Identify key messages: Determine important stories and signals hidden in the data" + "5. Select top three: Select up to three findings to report" + ) + + secondary_instruction_prompt = ( + "You should for each section in the data (TYPE & SUBTYPE combination):" + "1. Describe, Summarize and Compare: Identify and detail the who, what, where, when and how many." + "2. Explain and Connect: Analyze why events happened and how they are related" + "3. 
Identify gaps: Assess what data is available, what is missing and potential biases" + "4. Identify key messages: Determine if there are important stories and signals hidden in the data" + "5. Conclude and make your case" + ) + + client = AzureOpenAI( + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), api_key=os.getenv("AZURE_OPENAI_API_KEY"), api_version="2023-05-15" + ) + + def count_tokens(string, encoding_name): + """Returns the number of tokens in a text string.""" + encoding = tiktoken.get_encoding(encoding_name) + return len(encoding.encode(string)) + + @classmethod + def fetch_ops_learnings(self, filter_data): + """Fetches the OPS learnings from the database.""" + ops_learning_qs = OpsLearning.objects.all() + from per.drf_views import OpsLearningFilter + + ops_learning_filtered_qs = OpsLearningFilter(filter_data, queryset=ops_learning_qs).qs + ops_learning_df = pd.DataFrame( + list( + ops_learning_filtered_qs.values( + "id", + "per_component", + "learning", + "appeal_code__country_id", + "appeal_code__country__region_id", + "appeal_code__name", + "appeal_code__start_date", + "sector", + ) + ) + ) + ops_learning_df = ops_learning_df.rename( + columns={ + "per_component": "component", + "appeal_code__country_id": "country_id", + "appeal_code__country__region_id": "region_id", + "appeal_code__name": "appeal_name", + "appeal_code__start_date": "appeal_year", + } + ) + ops_learning_df.set_index("id", inplace=True) + return ops_learning_df + + @classmethod + def _generate_regional_prioritization_list(self, df: pd.DataFrame): + """Generates a list of regional prioritizations from the given data.""" + df_exploded = df.explode("components") + regional_df = df_exploded.groupby(["region", "components"]).size().reset_index(name="count") + regional_df = regional_df[regional_df["count"] > 2] + regional_list = regional_df.groupby("region")["components"].apply(list).reset_index() + return regional_list + + @classmethod + def _generate_global_prioritization_list(self, regional_df: pd.DataFrame): + """Generates a global prioritization list from regional data.""" + global_df = regional_df.explode("components").groupby("components").size().reset_index(name="count") + global_components = global_df[global_df["count"] > 2]["components"].tolist() + global_list = {"global": global_components} + return global_list + + @classmethod + def _generate_country_prioritization_list( + self, regional_df: pd.DataFrame, global_components: list, prioritization_df: pd.DataFrame, country_df: pd.DataFrame + ): + """Generates a country-level prioritization list.""" + regional_dict = dict(zip(regional_df["region"], regional_df["components"])) + merged_df = country_df.merge(prioritization_df, on=["country", "region"], how="left") + no_prioritization_df = merged_df[merged_df["components"].isna()].astype(object) + + for index, row in no_prioritization_df.iterrows(): + region_id = row["region"] + components = regional_dict.get(region_id, global_components["global"]) + no_prioritization_df.at[index, "components"] = components + + final_df = pd.concat([merged_df.dropna(subset=["components"]), no_prioritization_df]) + final_df["components"] = final_df["components"].apply(lambda x: int(x) if isinstance(x, float) else x) + final_df = final_df[["country", "components"]] + return final_df + + @classmethod + def generate_priotization_list(self): + logger.info("Generating prioritization list.") + exclusion_list = [ + "IFRC Africa", + "IFRC Americas", + "IFRC Asia-Pacific", + "IFRC Europe", + "IFRC Geneva", + "IFRC MENA", + "Benelux 
ERU", + "ICRC", + ] + + # Get all countries + country_qs = ( + Country.objects.filter(is_deprecated=False, society_name__isnull=False, region__isnull=False) + .exclude(name__in=exclusion_list) + .values("id", "region_id") + ) + country_df = pd.DataFrame(list(country_qs)) + country_df = country_df.rename(columns={"id": "country", "region_id": "region"}) + + # Get all PER Overview + per_overview_qs = Overview.objects.select_related("country").values( + "id", + "country_id", + "country__region", + "assessment_number", + ) + per_overview_df = pd.DataFrame(list(per_overview_qs)) + per_overview_df = per_overview_df.rename(columns={"id": "overview", "country_id": "country", "country__region": "region"}) + + # Get all PER Prioritization + per_priotization_qs = ( + FormPrioritization.objects.filter( + is_draft=False, + prioritized_action_responses__isnull=False, + ) + .annotate( + components=F("prioritized_action_responses__component"), + ) + .values( + "overview", + "components", + ) + ) + per_priotization_df = pd.DataFrame(list(per_priotization_qs)) + per_priotization_df = per_priotization_df.merge( + per_overview_df[["overview", "country", "region", "assessment_number"]], on="overview", how="left" + ) + per_priotization_df = per_priotization_df.sort_values("assessment_number").drop_duplicates(subset="country", keep="last") + per_priotization_df = per_priotization_df[["region", "country", "components"]] + + # Generate the prioritization list that are in dataframes + regional_list = self._generate_regional_prioritization_list(per_priotization_df) + global_list = self._generate_global_prioritization_list(regional_list) + country_list = self._generate_country_prioritization_list(regional_list, global_list, per_priotization_df, country_df) + logger.info("Prioritization list generated.") + return regional_list, global_list, country_list + + @classmethod + def prioritize( + self, + df: pd.DataFrame, + components_countries: dict, + components_regions: dict, + components_global: dict, + type_prioritization: typing.Union[list, None], + ): + """Prioritizes components based on the type of prioritization.""" + + def _add_new_component(prioritized_components, per_prioritized_components, df): + """Adds new components to the prioritized list based on availability and frequency.""" + available_components = list(df["component"].unique()) + remaining_components = [item for item in available_components if item not in prioritized_components] + + intersect_components = list(set(per_prioritized_components) & set(remaining_components)) + + if intersect_components: + mask = df["component"].isin(intersect_components) + else: + mask = df["component"].isin(remaining_components) + + component_counts = df[mask]["component"].value_counts() + most_frequent_components = component_counts[component_counts == component_counts.max()].index.tolist() + + return prioritized_components + most_frequent_components + + if type_prioritization == "single-country": + country_id = str(df["country_id"].iloc[0]) + per_prioritized_components = components_countries.get(country_id, []) + elif type_prioritization == "single-region": + region_id = str(df["region_id"].iloc[0]) + per_prioritized_components = components_regions.get(region_id, []) + per_prioritized_components = components_global.get("global", []) + + component_counts = df["component"].value_counts() + most_frequent_components = component_counts[component_counts == component_counts.max()].index.tolist() + + while len(most_frequent_components) < 3: + most_frequent_components = 
_add_new_component(most_frequent_components, per_prioritized_components, df) + + mask = df["component"].isin(most_frequent_components) + return df[mask] + + @classmethod + def prioritize_components( + self, + filter_data: dict, + regional_list, + global_list, + country_list, + ): + logger.info("Prioritizing components.") + + def _need_component_prioritization(df, MIN_DIF_COMPONENTS, MIN_DIF_EXCERPTS): + """Determines if prioritization is needed based on unique components and learnings.""" + nb_dif_components = len(df["component"].unique()) + nb_dif_learnings = len(df["learning"].unique()) + return nb_dif_components > MIN_DIF_COMPONENTS and nb_dif_learnings > MIN_DIF_EXCERPTS + + def _identify_type_prioritization(df): + """Identifies the type of prioritization required based on the data.""" + if len(df["country_id"].unique()) == 1: + return "single-country" + elif len(df["region_id"].unique()) == 1: + return "single-region" + elif len(df["region_id"].unique()) > 1: + return "multi-region" + return None + + def _contextualize_learnings(df): + """Adds appeal year and event name as a contextualization of the leannings.""" + for index, row in df.iterrows(): + df.at[index, "learning"] = f"In {row['appeal_year']} in {row['appeal_name']}: {row['learning']}" + + df = df.drop(columns=["appeal_name"]) + logger.info("Contextualization added to DataFrame.") + return df + + components_countries = country_list.to_dict(orient="records") + components_countries = {item["country"]: item["components"] for item in components_countries} + + components_regions = regional_list.to_dict(orient="records") + components_regions = {item["region"]: item["components"] for item in components_regions} + + ops_learning_df = self.fetch_ops_learnings(filter_data) + + if _need_component_prioritization(ops_learning_df, self.MIN_DIF_COMPONENTS, self.MIN_DIF_EXCERPTS): + type_prioritization = _identify_type_prioritization(ops_learning_df) + prioritized_learnings = self.prioritize( + ops_learning_df, components_countries, components_regions, global_list, type_prioritization + ) + prioritized_learnings = ops_learning_df + logger.info("Prioritization of components completed.") + prioritized_learnings = _contextualize_learnings(prioritized_learnings) + return prioritized_learnings + + @classmethod + def slice_dataframe(self, df, limit=2000, encoding_name="cl100k_base"): + df["count_temp"] = [self.count_tokens(x, encoding_name) for x in df["learning"]] + df["cumsum"] = df["count_temp"].cumsum() + + slice_index = None + for i in range(1, len(df)): + if df["cumsum"].iloc[i - 1] <= limit and df["cumsum"].iloc[i] > limit: + slice_index = i - 1 + break + + if slice_index is not None: + df_sliced = df.iloc[: slice_index + 1] + else: + df_sliced = df + return df_sliced + + @classmethod + def prioritize_excerpts(self, df: pd.DataFrame): + """Prioritize the most recent excerpts within the token limit.""" + logger.info("Prioritizing excerpts within token limit.") + + # Droping duplicates based on 'learning' column for primary DataFrame + primary_learning_df = df.drop_duplicates(subset="learning") + primary_learning_df = primary_learning_df.sort_values(by="appeal_year", ascending=False) + primary_learning_df.reset_index(inplace=True, drop=True) + + # Droping duplicates based on 'learning' and 'component' columns for secondary DataFrame + secondary_learning_df = df.drop_duplicates(subset=["learning", "component"]) + secondary_learning_df = secondary_learning_df.sort_values(by=["component", "appeal_year"], ascending=[True, False]) + grouped = 
secondary_learning_df.groupby("component") + + # Create an interleaved list of rows + interleaved = list(chain(*zip(*[group[1].itertuples(index=False) for group in grouped]))) + + # Convert the interleaved list of rows back to a DataFrame + result = pd.DataFrame(interleaved) + result.reset_index(inplace=True, drop=True) + + # Slice the Primary and secondary dataframes + sliced_primary_learning_df = self.slice_dataframe(primary_learning_df, self.PROMPT_DATA_LENGTH_LIMIT, self.ENCODING_NAME) + sliced_secondary_learning_df = self.slice_dataframe(result, self.PROMPT_DATA_LENGTH_LIMIT, self.ENCODING_NAME) + logger.info("Excerpts prioritized within token limit.") + return sliced_primary_learning_df, sliced_secondary_learning_df + + @classmethod + def format_prompt( + self, + primary_learning_df: pd.DataFrame, + secondary_learning_df: pd.DataFrame, + filter_data: dict, + ): + """Formats the prompt based on request filter and prioritized learnings.""" + logger.info("Formatting prompt.") + + def _build_intro_section(): + """Builds the introductory section of the prompt.""" + return ( + "I will provide you with a set of instructions, data, and formatting requests in three sections." + + " I will pass you the INSTRUCTIONS section, are you ready?" + + os.linesep + + os.linesep + ) + + def _build_instruction_section(request_filter: dict, df: pd.DataFrame, instruction: str): + """Builds the instruction section of the prompt based on the request filter and DataFrame.""" + instructions = ["INSTRUCTIONS\n========================\nSummarize essential insights from the DATA"] + + if "appeal_code__dtype__in" in request_filter: + dtypes = df["dtype_name"].dropna().unique() + dtype_str = '", "'.join(dtypes) + instructions.append(f'concerning "{dtype_str}" occurrences') + + if "appeal_code__country__in" in request_filter: + countries = df["country_name"].dropna().unique() + country_str = '", "'.join(countries) + instructions.append(f'in "{country_str}"') + + if "appeal_code__region" in request_filter: + regions = df["region_name"].dropna().unique() + region_str = '", "'.join(regions) + instructions.append(f'in "{region_str}"') + + if "sector_validated__in" in request_filter: + sectors = df["sector"].dropna().unique() + sector_str = '", "'.join(sectors) + instructions.append(f'focusing on "{sector_str}" aspects') + + if "per_component_validated__in" in request_filter: + components = df["component"].dropna().unique() + component_str = '", "'.join(components) + instructions.append(f'and "{component_str}" aspects') + + instructions.append("in Emergency Response.") + instructions.append("\n\n" + instruction) + instructions.append("\n\nI will pass you the DATA section, are you ready?\n\n") + return "\n".join(instructions) + + def get_main_sectors(df: pd.DataFrame): + """Get only information from technical sectorial information""" + temp = df[df["component"] == "NS-specific areas of intervention"] + available_sectors = list(temp["sector"].unique()) + nb_sectors = len(available_sectors) + if nb_sectors == 0: + logger.info("There were not specific technical sectorial learnings") + return [] + logger.info("Main sectors for secondary summaries selected") + return available_sectors + + def get_main_components(df: pd.DataFrame): + available_components = list(df["component"].unique()) + nb_components = len(available_components) + if nb_components == 0: + logger.info("There were not specific components") + return [] + logger.info("All components for secondary summaries selected") + return available_components + + def 
process_learnings_sector(sector, df, max_length_per_section): + df = df[df["sector"] == sector].dropna() + df_sliced = self.slice_dataframe(df, max_length_per_section, self.ENCODING_NAME) + learnings_sector = ( + "\n----------------\n" + + "SUBTYPE: " + + str(sector) + + "\n----------------\n" + + "\n----------------\n".join(df_sliced["learning"]) + ) + return learnings_sector + + def process_learnings_component(component, df, max_length_per_section): + df = df[df["component"] == component].dropna() + df_sliced = self.slice_dataframe(df, max_length_per_section, self.ENCODING_NAME) + learnings_component = ( + "\n----------------\n" + + "SUBTYPE: " + + str(component) + + "\n----------------\n" + + "\n----------------\n".join(df_sliced["learning"]) + ) + return learnings_component + + def _build_data_section(primary_df: pd.DataFrame, secondary_df: pd.DataFrame): + # Primary learnings section + primary_learnings_data = "\n----------------\n".join(primary_df["learning"].dropna()) + + # Secondary learnings section + sectors = get_main_sectors(secondary_df) + components = get_main_components(secondary_df) + max_length_per_section = self.PROMPT_DATA_LENGTH_LIMIT / (len(components) + len(sectors)) + learnings_sectors = ( + "\n----------------\n\n" + + "TYPE: SECTORS" + + "\n----------------\n".join( + [process_learnings_sector(int(x), secondary_df, max_length_per_section) for x in sectors if pd.notna(x)] + ) + ) + learnings_components = ( + "\n----------------\n\n" + + "TYPE: COMPONENT" + + "\n----------------\n".join( + [process_learnings_component(int(x), secondary_df, max_length_per_section) for x in components if pd.notna(x)] + ) + ) + secondary_learnings_data = learnings_sectors + learnings_components + return primary_learnings_data, secondary_learnings_data + + prompt_intro = _build_intro_section() + primary_prompt_instruction = _build_instruction_section(filter_data, primary_learning_df, self.primary_instruction_prompt) + secondary_prompt_instruction = _build_instruction_section( + filter_data, secondary_learning_df, self.secondary_instruction_prompt + ) + primary_learnings_data, secondary_learnings_data = _build_data_section(primary_learning_df, secondary_learning_df) + + # format the prompts + primary_learning_prompt = "".join([prompt_intro, primary_prompt_instruction, primary_learnings_data, self.primary_prompt]) + secondary_learning_prompt = "".join( + [prompt_intro, secondary_prompt_instruction, secondary_learnings_data, self.secondary_prompt] + ) + logger.info("Prompt formatted.") + return primary_learning_prompt, secondary_learning_prompt + + @classmethod + def generate_summaries(self, primary_learning_prompt, secondary_learning_prompt): + """Generates summaries using the provided system message and prompt.""" + logger.info("Generating summaries.") + + def _validate_length_prompt(messages, prompt_length_limit): + """Validates the length of the prompt.""" + message_content = [msg["content"] for msg in messages] + text = " ".join(message_content) + count = self.count_tokens(text, self.ENCODING_NAME) + logger.info(f"Token count: {count}") + return count <= prompt_length_limit + + def _summarize(prompt, system_message="You are a helpful assistant"): + """Summarizes the prompt using the provided system message.""" + messages = [ + {"role": "system", "content": system_message}, + {"role": "user", "content": prompt}, + { + "role": "assistant", + "content": "Understood, thank you for providing the data, and formatting requests. 
" + + "I am ready to proceed with the task.", + }, + ] + + if not _validate_length_prompt(messages, self.PROMPT_LENGTH_LIMIT): + logger.warning("The length of the prompt might be too long.") + return "{}" + + try: + response = self.client.chat.completions.create( + model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"), messages=messages, temperature=0.7 + ) + summary = response.choices[0].message.content + return summary + except Exception as e: + logger.error(f"Error in summarizing: {e}") + raise + + def _validate_format(summary) -> bool: + """ + Validates the format of the summary and modifies it if necessary. + """ + + def validate_text_is_dictionary(text): + formatted_text = ast.literal_eval(text) + return isinstance(formatted_text, dict) + + def modify_format(summary): + try: + # Find the index of the last closing brace before the "Note" + end_index = summary.rfind("}") + + # Truncate the string to include only the dictionary part + formatted_summary = summary[: end_index + 1] + + logger.info("Modification realized to response") + return formatted_summary + + except Exception as e: + logger.error(f"Modification failed: {e}") + return "{}" + + # Attempt to parse the summary as a dictionary + if validate_text_is_dictionary(summary): + formated_summary = ast.literal_eval(summary) + return formated_summary + else: + formatted_summary = modify_format(summary) + formatted_summary = ast.literal_eval(formatted_summary) + return formatted_summary + + primary_summary = _summarize(primary_learning_prompt, self.system_message) + secondary_summary = _summarize(secondary_learning_prompt, self.system_message) + formated_primary_summary = _validate_format(primary_summary) + formated_secondary_summary = _validate_format(secondary_summary) + logger.info("Summaries generated.") + return formated_primary_summary, formated_secondary_summary diff --git a/per/task.py b/per/task.py index e845acaf8..f7f901e33 100644 --- a/per/task.py +++ b/per/task.py @@ -1,474 +1,6 @@ -import os -import typing -from itertools import chain - -import pandas as pd -import tiktoken from celery import shared_task -from django.db.models import F - -from api.logger import logger -from api.models import Country -from per.models import FormPrioritization, OpsLearning, Overview - - -class OpsLearningSummaryTask: - - PROMPT_DATA_LENGTH_LIMIT = 5000 - ENCODING_NAME = "cl100k_base" - - MIN_DIF_COMPONENTS = 3 - MIN_DIF_EXCERPTS = 3 - primary_prompt = ( - "Please aggregate and summarize the provided data into UP TO THREE structured paragraphs. " - "The output MUST strictly adhere to the format below: " - "Title: Each finding should begin with the main finding TITLE in bold. " - "Content: Aggregate findings so that they are supported by evidence from more than one report. " - "Always integrate evidence from multiple reports or items into the paragraph, and " - "include the year and country of the evidence. " - "Confidence Level: For each finding, based on the number of items/reports connected to the finding, " - "assign a score from 1 to 5 where 1 is the lowest and 5 is the highest. " - "The format should be 'Confidence level: #/5' (e.g., 'Confidence level: 4/5'). " - "At the end of the summary, please highlight any contradictory country reports. " - "DO NOT use data from any source other than the one provided. Provide your answer in JSON form. 
" - "Reply with only the answer in valid JSON form and include no other commentary: " - '{"0": {"title": "xxx", "content": "xxx", "confidence level": "xxx"}, ' - '"1": {"title": "xxx", "content": "xxx", "confidence level": "xxx"}, ' - '"2": {"title": "xxx", "content": "xxx", "confidence level": "xxx"}, ' - '"contradictory reports": "xxx"}' - ) - - secondary_prompt = ( - "Please aggregate and summarize this data into structured paragraphs (as few as possible, as many as necessary). " - "The output SHOULD ALWAYS follow the format below: " - "Type: Whether the paragraph is related to a 'sector' or a 'component'. " - "Subtype: Provides the name of the sector or of the component to which the paragraph refers. " - "Content: A short summary aggregating findings related to the Subtype, so that they are supported by " - "evidence coming from more than one report, " - "and there is ONLY ONE entry per subtype. Always integrate in the paragraph evidence that supports it " - "from the data available from multiple reports or items, " - "include year and country of the evidence. DO NOT use data from any source other than the " - "one provided. Provide your answer in JSON form. " - "Reply with ONLY the answer in valid JSON form and include NO OTHER COMMENTARY: " - '{"0": {"type": "sector", "subtype": "shelter", "content": "lorem ipsum"}, ' - '"1": {"type": "component", "subtype": "Information Management (IM)", "content": "lorem ipsum"}, ' - '"2": {"type": "sector", "subtype": "WASH", "content": "lorem ipsum"}}' - ) - - @classmethod - def fetch_ops_learnings(self, filter_data): - """Fetches the OPS learnings from the database.""" - ops_learning_qs = OpsLearning.objects.all() - from per.drf_views import OpsLearningFilter - - ops_learning_filtered_qs = OpsLearningFilter(filter_data, queryset=ops_learning_qs).qs - ops_learning_df = pd.DataFrame( - list( - ops_learning_filtered_qs.values( - "id", - "per_component", - "learning", - "appeal_code__country_id", - "appeal_code__country__region_id", - "appeal_code__name", - "appeal_code__start_date", - "sector", - ) - ) - ) - ops_learning_df = ops_learning_df.rename( - columns={ - "per_component": "component", - "appeal_code__country_id": "country_id", - "appeal_code__country__region_id": "region_id", - "appeal_code__name": "appeal_name", - "appeal_code__start_date": "appeal_year", - } - ) - ops_learning_df.set_index("id", inplace=True) - return ops_learning_df - - @classmethod - def generate_regional_prioritization_list(self, df: pd.DataFrame): - """Generates a list of regional prioritizations from the given data.""" - df_exploded = df.explode("components") - regional_df = df_exploded.groupby(["region", "components"]).size().reset_index(name="count") - regional_df = regional_df[regional_df["count"] > 2] - regional_list = regional_df.groupby("region")["components"].apply(list).reset_index() - return regional_list - - @classmethod - def generate_global_prioritization_list(self, regional_df: pd.DataFrame): - """Generates a global prioritization list from regional data.""" - global_df = regional_df.explode("components").groupby("components").size().reset_index(name="count") - global_components = global_df[global_df["count"] > 2]["components"].tolist() - global_list = {"global": global_components} - return global_list - - @classmethod - def generate_country_prioritization_list( - self, regional_df: pd.DataFrame, global_components: list, prioritization_df: pd.DataFrame, country_df: pd.DataFrame - ): - """Generates a country-level prioritization list.""" - regional_dict = 
dict(zip(regional_df["region"], regional_df["components"])) - merged_df = country_df.merge(prioritization_df, on=["country", "region"], how="left") - no_prioritization_df = merged_df[merged_df["components"].isna()].astype(object) - - for index, row in no_prioritization_df.iterrows(): - region_id = row["region"] - components = regional_dict.get(region_id, global_components["global"]) - no_prioritization_df.at[index, "components"] = components - - final_df = pd.concat([merged_df.dropna(subset=["components"]), no_prioritization_df]) - final_df["components"] = final_df["components"].apply(lambda x: int(x) if isinstance(x, float) else x) - final_df = final_df[["country", "components"]] - return final_df - - @classmethod - def generate_priotization_list(self): - logger.info("Generating prioritization list.") - exclusion_list = [ - "IFRC Africa", - "IFRC Americas", - "IFRC Asia-Pacific", - "IFRC Europe", - "IFRC Geneva", - "IFRC MENA", - "Benelux ERU", - "ICRC", - ] - - # Get all countries - country_qs = ( - Country.objects.filter(is_deprecated=False, society_name__isnull=False, region__isnull=False) - .exclude(name__in=exclusion_list) - .values("id", "region_id") - ) - country_df = pd.DataFrame(list(country_qs)) - country_df = country_df.rename(columns={"id": "country", "region_id": "region"}) - - # Get all PER Overview - per_overview_qs = Overview.objects.select_related("country").values( - "id", - "country_id", - "country__region", - "assessment_number", - ) - per_overview_df = pd.DataFrame(list(per_overview_qs)) - per_overview_df = per_overview_df.rename(columns={"id": "overview", "country_id": "country", "country__region": "region"}) - - # Get all PER Prioritization - per_priotization_qs = ( - FormPrioritization.objects.filter( - is_draft=False, - prioritized_action_responses__isnull=False, - ) - .annotate( - components=F("prioritized_action_responses__component"), - ) - .values( - "overview", - "components", - ) - ) - per_priotization_df = pd.DataFrame(list(per_priotization_qs)) - per_priotization_df = per_priotization_df.merge( - per_overview_df[["overview", "country", "region", "assessment_number"]], on="overview", how="left" - ) - per_priotization_df = per_priotization_df.sort_values("assessment_number").drop_duplicates(subset="country", keep="last") - per_priotization_df = per_priotization_df[["region", "country", "components"]] - - # Generate the prioritization list that are in dataframes - regional_list = self.generate_regional_prioritization_list(per_priotization_df) - global_list = self.generate_global_prioritization_list(regional_list) - country_list = self.generate_country_prioritization_list(regional_list, global_list, per_priotization_df, country_df) - logger.info("Prioritization list generated.") - return regional_list, global_list, country_list - - @classmethod - def prioritize( - self, - df: pd.DataFrame, - components_countries: dict, - components_regions: dict, - components_global: dict, - type_prioritization: typing.Union[list, None], - ): - """Prioritizes components based on the type of prioritization.""" - - def add_new_component(prioritized_components, per_prioritized_components, df): - """Adds new components to the prioritized list based on availability and frequency.""" - available_components = list(df["component"].unique()) - remaining_components = [item for item in available_components if item not in prioritized_components] - - intersect_components = list(set(per_prioritized_components) & set(remaining_components)) - - if intersect_components: - mask = 
df["component"].isin(intersect_components) - else: - mask = df["component"].isin(remaining_components) - - component_counts = df[mask]["component"].value_counts() - most_frequent_components = component_counts[component_counts == component_counts.max()].index.tolist() - - return prioritized_components + most_frequent_components - - if type_prioritization == "single-country": - country_id = str(df["country_id"].iloc[0]) - per_prioritized_components = components_countries.get(country_id, []) - elif type_prioritization == "single-region": - region_id = str(df["region_id"].iloc[0]) - per_prioritized_components = components_regions.get(region_id, []) - per_prioritized_components = components_global.get("global", []) - - component_counts = df["component"].value_counts() - most_frequent_components = component_counts[component_counts == component_counts.max()].index.tolist() - - while len(most_frequent_components) < 3: - most_frequent_components = add_new_component(most_frequent_components, per_prioritized_components, df) - - mask = df["component"].isin(most_frequent_components) - return df[mask] - - @classmethod - def prioritize_components( - self, - filter_data: dict, - regional_list, - global_list, - country_list, - ): - logger.info("Prioritizing components.") - - def need_component_prioritization(df, MIN_DIF_COMPONENTS, MIN_DIF_EXCERPTS): - """Determines if prioritization is needed based on unique components and learnings.""" - nb_dif_components = len(df["component"].unique()) - nb_dif_learnings = len(df["learning"].unique()) - return nb_dif_components > MIN_DIF_COMPONENTS and nb_dif_learnings > MIN_DIF_EXCERPTS - - def identify_type_prioritization(df): - """Identifies the type of prioritization required based on the data.""" - if len(df["country_id"].unique()) == 1: - return "single-country" - elif len(df["region_id"].unique()) == 1: - return "single-region" - elif len(df["region_id"].unique()) > 1: - return "multi-region" - return None - - def contextualize_learnings(df): - """Adds appeal year and event name as a contextualization of the leannings.""" - for index, row in df.iterrows(): - df.at[index, "learning"] = f"In {row['appeal_year']} in {row['appeal_name']}: {row['learning']}" - - df = df.drop(columns=["appeal_name"]) - logger.info("Contextualization added to DataFrame.") - return df - - components_countries = country_list.to_dict(orient="records") - components_countries = {item["country"]: item["components"] for item in components_countries} - - components_regions = regional_list.to_dict(orient="records") - components_regions = {item["region"]: item["components"] for item in components_regions} - - ops_learning_df = self.fetch_ops_learnings(filter_data) - - if need_component_prioritization(ops_learning_df, self.MIN_DIF_COMPONENTS, self.MIN_DIF_EXCERPTS): - type_prioritization = identify_type_prioritization(ops_learning_df) - prioritized_learnings = self.prioritize( - ops_learning_df, components_countries, components_regions, global_list, type_prioritization - ) - prioritized_learnings = ops_learning_df - logger.info("Prioritization of components completed.") - prioritized_learnings = contextualize_learnings(prioritized_learnings) - return prioritized_learnings - - @classmethod - def slice_dataframe(self, df, limit=2000, encoding_name="cl100k_base"): - def count_tokens(string, encoding_name): - """Returns the number of tokens in a text string.""" - encoding = tiktoken.get_encoding(encoding_name) - return len(encoding.encode(string)) - - df["count_temp"] = [count_tokens(x, encoding_name) 
for x in df["learning"]] - df["cumsum"] = df["count_temp"].cumsum() - - slice_index = None - for i in range(1, len(df)): - if df["cumsum"].iloc[i - 1] <= limit and df["cumsum"].iloc[i] > limit: - slice_index = i - 1 - break - - if slice_index is not None: - df_sliced = df.iloc[: slice_index + 1] - else: - df_sliced = df - return df_sliced - - @classmethod - def prioritize_excerpts(self, df: pd.DataFrame): - """Prioritize the most recent excerpts within the token limit.""" - logger.info("Prioritizing excerpts within token limit.") - - # Droping duplicates based on 'learning' column for primary DataFrame - primary_learning_df = df.drop_duplicates(subset="learning") - primary_learning_df = primary_learning_df.sort_values(by="appeal_year", ascending=False) - primary_learning_df.reset_index(inplace=True, drop=True) - - # Droping duplicates based on 'learning' and 'component' columns for secondary DataFrame - secondary_learning_df = df.drop_duplicates(subset=["learning", "component"]) - secondary_learning_df = secondary_learning_df.sort_values(by=["component", "appeal_year"], ascending=[True, False]) - grouped = secondary_learning_df.groupby("component") - - # Create an interleaved list of rows - interleaved = list(chain(*zip(*[group[1].itertuples(index=False) for group in grouped]))) - - # Convert the interleaved list of rows back to a DataFrame - result = pd.DataFrame(interleaved) - result.reset_index(inplace=True, drop=True) - - # Slice the Primary and secondary dataframes - sliced_primary_learning_df = self.slice_dataframe(primary_learning_df, self.PROMPT_DATA_LENGTH_LIMIT, self.ENCODING_NAME) - sliced_secondary_learning_df = self.slice_dataframe(result, self.PROMPT_DATA_LENGTH_LIMIT, self.ENCODING_NAME) - logger.info("Excerpts prioritized within token limit.") - return sliced_primary_learning_df, sliced_secondary_learning_df - - @classmethod - def format_prompt( - self, - primary_learning_df: pd.DataFrame, - secondary_learning_df: pd.DataFrame, - filter_data: dict, - ): - """Formats the prompt based on request filter and prioritized learnings.""" - logger.info("Formatting prompt.") - - def build_intro_section(): - """Builds the introductory section of the prompt.""" - return ( - "I will provide you with a set of instructions, data, and formatting requests in three sections." - + " I will pass you the INSTRUCTIONS section, are you ready?" 
- + os.linesep - + os.linesep - ) - - def build_instruction_section(request_filter, df): - """Builds the instruction section of the prompt based on the request filter and DataFrame.""" - instructions = ["INSTRUCTIONS", "========================", "Summarize essential insights from the DATA"] - - if "appeal_code__dtype__in" in request_filter: - dtypes = df["dtype_name"].dropna().unique() - dtype_str = '", "'.join(dtypes) - instructions.append(f'concerning "{dtype_str}" occurrences') - - if "appeal_code__country__in" in request_filter: - countries = df["country_name"].dropna().unique() - country_str = '", "'.join(countries) - instructions.append(f'in "{country_str}"') - - if "appeal_code__region" in request_filter: - regions = df["region_name"].dropna().unique() - region_str = '", "'.join(regions) - instructions.append(f'in "{region_str}"') - - if "sector_validated__in" in request_filter: - sectors = df["sector"].dropna().unique() - sector_str = '", "'.join(sectors) - instructions.append(f'focusing on "{sector_str}" aspects') - - if "per_component_validated__in" in request_filter: - components = df["component"].dropna().unique() - component_str = '", "'.join(components) - instructions.append(f'and "{component_str}" aspects') - - instructions.append( - "In Emergency Response. You should prioritize the insights based on their recurrence " - "and potential impact on humanitarian operations, and provide the top insights. \n\n" - "I will pass you the DATA section, are you ready?\n\n" - ) - return "\n".join(instructions) - - def get_main_sectors(df: pd.DataFrame): - """Get only information from technical sectorial information""" - temp = df[df["component"] == "NS-specific areas of intervention"] - available_sectors = list(temp["sector"].unique()) - nb_sectors = len(available_sectors) - if nb_sectors == 0: - logger.info("There were not specific technical sectorial learnings") - return [] - logger.info("Main sectors for secondary summaries selected") - return available_sectors - - def get_main_components(df: pd.DataFrame): - available_components = list(df["component"].unique()) - nb_components = len(available_components) - if nb_components == 0: - logger.info("There were not specific components") - return [] - logger.info("All components for secondary summaries selected") - return available_components - - def process_learnings_sector(sector, df, max_length_per_section): - df = df[df["sector"] == sector].dropna() - df_sliced = self.slice_dataframe(df, max_length_per_section, self.ENCODING_NAME) - learnings_sector = ( - "\n----------------\n" - + "SUBTYPE: " - + str(sector) - + "\n----------------\n" - + "\n----------------\n".join(df_sliced["learning"]) - ) - return learnings_sector - - def process_learnings_component(component, df, max_length_per_section): - df = df[df["component"] == component].dropna() - df_sliced = self.slice_dataframe(df, max_length_per_section, self.ENCODING_NAME) - learnings_component = ( - "\n----------------\n" - + "SUBTYPE: " - + str(component) - + "\n----------------\n" - + "\n----------------\n".join(df_sliced["learning"]) - ) - return learnings_component - - def build_data_section(primary_df: pd.DataFrame, secondary_df: pd.DataFrame): - # Primary learnings section - primary_learnings_data = "\n----------------\n".join(primary_df["learning"].dropna()) - - # Secondary learnings section - sectors = get_main_sectors(secondary_df) - components = get_main_components(secondary_df) - max_length_per_section = self.PROMPT_DATA_LENGTH_LIMIT / (len(components) + len(sectors)) - 
learnings_sectors = ( - "\n----------------\n\n" - + "TYPE: SECTORS" - + "\n----------------\n".join( - [process_learnings_sector(int(x), secondary_df, max_length_per_section) for x in sectors if pd.notna(x)] - ) - ) - learnings_components = ( - "\n----------------\n\n" - + "TYPE: COMPONENT" - + "\n----------------\n".join( - [process_learnings_component(int(x), secondary_df, max_length_per_section) for x in components if pd.notna(x)] - ) - ) - secondary_learnings_data = learnings_sectors + learnings_components - return primary_learnings_data, secondary_learnings_data - - prompt_intro = build_intro_section() - primary_prompt_instruction = build_instruction_section(filter_data, primary_learning_df) - secondary_prompt_instruction = build_instruction_section(filter_data, secondary_learning_df) - primary_learnings_data, secondary_learnings_data = build_data_section(primary_learning_df, secondary_learning_df) - # format the prompts - primary_learning_prompt = "".join([prompt_intro, primary_prompt_instruction, primary_learnings_data, self.primary_prompt]) - secondary_learning_prompt = "".join( - [prompt_intro, secondary_prompt_instruction, secondary_learnings_data, self.secondary_prompt] - ) - logger.info("Prompt formatted.") - return primary_learning_prompt, secondary_learning_prompt +from per.ops_learning_summary import OpsLearningSummaryTask @shared_task @@ -479,3 +11,4 @@ def generate_summary(filter_data: dict, hash_value: str): primary_learning_prompt, secondary_learning_prompt = OpsLearningSummaryTask.format_prompt( primary_learning_df, secondary_learning_df, filter_data ) + OpsLearningSummaryTask.generate_summaries(primary_learning_prompt, secondary_learning_prompt) diff --git a/poetry.lock b/poetry.lock index a03133a33..b51087a69 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,6 +25,39 @@ files = [ {file = "aniso8601-7.0.0.tar.gz", hash = "sha256:513d2b6637b7853806ae79ffaca6f3e8754bdd547048f5ccc1420aec4b714f1e"}, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anyio" +version = "4.4.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, + {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + [[package]] name = "arabic-reshaper" version = "3.0.0" @@ -1557,6 +1590,17 @@ setproctitle = ["setproctitle"] testing = ["coverage", "eventlet", 
"gevent", "pytest", "pytest-cov"] tornado = ["tornado (>=0.2)"] +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + [[package]] name = "html5lib" version = "1.1" @@ -1578,6 +1622,51 @@ chardet = ["chardet (>=2.2)"] genshi = ["genshi"] lxml = ["lxml"] +[[package]] +name = "httpcore" +version = "1.0.5" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, + {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.26.0)"] + +[[package]] +name = "httpx" +version = "0.27.0" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, + {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + [[package]] name = "idna" version = "3.10" @@ -2050,6 +2139,29 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "openai" +version = "1.37.0" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-1.37.0-py3-none-any.whl", hash = "sha256:a903245c0ecf622f2830024acdaa78683c70abb8e9d37a497b851670864c9f73"}, + {file = "openai-1.37.0.tar.gz", hash = "sha256:dc8197fc40ab9d431777b6620d962cc49f4544ffc3011f03ce0a805e6eb54adb"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.7,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + [[package]] name = "opencensus" version = "0.11.4" @@ -3469,6 +3581,17 @@ nose = ["nose"] pytest = ["pytest"] test = ["django (>=1.10.6)", "nose", "pytest (>=4.6)", "pytest-cov", "six"] +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + [[package]] name = "sortedcontainers" version = "2.4.0" diff --git a/pyproject.toml b/pyproject.toml index 6b1bff1cc..ed7dce845 100644 
--- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ colorlog = "*" mapbox-tilesets = "*" ipython = "*" tiktoken = "*" +openai = "*" [tool.poetry.dev-dependencies] pytest-profiling = "*"
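
For orientation, a sketch (not part of the diff) of the flow that the refactored generate_summary task in per/task.py drives through per/ops_learning_summary.py: build the prioritization lists, prioritize components and excerpts for the requested filters, format the primary and secondary prompts, then generate and validate the summaries. The filter payload below is hypothetical; in the real flow it is the validated OpsLearningFilter data passed in by OpslearningSummaryCacheHelper.

from per.ops_learning_summary import OpsLearningSummaryTask

# Hypothetical filter payload; real values come from the request's
# OpsLearningFilter query parameters captured by the cache helper.
filter_data = {"appeal_code__region": "1"}

# Same sequence of classmethod calls that the Celery task performs.
regional_list, global_list, country_list = OpsLearningSummaryTask.generate_priotization_list()
prioritized_learnings = OpsLearningSummaryTask.prioritize_components(
    filter_data, regional_list, global_list, country_list
)
primary_df, secondary_df = OpsLearningSummaryTask.prioritize_excerpts(prioritized_learnings)
primary_prompt, secondary_prompt = OpsLearningSummaryTask.format_prompt(
    primary_df, secondary_df, filter_data
)
primary_summary, secondary_summary = OpsLearningSummaryTask.generate_summaries(
    primary_prompt, secondary_prompt
)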
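
Usage note (illustrative only): OpsLearningSummaryTask builds its Azure OpenAI client from the environment variables introduced in main/settings.py. The sketch below mirrors that call pattern and assumes AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY and AZURE_OPENAI_DEPLOYMENT_NAME are exported in the environment; the settings block registers the key under AZURE_OPENAI_KEY, so whichever name the client actually reads must be the one set.

import os

from openai import AzureOpenAI

# Sketch only: mirrors the client construction and chat call used by
# OpsLearningSummaryTask; the environment variable names are assumptions
# taken from this diff.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2023-05-15",
)

response = client.chat.completions.create(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),  # Azure deployment name, not a model id
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Summarize the following operational lessons learned ..."},
    ],
    temperature=0.7,
)
print(response.choices[0].message.content)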