From d7eadd42f6961cdcf597876bb62bc90d11027af6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= <michal.pstrag@icloud.com>
Date: Fri, 30 Aug 2024 14:32:33 +0200
Subject: [PATCH] Add aggregation examples to the multiple-views and semantic-similarity examples

---
 docs/quickstart/multiple-views.md |  1 +
 examples/multiple_views.py        | 81 +++++++++++++++++++++++++++++++
 examples/semantic_similarity.py   | 39 +++++++++++++++
 3 files changed, 121 insertions(+)

diff --git a/docs/quickstart/multiple-views.md b/docs/quickstart/multiple-views.md
index e1783cc5..33cdc2d1 100644
--- a/docs/quickstart/multiple-views.md
+++ b/docs/quickstart/multiple-views.md
@@ -28,6 +28,7 @@ jobs_data = pd.DataFrame.from_records([
     {"title": "Machine Learning Engineer", "company": "Company C", "location": "Berlin", "salary": 90000},
     {"title": "Data Scientist", "company": "Company D", "location": "London", "salary": 110000},
     {"title": "Data Scientist", "company": "Company E", "location": "Warsaw", "salary": 80000},
+    {"title": "Data Scientist", "company": "Company F", "location": "Warsaw", "salary": 100000},
 ])
 ```
 
diff --git a/examples/multiple_views.py b/examples/multiple_views.py
index 0644de73..a8b9423d 100644
--- a/examples/multiple_views.py
+++ b/examples/multiple_views.py
@@ -15,6 +15,7 @@
 from dbally.embeddings.litellm import LiteLLMEmbeddingClient
 from dbally.llms.litellm import LiteLLM
 from dbally.similarity import FaissStore, SimilarityIndex, SimpleSqlAlchemyFetcher
+from dbally.views.pandas_base import Aggregation, AggregationGroup
 
 engine = create_engine("sqlite:///examples/recruiting/data/candidates.db")
 
@@ -76,6 +77,45 @@ def from_country(self, country: Annotated[str, country_similarity]) -> sqlalchem
         """
         return Candidate.country == country
 
+    @decorators.view_aggregation()
+    def average_years_of_experience(self) -> sqlalchemy.Select:
+        """
+        Calculates the average years of experience of candidates.
+        """
+        # Replace the selected columns with a single labeled AVG expression.
+        mean_experience = sqlalchemy.func.avg(Candidate.years_of_experience)
+        return self.select.with_only_columns(mean_experience.label("average_years_of_experience"))
+
+    @decorators.view_aggregation()
+    def positions_per_country(self) -> sqlalchemy.Select:
+        """
+        Returns the number of candidates per position per country.
+        """
+        # Count `Candidate.id` (as `top_universities` does) rather than `position`:
+        # COUNT(column) ignores NULLs, so a group with a NULL position would report 0.
+        # NOTE(review): assumes `Candidate.id` is never NULL - confirm against the model.
+        candidate_count = sqlalchemy.func.count(Candidate.id).label("number_of_candidates")
+        return (
+            self.select.with_only_columns(candidate_count, Candidate.position, Candidate.country)
+            .group_by(Candidate.position, Candidate.country)
+            .order_by(sqlalchemy.desc("number_of_candidates"))
+        )
+
+    @decorators.view_aggregation()
+    def top_universities(self, limit: int) -> sqlalchemy.Select:
+        """
+        Returns the top universities by the number of candidates.
+        """
+        candidate_count = sqlalchemy.func.count(Candidate.id).label("number_of_candidates")
+        # One row per university, carrying the count computed above.
+        per_university = self.select.with_only_columns(
+            candidate_count,
+            Candidate.university,
+        ).group_by(Candidate.university)
+        # Largest groups first, then keep only the first `limit` rows.
+        ranked = per_university.order_by(sqlalchemy.desc("number_of_candidates"))
+        return ranked.limit(limit)
+
 
 jobs_data = pd.DataFrame.from_records(
     [
@@ -84,6 +124,7 @@ def from_country(self, country: Annotated[str, country_similarity]) -> sqlalchem
         {"title": "Machine Learning Engineer", "company": "Company C", "location": "Berlin", "salary": 90000},
         {"title": "Data Scientist", "company": "Company D", "location": "London", "salary": 110000},
         {"title": "Data Scientist", "company": "Company E", "location": "Warsaw", "salary": 80000},
+        {"title": "Data Scientist", "company": "Company F", "location": "Warsaw", "salary": 100000},
     ]
 )
 
@@ -114,6 +155,46 @@ def from_company(self, company: str) -> pd.Series:
         """
         return self.df.company == company
 
+    @decorators.view_aggregation()
+    def average_salary(self) -> AggregationGroup:
+        """
+        Calculates the average salary of job offers.
+        """
+        # Single aggregation: `mean` over the `salary` column; no `groupbys` are passed.
+        mean_salary = Aggregation(column="salary", function="mean")
+        return AggregationGroup(
+            aggregations=[mean_salary],
+        )
+
+    @decorators.view_aggregation()
+    def average_salary_per_location(self) -> AggregationGroup:
+        """
+        Calculates the average salary of job offers per location and title.
+        """
+        # `mean` over the `salary` column.
+        mean_salary = Aggregation(column="salary", function="mean")
+        # Grouping keys, in their original order: location first, then title.
+        grouping_columns = ["location", "title"]
+
+        return AggregationGroup(
+            aggregations=[mean_salary],
+            groupbys=grouping_columns,
+        )
+
+    @decorators.view_aggregation()
+    def count_per_title(self) -> AggregationGroup:
+        """
+        Counts the number of job offers per title.
+        """
+        # `title` is both the grouping key and the column handed to `count`.
+        grouping_columns = ["title"]
+        title_count = Aggregation(column="title", function="count")
+
+        return AggregationGroup(
+            aggregations=[title_count],
+            groupbys=grouping_columns,
+        )
+
 
 def display_results(result: ExecutionResult):
     if result.view_name == "CandidateView":
diff --git a/examples/semantic_similarity.py b/examples/semantic_similarity.py
index b4a03b66..098f167a 100644
--- a/examples/semantic_similarity.py
+++ b/examples/semantic_similarity.py
@@ -76,6 +76,45 @@ def from_country(self, country: Annotated[str, country_similarity]) -> sqlalchem
         """
         return Candidate.country == country
 
+    @decorators.view_aggregation()
+    def average_years_of_experience(self) -> sqlalchemy.Select:
+        """
+        Calculates the average years of experience of candidates.
+        """
+        # Replace the selected columns with a single labeled AVG expression.
+        mean_experience = sqlalchemy.func.avg(Candidate.years_of_experience)
+        return self.select.with_only_columns(mean_experience.label("average_years_of_experience"))
+
+    @decorators.view_aggregation()
+    def positions_per_country(self) -> sqlalchemy.Select:
+        """
+        Returns the number of candidates per position per country.
+        """
+        # Count `Candidate.id` (as `top_universities` does) rather than `position`:
+        # COUNT(column) ignores NULLs, so a group with a NULL position would report 0.
+        # NOTE(review): assumes `Candidate.id` is never NULL - confirm against the model.
+        candidate_count = sqlalchemy.func.count(Candidate.id).label("number_of_candidates")
+        return (
+            self.select.with_only_columns(candidate_count, Candidate.position, Candidate.country)
+            .group_by(Candidate.position, Candidate.country)
+            .order_by(sqlalchemy.desc("number_of_candidates"))
+        )
+
+    @decorators.view_aggregation()
+    def top_universities(self, limit: int) -> sqlalchemy.Select:
+        """
+        Returns the top universities by the number of candidates.
+        """
+        candidate_count = sqlalchemy.func.count(Candidate.id).label("number_of_candidates")
+        # One row per university, carrying the count computed above.
+        per_university = self.select.with_only_columns(
+            candidate_count,
+            Candidate.university,
+        ).group_by(Candidate.university)
+        # Largest groups first, then keep only the first `limit` rows.
+        ranked = per_university.order_by(sqlalchemy.desc("number_of_candidates"))
+        return ranked.limit(limit)
+
 
 async def main():
     dbally.event_handlers = [CLIEventHandler()]