Skip to content

Commit

Permalink
update examples
Browse files: browse the repository at this point in the history
  • Loading branch information
micpst committed Aug 30, 2024
1 parent 85b3af1 commit d7eadd4
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/quickstart/multiple-views.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs_data = pd.DataFrame.from_records([
{"title": "Machine Learning Engineer", "company": "Company C", "location": "Berlin", "salary": 90000},
{"title": "Data Scientist", "company": "Company D", "location": "London", "salary": 110000},
{"title": "Data Scientist", "company": "Company E", "location": "Warsaw", "salary": 80000},
{"title": "Data Scientist", "company": "Company F", "location": "Warsaw", "salary": 100000},
])
```

Expand Down
81 changes: 81 additions & 0 deletions examples/multiple_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dbally.embeddings.litellm import LiteLLMEmbeddingClient
from dbally.llms.litellm import LiteLLM
from dbally.similarity import FaissStore, SimilarityIndex, SimpleSqlAlchemyFetcher
from dbally.views.pandas_base import Aggregation, AggregationGroup

engine = create_engine("sqlite:///examples/recruiting/data/candidates.db")

Expand Down Expand Up @@ -76,6 +77,45 @@ def from_country(self, country: Annotated[str, country_similarity]) -> sqlalchem
"""
return Candidate.country == country

@decorators.view_aggregation()
def average_years_of_experience(self) -> sqlalchemy.Select:
    """
    Calculates the average years of experience of candidates.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # Build the AVG(...) expression first so the output column label sits
    # next to the aggregate it names.
    mean_experience = sqlalchemy.func.avg(Candidate.years_of_experience)
    labelled = mean_experience.label("average_years_of_experience")
    # Replace the view's selected columns with the single aggregate column.
    return self.select.with_only_columns(labelled)

@decorators.view_aggregation()
def positions_per_country(self) -> sqlalchemy.Select:
    """
    Returns the number of candidates per position per country.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # COUNT over the position column, exposed as "number_of_candidates".
    candidate_count = sqlalchemy.func.count(Candidate.position).label("number_of_candidates")
    # The same two columns are both selected and used as grouping keys.
    grouping_columns = (Candidate.position, Candidate.country)

    query = self.select.with_only_columns(candidate_count, *grouping_columns)
    query = query.group_by(*grouping_columns)
    # Largest groups first; ordering refers to the label defined above.
    return query.order_by(sqlalchemy.desc("number_of_candidates"))

@decorators.view_aggregation()
def top_universities(self, limit: int) -> sqlalchemy.Select:
    """
    Returns the top universities by the number of candidates.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # COUNT over candidate ids, exposed as "number_of_candidates".
    candidate_count = sqlalchemy.func.count(Candidate.id).label("number_of_candidates")

    # One row per university, ordered by descending candidate count.
    ranked = (
        self.select.with_only_columns(candidate_count, Candidate.university)
        .group_by(Candidate.university)
        .order_by(sqlalchemy.desc("number_of_candidates"))
    )
    # `limit` caps how many of the ranked rows are returned.
    return ranked.limit(limit)


jobs_data = pd.DataFrame.from_records(
[
Expand All @@ -84,6 +124,7 @@ def from_country(self, country: Annotated[str, country_similarity]) -> sqlalchem
{"title": "Machine Learning Engineer", "company": "Company C", "location": "Berlin", "salary": 90000},
{"title": "Data Scientist", "company": "Company D", "location": "London", "salary": 110000},
{"title": "Data Scientist", "company": "Company E", "location": "Warsaw", "salary": 80000},
{"title": "Data Scientist", "company": "Company F", "location": "Warsaw", "salary": 100000},
]
)

Expand Down Expand Up @@ -114,6 +155,46 @@ def from_company(self, company: str) -> pd.Series:
"""
return self.df.company == company

@decorators.view_aggregation()
def average_salary(self) -> AggregationGroup:
    """
    Calculates the average salary of job offers.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # Single "mean" aggregation over the salary column; no groupbys are given.
    mean_salary = Aggregation(column="salary", function="mean")
    return AggregationGroup(aggregations=[mean_salary])

@decorators.view_aggregation()
def average_salary_per_location(self) -> AggregationGroup:
    """
    Calculates the average salary of job offers per location and title.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    mean_salary = Aggregation(column="salary", function="mean")
    # Groups by both location and title, even though the method name
    # mentions only location.
    group_keys = ["location", "title"]
    return AggregationGroup(aggregations=[mean_salary], groupbys=group_keys)

@decorators.view_aggregation()
def count_per_title(self) -> AggregationGroup:
    """
    Counts the number of job offers per title.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # The title column is both the counted column and the grouping key.
    title_count = Aggregation(column="title", function="count")
    group_keys = ["title"]
    return AggregationGroup(aggregations=[title_count], groupbys=group_keys)


def display_results(result: ExecutionResult):
if result.view_name == "CandidateView":
Expand Down
39 changes: 39 additions & 0 deletions examples/semantic_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,45 @@ def from_country(self, country: Annotated[str, country_similarity]) -> sqlalchem
"""
return Candidate.country == country

@decorators.view_aggregation()
def average_years_of_experience(self) -> sqlalchemy.Select:
    """
    Calculates the average years of experience of candidates.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # Build the AVG(...) expression first so the output column label sits
    # next to the aggregate it names.
    mean_experience = sqlalchemy.func.avg(Candidate.years_of_experience)
    labelled = mean_experience.label("average_years_of_experience")
    # Replace the view's selected columns with the single aggregate column.
    return self.select.with_only_columns(labelled)

@decorators.view_aggregation()
def positions_per_country(self) -> sqlalchemy.Select:
    """
    Returns the number of candidates per position per country.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # COUNT over the position column, exposed as "number_of_candidates".
    candidate_count = sqlalchemy.func.count(Candidate.position).label("number_of_candidates")
    # The same two columns are both selected and used as grouping keys.
    grouping_columns = (Candidate.position, Candidate.country)

    query = self.select.with_only_columns(candidate_count, *grouping_columns)
    query = query.group_by(*grouping_columns)
    # Largest groups first; ordering refers to the label defined above.
    return query.order_by(sqlalchemy.desc("number_of_candidates"))

@decorators.view_aggregation()
def top_universities(self, limit: int) -> sqlalchemy.Select:
    """
    Returns the top universities by the number of candidates.
    """
    # NOTE(review): docstring text kept verbatim — presumably it is surfaced
    # as this aggregation's description at runtime; confirm before rewording.
    # COUNT over candidate ids, exposed as "number_of_candidates".
    candidate_count = sqlalchemy.func.count(Candidate.id).label("number_of_candidates")

    # One row per university, ordered by descending candidate count.
    ranked = (
        self.select.with_only_columns(candidate_count, Candidate.university)
        .group_by(Candidate.university)
        .order_by(sqlalchemy.desc("number_of_candidates"))
    )
    # `limit` caps how many of the ranked rows are returned.
    return ranked.limit(limit)


async def main():
dbally.event_handlers = [CLIEventHandler()]
Expand Down

0 comments on commit d7eadd4

Please sign in to comment.