Skip to content

Commit

Permalink
Remove the use_* global functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ludwiktrammer committed Apr 15, 2024
1 parent 7c16a24 commit d4f86a1
Show file tree
Hide file tree
Showing 28 changed files with 116 additions and 154 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ This is a basic implementation of a db-ally view for an example HR application,

```python
from dbally import decorators, SqlAlchemyBaseView, create_collection
from dbally.llm_client.openai_client import OpenAIClient
from sqlalchemy import create_engine

class CandidateView(SqlAlchemyBaseView):
Expand All @@ -52,7 +53,8 @@ class CandidateView(SqlAlchemyBaseView):
return Candidate.country == country

engine = create_engine('sqlite:///candidates.db')
my_collection = create_collection("collection_name")
llm = OpenAIClient(model_name="gpt-3.5-turbo")
my_collection = create_collection("collection_name", llm)
my_collection.add(CandidateView, lambda: CandidateView(engine))

my_collection.ask("Find candidates from United States")
Expand Down
12 changes: 6 additions & 6 deletions benchmark/dbally_benchmark/e2e_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from dbally.collection import Collection
from dbally.data_models.prompts.iql_prompt_template import default_iql_template
from dbally.data_models.prompts.view_selector_prompt_template import default_view_selector_template
from dbally.llm_client.openai_client import OpenAIClient
from dbally.utils.errors import NoViewFoundError, UnsupportedQueryError


Expand Down Expand Up @@ -81,13 +82,12 @@ async def evaluate(cfg: DictConfig) -> Any:

engine = create_engine(benchmark_cfg.pg_connection_string + f"/{cfg.db_name}")

if "gpt" in cfg.model_name:
dbally.use_openai_llm(
model_name="gpt-4",
openai_api_key=benchmark_cfg.openai_api_key,
)
llm_client = OpenAIClient(
model_name="gpt-4",
api_key=benchmark_cfg.openai_api_key,
)

db = dbally.create_collection(cfg.db_name)
db = dbally.create_collection(cfg.db_name, llm_client)

for view_name in cfg.view_names:
view = VIEW_REGISTRY[ViewName(view_name)]
Expand Down
4 changes: 2 additions & 2 deletions docs/concepts/collections.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
At its core, a collection groups together multiple [views](views.md). Once you've defined your views, the next step is to register them within a collection. Here's how you might do it:

```python
my_collection = dbally.create_collection("collection_name")
my_collection = dbally.create_collection("collection_name", llm_client=OpenAIClient())
my_collection.add(ExampleView)
my_collection.add(RecipesView)
```

Sometimes, view classes might need certain arguments when they're instantiated. In these instances, you'll want to register your view with a builder function that takes care of supplying these arguments. For instance, with views that rely on SQLAlchemy, you'll typically need to pass a database engine object like so:

```python
my_collection = dbally.create_collection("collection_name")
my_collection = dbally.create_collection("collection_name", llm_client=OpenAIClient())
engine = sqlalchemy.create_engine("sqlite://")
my_collection.add(ExampleView, lambda: ExampleView(engine))
my_collection.add(RecipesView, lambda: RecipesView(engine))
Expand Down
11 changes: 8 additions & 3 deletions docs/how-to/create_custom_event_handler.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,18 @@ class FileEventHandler(EventHandler[TextIOWrapper, datetime]):

## Registering our event handler

To use our event handler, we need to register it with the db-ally `use_event_handler` function.
To use our event handler, we need to pass it to the collection when creating it:

```python
import dbally
from dbally.llm_client.openai_client import OpenAIClient

dbally.use_event_handler(FileEventHandler())
my_collection = dbally.create_collection(
"collection_name",
llm_client=OpenAIClient(),
event_handlers=[FileEventHandler()],
)
```

Now you can test your event handler by running a query and checking the logs directory for the log files.
Now you can test your event handler by running a query against the collection and checking the logs directory for the log files.

11 changes: 6 additions & 5 deletions docs/how-to/custom_views.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,18 +151,18 @@ import abc
from typing import Callable, Any, Iterable

from dbally.iql import IQLQuery
from dbally.data_models.execution_result import ExecutionResult
from dbally.data_models.execution_result import ViewExecutionResult

@abc.abstractmethod
def get_data(self) -> Iterable:
"""
Returns the full data to be filtered.
"""

def execute(self, dry_run: bool = False) -> ExecutionResult:
def execute(self, dry_run: bool = False) -> ViewExecutionResult:
filtered_data = list(filter(self._filter, self.get_data()))

return ExecutionResult(results=filtered_data, context={})
return ViewExecutionResult(results=filtered_data, context={})
```

The `execute` function gets the data (by calling the `get_data` method) and applies the combined filters to it. We're using the [`filter`](https://docs.python.org/3/library/functions.html#filter) function from Python's standard library to accomplish this. The filtered data is then returned as a list.
Expand Down Expand Up @@ -216,10 +216,11 @@ Finally, we can use the `CandidatesView` just like any other view in db-ally. We
```python
import asyncio
import dbally
from dbally import CLIEventHandler
from dbally.llm_client.openai_client import OpenAIClient

async def main():
collection = dbally.create_collection("recruitment")
llm = OpenAIClient(model_name="gpt-3.5-turbo")
collection = dbally.create_collection("recruitment", llm)
collection.add(CandidateView)

result = await collection.ask("Find me French candidates suitable for a senior data scientist position.")
Expand Down
17 changes: 7 additions & 10 deletions docs/how-to/custom_views_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,8 @@
from dbally import decorators, MethodsBaseView
from dbally.audit.event_handlers.cli_event_handler import CLIEventHandler
from dbally.iql import IQLQuery, syntax
from dbally.data_models.execution_result import ExecutionResult

dbally.use_openai_llm(
openai_api_key=os.environ["OPENAI_API_KEY"],
model_name="gpt-3.5-turbo",
)
from dbally.data_models.execution_result import ViewExecutionResult
from dbally.llm_client.openai_client import OpenAIClient

@dataclass
class Candidate:
Expand Down Expand Up @@ -65,11 +61,11 @@ async def build_filter_node(self, node: syntax.Node) -> Callable[[Any], bool]:
return lambda x: not child(x)
raise ValueError(f"Unsupported grammar: {node}")

def execute(self, dry_run: bool = False) -> ExecutionResult:
def execute(self, dry_run: bool = False) -> ViewExecutionResult:
print(self._filter)
filtered_data = list(filter(self._filter, self.get_data()))

return ExecutionResult(results=filtered_data, context={})
return ViewExecutionResult(results=filtered_data, context={})

class CandidateView(FilteredIterableBaseView):
def get_data(self) -> Iterable:
Expand Down Expand Up @@ -103,8 +99,9 @@ def from_country(self, country: str) -> Callable[[Candidate], bool]:
return lambda x: x.country == country

async def main():
collection = dbally.create_collection("recruitment")
dbally.use_event_handler(CLIEventHandler())
llm = OpenAIClient(model_name="gpt-3.5-turbo")
event_handlers = [CLIEventHandler()]
collection = dbally.create_collection("recruitment", llm, event_handlers=event_handlers)
collection.add(CandidateView)

result = await collection.ask("Find me French candidates suitable for a senior data scientist position.")
Expand Down
10 changes: 7 additions & 3 deletions docs/how-to/log_runs_to_langsmith.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ This guide aims to demonstrate the process of logging the executions of db-ally

## Logging runs to LangSmith

Enabling LangSmith integration can be done by registering a prepared [EventHandler](../reference/event_handlers/index.md) using the `dbally.use_event_handler` method.
Enabling LangSmith integration can be done by passing a prepared [EventHandler](../reference/event_handlers/index.md) when creating a db-ally collection:

```python
import dbally
from dbally.audit.event_handlers.langsmith_event_handler import LangSmithEventHandler
from dbally.llm_client.openai_client import OpenAIClient
dbally.use_event_handler(LangSmithEventHandler(api_key="your_api_key"))
my_collection = dbally.create_collection(
"collection_name",
llm_client=OpenAIClient(),
event_handlers=[LangSmithEventHandler(api_key="your_api_key")],
)
```

After this, all the runs of db-ally will be logged to LangSmith.
After this, all the queries against the collection will be logged to LangSmith.
4 changes: 3 additions & 1 deletion docs/how-to/pandas_views.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ To use the view, you need to create a [Collection](../concepts/collections.md) a

```python
import dbally
from dbally.llm_client.openai_client import OpenAIClient

collection = dbally.create_collection("recruitment")
llm = OpenAIClient(model_name="gpt-3.5-turbo")
collection = dbally.create_collection("recruitment", llm)
collection.add(CandidateView, lambda: CandidateView(CANDIDATE_DATA))

result = await collection.ask("Find me French candidates suitable for a senior data scientist position.")
Expand Down
9 changes: 3 additions & 6 deletions docs/how-to/pandas_views_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,8 @@

from dbally import decorators, DataFrameBaseView
from dbally.audit.event_handlers.cli_event_handler import CLIEventHandler
from dbally.llm_client.openai_client import OpenAIClient

dbally.use_openai_llm(
openai_api_key=os.environ["OPENAI_API_KEY"],
model_name="gpt-3.5-turbo",
)

class CandidateView(DataFrameBaseView):
"""
Expand Down Expand Up @@ -49,8 +46,8 @@ def senior_data_scientist_position(self) -> pd.Series:
])

async def main():
collection = dbally.create_collection("recruitment")
dbally.use_event_handler(CLIEventHandler())
llm = OpenAIClient(model_name="gpt-3.5-turbo")
collection = dbally.create_collection("recruitment", llm, event_handlers=[CLIEventHandler()])
collection.add(CandidateView, lambda: CandidateView(CANDIDATE_DATA))

result = await collection.ask("Find me French candidates suitable for a senior data scientist position.")
Expand Down
6 changes: 4 additions & 2 deletions docs/how-to/sql_views.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ db-ally is a Python library that allows you to use natural language to query var
## Views
The majority of the db-ally's codebase is independent of any particular kind of data source. The part that is specific to a data source is the view. A [view](../concepts/views.md) is a class that defines how to interact with a data source. It contains methods that define how to retrieve data from the data source and how to filter the data in response to natural language queries.

There are several methods for creating a view that connects to a SQL database, including creating a custom view from scratch<!-- TODO: link to how-to on custom views-->. However, in most cases the easiest will be to use the `SqlAlchemyBaseView`<!-- TODO: link to reference of SqlAlchemyBaseView--> class provided by db-ally. This class is designed to work with [SQLAlchemy](https://www.sqlalchemy.org/), a popular SQL toolkit and Object-Relational Mapping (ORM) library for Python. To define your view, you will need to produce a class that inherits from `SqlAlchemyBaseView`and implement the `get_select` method, which returns a [SQLAlchemy `Select`](https://docs.sqlalchemy.org/en/20/core/selectable.html#sqlalchemy.sql.expression.Select) object:
There are several methods for creating a view that connects to a SQL database, including [creating a custom view from scratch](./custom_views.md). However, in most cases the easiest will be to use the [`SqlAlchemyBaseView`][dbally.SqlAlchemyBaseView] class provided by db-ally. This class is designed to work with [SQLAlchemy](https://www.sqlalchemy.org/), a popular SQL toolkit and Object-Relational Mapping (ORM) library for Python. To define your view, you will need to produce a class that inherits from `SqlAlchemyBaseView` and implement the `get_select` method, which returns a [SQLAlchemy `Select`](https://docs.sqlalchemy.org/en/20/core/selectable.html#sqlalchemy.sql.expression.Select) object:

```python
from dbally import SqlAlchemyBaseView
Expand Down Expand Up @@ -84,7 +84,9 @@ engine = create_engine('sqlite:///candidates.db')
Once you have defined your view and created an engine, you can register the view with db-ally. You do this by creating a collection and adding the view to it:

```python
my_collection = dbally.create_collection("collection_name")
from dbally.llm_client.openai_client import OpenAIClient

my_collection = dbally.create_collection("collection_name", llm_client=OpenAIClient())
my_collection.add(CandidateView, lambda: CandidateView(engine))
```

Expand Down
3 changes: 2 additions & 1 deletion docs/how-to/update_similarity_indexes.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ If you have a [collection](../concepts/collections.md) and want to update Simila

```python
from dbally import create_collection
from dbally.llm_client.openai_client import OpenAIClient

my_collection = create_collection("collection_name")
my_collection = create_collection("collection_name", llm_client=OpenAIClient())

# ... add views to the collection

Expand Down
Empty file removed docs/how-to/use_custom_llm.md
Empty file.
31 changes: 15 additions & 16 deletions docs/quickstart/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ We will cover the following topics:

- [Installation](#installation)
- [Database Configuration](#configuring-the-database)
- [OpenAI Access Configuration](#configuring-openai-access)
- [View Definition](#defining-the-views)
- [OpenAI Access Configuration](#configuring-openai-access)
- [Collection Definition](#defining-the-collection)
- [Query Execution](#running-the-query)

Expand Down Expand Up @@ -50,19 +50,6 @@ Base.prepare(autoload_with=engine)
Candidate = Base.classes.candidates
```

## OpenAI Access Configuration

To use OpenAI's GPT, configure db-ally and provide your OpenAI API key:

```python
import dbally

dbally.use_openai_llm(
openai_api_key="...",
model_name="gpt-3.5-turbo",
)
```

## View Definition

To use db-ally, define the views you want to use. A [view](../concepts/views.md) is a class that specifies what to select from the database and includes methods that the AI model can use to filter rows. These methods are known as "filters".
Expand Down Expand Up @@ -112,15 +99,27 @@ By setting up these filters, you enable the LLM to fetch candidates while option
!!! note
The `from_country` filter defined above supports only exact matches, which is not always ideal. Thankfully, db-ally comes with a solution for this problem - Similarity Indexes, which can be used to find the most similar value from the ones available. Refer to [Quickstart Part 2: Semantic Similarity](./quickstart2.md) for an example of using semantic similarity when filtering candidates by country.

## OpenAI Access Configuration

To use OpenAI's GPT, configure db-ally and provide your OpenAI API key:

```python
from dbally.llm_client.openai_client import OpenAIClient

llm = OpenAIClient(model_name="gpt-3.5-turbo", api_key="...")
```

Replace `...` with your OpenAI API key. Alternatively, you can set the `OPENAI_API_KEY` environment variable with your API key and omit the `api_key` parameter altogether.

## Collection Definition

Next, create a db-ally collection. A [collection](../concepts/collections.md) is an object where you register views and execute queries.
Next, create a db-ally collection. A [collection](../concepts/collections.md) is an object where you register views and execute queries. It also requires an AI model to use for generating [IQL queries](../concepts/iql.md) (in this case, the GPT model defined above).

```python
import dbally

async def main():
collection = dbally.create_collection("recruitment")
collection = dbally.create_collection("recruitment", llm)
collection.add(CandidateView, lambda: CandidateView(engine))
```

Expand Down
10 changes: 3 additions & 7 deletions docs/quickstart/quickstart2_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from dbally.audit.event_handlers.cli_event_handler import CLIEventHandler
from dbally.similarity import SimpleSqlAlchemyFetcher, FaissStore, SimilarityIndex
from dbally.embedding_client.openai import OpenAiEmbeddingClient
from dbally.llm_client.openai_client import OpenAIClient

engine = create_engine('sqlite:///candidates.db')

Expand All @@ -20,11 +21,6 @@

Candidate = Base.classes.candidates

dbally.use_openai_llm(
openai_api_key=os.environ["OPENAI_API_KEY"],
model_name="gpt-3.5-turbo",
)

country_similarity = SimilarityIndex(
fetcher=SimpleSqlAlchemyFetcher(
engine,
Expand Down Expand Up @@ -77,8 +73,8 @@ def from_country(self, country: Annotated[str, country_similarity]) -> sqlalchem
async def main():
await country_similarity.update()

collection = dbally.create_collection("recruitment")
dbally.use_event_handler(CLIEventHandler())
llm = OpenAIClient(model_name="gpt-3.5-turbo")
collection = dbally.create_collection("recruitment", llm, event_handlers=[CLIEventHandler()])
collection.add(CandidateView, lambda: CandidateView(engine))

result = await collection.ask("Find someone from the United States with more than 2 years of experience.")
Expand Down
10 changes: 3 additions & 7 deletions docs/quickstart/quickstart3_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
import pandas as pd

from dbally import decorators, SqlAlchemyBaseView, DataFrameBaseView, ExecutionResult
from dbally.audit.event_handlers.cli_event_handler import CLIEventHandler
from dbally.similarity import SimpleSqlAlchemyFetcher, FaissStore, SimilarityIndex
from dbally.embedding_client.openai import OpenAiEmbeddingClient
from dbally.llm_client.openai_client import OpenAIClient

engine = create_engine('sqlite:///candidates.db')

Expand All @@ -21,10 +21,6 @@

Candidate = Base.classes.candidates

dbally.use_openai_llm(
openai_api_key=os.environ["OPENAI_API_KEY"],
model_name="gpt-3.5-turbo",
)

country_similarity = SimilarityIndex(
fetcher=SimpleSqlAlchemyFetcher(
Expand Down Expand Up @@ -126,8 +122,8 @@ def display_results(result: ExecutionResult):
async def main():
await country_similarity.update()

collection = dbally.create_collection("recruitment")
# dbally.use_event_handler(CLIEventHandler())
llm = OpenAIClient(model_name="gpt-3.5-turbo")
collection = dbally.create_collection("recruitment", llm)
collection.add(CandidateView, lambda: CandidateView(engine))
collection.add(JobView, lambda: JobView(jobs_data))

Expand Down
Loading

0 comments on commit d4f86a1

Please sign in to comment.