-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcollection.py
296 lines (246 loc) · 11.6 KB
/
collection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
import asyncio
import inspect
import textwrap
import time
from collections import defaultdict
from typing import Callable, Dict, List, Optional, Type, TypeVar
from dbally.audit.event_handlers.base import EventHandler
from dbally.audit.event_tracker import EventTracker
from dbally.data_models.audit import RequestEnd, RequestStart
from dbally.data_models.execution_result import ExecutionResult
from dbally.llms.base import LLM
from dbally.llms.clients.base import LLMOptions
from dbally.nl_responder.nl_responder import NLResponder
from dbally.similarity.index import AbstractSimilarityIndex
from dbally.utils.errors import NoViewFoundError
from dbally.view_selection.base import ViewSelector
from dbally.views.base import BaseView, IndexLocation
class IndexUpdateError(Exception):
    """
    Exception for when updating any of the Collection's similarity indexes fails.

    Provides a dictionary mapping failed indexes to their
    respective exceptions as the `failed_indexes` attribute.
    """

    def __init__(self, message: str, failed_indexes: Dict[AbstractSimilarityIndex, Exception]) -> None:
        """
        Args:
            message: Human-readable description of the failure, used as the exception message.
            failed_indexes: Dictionary mapping failed indexes to their respective exceptions.
        """
        self.failed_indexes = failed_indexes
        super().__init__(message)

    def __reduce__(self):
        """
        Describe how to rebuild this exception for `copy` and `pickle`.

        The default `BaseException` reduction replays only `self.args` (i.e. the message), which would call
        `__init__` without the required `failed_indexes` argument and raise `TypeError`.

        Returns:
            Tuple of (class, constructor arguments, instance state).
        """
        message = self.args[0] if self.args else ""
        return type(self), (message, self.failed_indexes), self.__dict__
class Collection:
    """
    Collection is a container for a set of views that can be used by db-ally to answer user questions.

    Tip:
        It is recommended to create new collections using the [`dbally.create_collection`][dbally.create_collection]\
        function instead of instantiating this class directly.
    """

    def __init__(
        self,
        name: str,
        view_selector: ViewSelector,
        llm: LLM,
        event_handlers: List[EventHandler],
        nl_responder: NLResponder,
        n_retries: int = 3,
    ) -> None:
        """
        Args:
            name: Name of the collection is available for [Event handlers](event_handlers/index.md) and is\
            used to distinguish different db-ally runs.
            view_selector: As you register more than one [View](views/index.md) within single collection,\
            before generating the IQL query, a View that fits query the most is selected by the\
            [ViewSelector](view_selection/index.md).
            llm: LLM used by the collection to generate views and respond to natural language queries.
            event_handlers: Event handlers used by the collection during query executions. Can be used\
            to log events as [CLIEventHandler](event_handlers/cli_handler.md) or to validate system performance\
            as [LangSmithEventHandler](event_handlers/langsmith_handler.md).
            nl_responder: Object that translates RAW response from db-ally into natural language.
            n_retries: IQL generator may produce invalid IQL. If this is the case this argument specifies\
            how many times db-ally will try to regenerate it. Previous try with the error message is\
            appended to the chat history to guide next generations.
        """
        self.name = name
        self.n_retries = n_retries
        # Registered view classes, keyed by view name (used for listing and descriptions).
        self._views: Dict[str, Callable[[], BaseView]] = {}
        # Factories producing a fresh view instance, keyed by view name.
        self._builders: Dict[str, Callable[[], BaseView]] = {}
        self._view_selector = view_selector
        self._nl_responder = nl_responder
        self._event_handlers = event_handlers
        self._llm = llm

    T = TypeVar("T", bound=BaseView)

    def add(self, view: Type[T], builder: Optional[Callable[[], T]] = None, name: Optional[str] = None) -> None:
        """
        Register new [View](views/index.md) that will be available to query via the collection.

        Args:
            view: A class inheriting from BaseView. Object of this type will be initialized during\
            query execution. We expect Class instead of object, as otherwise Views must have been implemented\
            stateless, which would be cumbersome.
            builder: Optional factory function that will be used to create the View instance. Use it when you\
            need to pass outcome of API call or database connection to the view and it can change over time.
            name: Custom name of the view (defaults to the name of the class).

        Raises:
            ValueError: if view with the given name is already registered, if the view class has required\
            constructor arguments and no builder was given, or if the builder does not return an instance\
            of the view class.

        **Example** of custom `builder` usage

        ```python
            def build_dogs_df_view():
                dogs_df = requests.get("https://dog.ceo/api/breeds/list")
                return DogsDFView(dogs_df)

            collection.add(DogsDFView, build_dogs_df_view)
        ```
        """
        if name is None:
            name = view.__name__

        if name in self._views or name in self._builders:
            raise ValueError(f"View with name {name} is already registered")

        # A parameter is "required" only if it has no default AND is not variadic: `*args` / `**kwargs`
        # never have defaults but can always be omitted. Identity comparison avoids invoking `__eq__`
        # on arbitrary default values.
        variadic_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
        requires_args = any(
            param.default is inspect.Parameter.empty and param.kind not in variadic_kinds
            for param in inspect.signature(view).parameters.values()
        )
        if requires_args and builder is None:
            raise ValueError("Builder function is required for views with non-default arguments")

        builder = builder or view

        # instantiate view to check if the builder is correct
        view_instance = builder()
        if not isinstance(view_instance, view):
            raise ValueError(f"The builder function for view {name} must return an instance of {view.__name__}")

        self._views[name] = view
        self._builders[name] = builder

    def add_event_handler(self, event_handler: EventHandler) -> None:
        """
        Adds an event handler to the list of event handlers.

        Args:
            event_handler: The event handler to be added.
        """
        self._event_handlers.append(event_handler)

    def get(self, name: str) -> BaseView:
        """
        Returns an instance of the view with the given name

        Args:
            name: Name of the view to return

        Returns:
            View instance, freshly created by the builder registered for that name

        Raises:
            NoViewFoundError: If there is no view with the given name
        """
        if name not in self._views:
            raise NoViewFoundError

        return self._builders[name]()

    def list(self) -> Dict[str, str]:
        """
        Lists all registered view names and their descriptions

        Returns:
            Dictionary of view names and descriptions. The description is the dedented, stripped docstring
            of the view class, or an empty string when the class has no docstring.
        """
        return {
            name: (textwrap.dedent(view.__doc__).strip() if view.__doc__ else "") for name, view in self._views.items()
        }

    async def ask(
        self,
        question: str,
        dry_run: bool = False,
        return_natural_response: bool = False,
        llm_options: Optional[LLMOptions] = None,
    ) -> ExecutionResult:
        """
        Ask question in a text form and retrieve the answer based on the available views.

        Question answering is composed of following steps:
            1. View Selection
            2. IQL Generation
            3. IQL Parsing
            4. Query Building
            5. Query Execution

        Args:
            question: question posed using natural language representation e.g\
            "What job offers for Data Scientists do we have?"
            dry_run: if True, only generate the query without executing it
            return_natural_response: if True (and dry_run is False as natural response requires query results),
                the natural response will be included in the answer
            llm_options: options to use for the LLM client. If provided, these options will be merged with the default
                options provided to the LLM client, prioritizing option values other than NOT_GIVEN

        Returns:
            ExecutionResult object representing the result of the query execution.

        Raises:
            ValueError: if collection is empty
            IQLError: if incorrect IQL was generated `n_retries` amount of times.
        """
        start_time = time.monotonic()

        event_tracker = EventTracker.initialize_with_handlers(self._event_handlers)

        await event_tracker.request_start(RequestStart(question=question, collection_name=self.name))

        # select view
        views = self.list()

        if not views:
            raise ValueError("Empty collection")
        if len(views) == 1:
            # With a single registered view there is nothing to choose from, so the selector is skipped.
            selected_view = next(iter(views))
        else:
            selected_view = await self._view_selector.select_view(
                question=question,
                views=views,
                event_tracker=event_tracker,
                llm_options=llm_options,
            )

        view = self.get(selected_view)

        # Time the view call separately so it can be reported apart from the total request time.
        start_time_view = time.monotonic()
        view_result = await view.ask(
            query=question,
            llm=self._llm,
            event_tracker=event_tracker,
            n_retries=self.n_retries,
            dry_run=dry_run,
            llm_options=llm_options,
        )
        end_time_view = time.monotonic()

        textual_response = None
        if not dry_run and return_natural_response:
            textual_response = await self._nl_responder.generate_response(
                result=view_result,
                question=question,
                event_tracker=event_tracker,
                llm_options=llm_options,
            )

        result = ExecutionResult(
            results=view_result.results,
            context=view_result.context,
            execution_time=time.monotonic() - start_time,
            execution_time_view=end_time_view - start_time_view,
            view_name=selected_view,
            textual_response=textual_response,
        )

        await event_tracker.request_end(RequestEnd(result=result))

        return result

    def get_similarity_indexes(self) -> Dict[AbstractSimilarityIndex, List[IndexLocation]]:
        """
        List all similarity indexes from all views in the collection.

        Returns:
            Mapping of similarity indexes to their locations, following view format.
            For:
                - freeform views, the format is (view_name, table_name, column_name)
                - structured views, the format is (view_name, filter_name, argument_name)
        """
        indexes = defaultdict(list)
        for view_name in self._views:
            view = self.get(view_name)
            view_indexes = view.list_similarity_indexes()
            # The same index may be reported by several views; merge all of its locations under one key.
            for index, location in view_indexes.items():
                indexes[index].extend(location)
        return indexes

    async def update_similarity_indexes(self) -> None:
        """
        Update all similarity indexes from all views in the collection.

        Raises:
            IndexUpdateError: if updating any of the indexes fails. The exception provides `failed_indexes` attribute,
                a dictionary mapping failed indexes to their respective exceptions. Indexes not present in
                the dictionary were updated successfully.
        """
        indexes = self.get_similarity_indexes()
        update_coroutines = [index.update() for index in indexes]
        # `return_exceptions=True` lets every update run to completion instead of aborting on the first failure.
        results = await asyncio.gather(*update_coroutines, return_exceptions=True)
        # `results` is ordered like `update_coroutines`, which follows the iteration order of `indexes`.
        failed_indexes = {
            index: exception for index, exception in zip(indexes, results) if isinstance(exception, Exception)
        }
        if failed_indexes:
            failed_locations = [loc for index in failed_indexes for loc in indexes[index]]
            descriptions = ", ".join(".".join(location) for location in failed_locations)
            raise IndexUpdateError(f"Failed to update similarity indexes for {descriptions}", failed_indexes)