Skip to content

Commit 2e42ed5

Browse files
baskaryanErick Friis
and
Erick Friis
authored
Self-query template (langchain-ai#12694)
Co-authored-by: Erick Friis <[email protected]>
1 parent 1e43025 commit 2e42ed5

File tree

16 files changed

+3240
-21
lines changed

16 files changed

+3240
-21
lines changed

libs/langchain/langchain/chains/query_constructor/parser.py

+20-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import datetime
2-
from typing import Any, Optional, Sequence, Union
2+
from typing import Any, Literal, Optional, Sequence, Union
3+
4+
from typing_extensions import TypedDict
35

46
from langchain.utils import check_package_version
57

@@ -32,14 +34,14 @@ def v_args(*args: Any, **kwargs: Any) -> Any: # type: ignore
3234
3335
?value: SIGNED_INT -> int
3436
| SIGNED_FLOAT -> float
35-
| TIMESTAMP -> timestamp
37+
| DATE -> date
3638
| list
3739
| string
3840
| ("false" | "False" | "FALSE") -> false
3941
| ("true" | "True" | "TRUE") -> true
4042
4143
args: expr ("," expr)*
42-
TIMESTAMP.2: /["'](\d{4}-[01]\d-[0-3]\d)["']/
44+
DATE.2: /["']?(\d{4}-[01]\d-[0-3]\d)["']?/
4345
string: /'[^']*'/ | ESCAPED_STRING
4446
list: "[" [args] "]"
4547
@@ -52,6 +54,11 @@ def v_args(*args: Any, **kwargs: Any) -> Any: # type: ignore
5254
"""
5355

5456

57+
class ISO8601Date(TypedDict):
58+
date: str
59+
type: Literal["date"]
60+
61+
5562
@v_args(inline=True)
5663
class QueryTransformer(Transformer):
5764
"""Transforms a query string into an intermediate representation."""
@@ -129,9 +136,16 @@ def int(self, item: Any) -> int:
129136
def float(self, item: Any) -> float:
130137
return float(item)
131138

132-
def timestamp(self, item: Any) -> datetime.date:
133-
item = item.replace("'", '"')
134-
return datetime.datetime.strptime(item, '"%Y-%m-%d"').date()
139+
def date(self, item: Any) -> ISO8601Date:
140+
item = str(item).strip("\"'")
141+
try:
142+
datetime.datetime.strptime(item, "%Y-%m-%d")
143+
except ValueError as e:
144+
raise ValueError(
145+
"Dates are expected to be provided in ISO 8601 date format "
146+
"(YYYY-MM-DD)."
147+
) from e
148+
return {"date": item, "type": "date"}
135149

136150
def string(self, item: Any) -> str:
137151
# Remove escaped quotes

libs/langchain/langchain/chains/query_constructor/prompt.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@
141141
Make sure that you only use the comparators and logical operators listed above and no others.
142142
Make sure that filters only refer to attributes that exist in the data source.
143143
Make sure that filters only use the attributed names with its function names if there are functions applied on them.
144-
Make sure that filters only use format `YYYY-MM-DD` when handling timestamp data typed values.
144+
Make sure that filters only use format `YYYY-MM-DD` when handling date data typed values.
145145
Make sure that filters take into account the descriptions of attributes and only make comparisons that are feasible given the type of data being stored.
146146
Make sure that filters are only used as needed. If there are no filters that should be applied return "NO_FILTER" for the filter value.\
147147
"""
@@ -175,7 +175,7 @@
175175
Make sure that you only use the comparators and logical operators listed above and no others.
176176
Make sure that filters only refer to attributes that exist in the data source.
177177
Make sure that filters only use the attributed names with its function names if there are functions applied on them.
178-
Make sure that filters only use format `YYYY-MM-DD` when handling timestamp data typed values.
178+
Make sure that filters only use format `YYYY-MM-DD` when handling date data typed values.
179179
Make sure that filters take into account the descriptions of attributes and only make comparisons that are feasible given the type of data being stored.
180180
Make sure that filters are only used as needed. If there are no filters that should be applied return "NO_FILTER" for the filter value.
181181
Make sure the `limit` is always an int value. It is an optional parameter so leave it blank if it does not make sense.

libs/langchain/langchain/retrievers/self_query/myscale.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import datetime
21
import re
32
from typing import Any, Callable, Dict, Tuple
43

@@ -106,9 +105,9 @@ def visit_comparison(self, comparison: Comparison) -> Dict:
106105
value = f"'{value}'" if isinstance(value, str) else value
107106

108107
# wrap date-typed values (dicts with type "date") in parseDateTime32BestEffort
109-
if type(value) is datetime.date:
108+
if isinstance(value, dict) and value.get("type") == "date":
110109
attr = f"parseDateTime32BestEffort({attr})"
111-
value = f"parseDateTime32BestEffort('{value.strftime('%Y-%m-%d')}')"
110+
value = f"parseDateTime32BestEffort('{value['date']}')"
112111

113112
# string pattern match
114113
if comp is Comparator.LIKE:

libs/langchain/langchain/retrievers/self_query/weaviate.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import date, datetime
1+
from datetime import datetime
22
from typing import Dict, Tuple, Union
33

44
from langchain.chains.query_constructor.ir import (
@@ -47,23 +47,26 @@ def visit_operation(self, operation: Operation) -> Dict:
4747

4848
def visit_comparison(self, comparison: Comparison) -> Dict:
4949
value_type = "valueText"
50+
value = comparison.value
5051
if isinstance(comparison.value, bool):
5152
value_type = "valueBoolean"
5253
elif isinstance(comparison.value, float):
5354
value_type = "valueNumber"
5455
elif isinstance(comparison.value, int):
5556
value_type = "valueInt"
56-
elif isinstance(comparison.value, datetime) or isinstance(
57-
comparison.value, date
57+
elif (
58+
isinstance(comparison.value, dict)
59+
and comparison.value.get("type") == "date"
5860
):
5961
value_type = "valueDate"
6062
# ISO 8601 timestamp, formatted as RFC3339
61-
comparison.value = comparison.value.strftime("%Y-%m-%dT%H:%M:%SZ")
63+
date = datetime.strptime(comparison.value["date"], "%Y-%m-%d")
64+
value = date.strftime("%Y-%m-%dT%H:%M:%SZ")
6265
filter = {
6366
"path": [comparison.attribute],
6467
"operator": self._format_func(comparison.comparator),
68+
value_type: value,
6569
}
66-
filter[value_type] = comparison.value
6770
return filter
6871

6972
def visit_structured_query(

libs/langchain/tests/unit_tests/chains/query_constructor/test_parser.py

+7
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,10 @@ def test_parser_unpack_single_arg_operation(op: str, arg: str) -> None:
122122
expected = DEFAULT_PARSER.parse(arg)
123123
actual = DEFAULT_PARSER.parse(f"{op}({arg})")
124124
assert expected == actual
125+
126+
127+
@pytest.mark.parametrize("x", ('"2022-10-20"', "'2022-10-20'", "2022-10-20"))
128+
def test_parse_date_value(x: str) -> None:
129+
parsed = cast(Comparison, DEFAULT_PARSER.parse(f'eq("x", {x})'))
130+
actual = parsed.value["date"]
131+
assert actual == x.strip("'\"")

libs/langchain/tests/unit_tests/retrievers/self_query/test_weaviate.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from datetime import date, datetime
21
from typing import Dict, Tuple
32

43
from langchain.chains.query_constructor.ir import (
@@ -45,20 +44,22 @@ def test_visit_comparison_datetime() -> None:
4544
comp = Comparison(
4645
comparator=Comparator.LTE,
4746
attribute="foo",
48-
value=datetime(2023, 9, 13, 4, 20, 0),
47+
value={"type": "date", "date": "2023-09-13"},
4948
)
5049
expected = {
5150
"operator": "LessThanEqual",
5251
"path": ["foo"],
53-
"valueDate": "2023-09-13T04:20:00Z",
52+
"valueDate": "2023-09-13T00:00:00Z",
5453
}
5554
actual = DEFAULT_TRANSLATOR.visit_comparison(comp)
5655
assert expected == actual
5756

5857

5958
def test_visit_comparison_date() -> None:
6059
comp = Comparison(
61-
comparator=Comparator.LT, attribute="foo", value=date(2023, 9, 13)
60+
comparator=Comparator.LT,
61+
attribute="foo",
62+
value={"type": "date", "date": "2023-09-13"},
6263
)
6364
expected = {
6465
"operator": "LessThan",
@@ -75,7 +76,9 @@ def test_visit_operation() -> None:
7576
arguments=[
7677
Comparison(comparator=Comparator.EQ, attribute="foo", value="hello"),
7778
Comparison(
78-
comparator=Comparator.GTE, attribute="bar", value=date(2023, 9, 13)
79+
comparator=Comparator.GTE,
80+
attribute="bar",
81+
value={"type": "date", "date": "2023-09-13"},
7982
),
8083
Comparison(comparator=Comparator.LTE, attribute="abc", value=1.4),
8184
],

templates/rag-self-query/LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 pingpong-templates
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

templates/rag-self-query/README.md

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# rag-self-query
2+
3+
This template performs RAG using the self-query retrieval technique. The main idea is to let an LLM convert unstructured queries into structured queries. See the [docs for more on how this works](https://python.langchain.com/docs/modules/data_connection/retrievers/self_query).
4+
5+
## Environment Setup
6+
7+
In this template we'll use OpenAI models and an Elasticsearch vector store, but the approach generalizes to all LLMs/ChatModels and [a number of vector stores](https://python.langchain.com/docs/integrations/retrievers/self_query/).
8+
9+
Set the `OPENAI_API_KEY` environment variable to access the OpenAI models.
10+
11+
To connect to your Elasticsearch instance, use the following environment variables:
12+
13+
```bash
14+
export ELASTIC_CLOUD_ID=<CLOUD_ID>
15+
export ELASTIC_USERNAME=<CLOUD_USERNAME>
16+
export ELASTIC_PASSWORD=<CLOUD_PASSWORD>
17+
```
18+
For local development with Docker, use:
19+
20+
```bash
21+
export ES_URL="http://localhost:9200"
22+
docker run -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "xpack.security.http.ssl.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.9.0
23+
```
24+
25+
## Usage
26+
27+
To use this package, you should first have the LangChain CLI installed:
28+
29+
```shell
30+
pip install -U "langchain-cli[serve]"
31+
```
32+
33+
To create a new LangChain project and install this as the only package, you can do:
34+
35+
```shell
36+
langchain app new my-app --package rag-self-query
37+
```
38+
39+
If you want to add this to an existing project, you can just run:
40+
41+
```shell
42+
langchain app add rag-self-query
43+
```
44+
45+
And add the following code to your `server.py` file:
46+
```python
47+
from rag_self_query import chain
48+
49+
add_routes(app, chain, path="/rag-elasticsearch")
50+
```
51+
52+
To populate the vector store with the sample data, from the root of the directory run:
53+
```bash
54+
python ingest.py
55+
```
56+
57+
(Optional) Let's now configure LangSmith.
58+
LangSmith will help us trace, monitor and debug LangChain applications.
59+
LangSmith is currently in private beta, you can sign up [here](https://smith.langchain.com/).
60+
If you don't have access, you can skip this section.
61+
62+
```shell
63+
export LANGCHAIN_TRACING_V2=true
64+
export LANGCHAIN_API_KEY=<your-api-key>
65+
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
66+
```
67+
68+
If you are inside this directory, then you can spin up a LangServe instance directly by:
69+
70+
```shell
71+
langchain serve
72+
```
73+
74+
This will start the FastAPI app with a server running locally at
75+
[http://localhost:8000](http://localhost:8000)
76+
77+
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
78+
We can access the playground at [http://127.0.0.1:8000/rag-elasticsearch/playground](http://127.0.0.1:8000/rag-elasticsearch/playground)
79+
80+
We can access the template from code with:
81+
82+
```python
83+
from langserve.client import RemoteRunnable
84+
85+
runnable = RemoteRunnable("http://localhost:8000/rag-elasticsearch")
86+
```

0 commit comments

Comments
 (0)