# EXAMPLE: query_combined
# HIDE_START
# Origin: doctests/query_combined.py, added for DOC-4199
# ("add TCEs to the combined query page").
import json
import numpy as np
import redis
import warnings
from redis.commands.json.path import Path
from redis.commands.search.field import NumericField, TagField, TextField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from sentence_transformers import SentenceTransformer


def embed_text(model, text):
    """Encode *text* with *model* and return the result as float32 bytes.

    The output of ``model.encode(text)`` is converted to a NumPy array, cast
    to ``float32`` and serialized with ``tobytes()`` so that it can be passed
    as a binary query parameter.
    """
    return np.array(model.encode(text)).astype(np.float32).tobytes()


# Silence the FutureWarning whose message mentions
# ``clean_up_tokenization_spaces`` so it does not clutter the example output.
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message=r".*clean_up_tokenization_spaces.*",
)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
query = "Bike for small kids"
query_vector = embed_text(model, query)

r = redis.Redis(decode_responses=True)

# create index
# NOTE(review): "$.description" is indexed under the alias "model", while the
# combined4/combined5 steps below query "@description" and expect 0 hits.
# Presumably the alias is why those queries match nothing -- confirm that
# this is the intended behavior for the published example.
schema = (
    TextField("$.description", no_stem=True, as_name="model"),
    TagField("$.condition", as_name="condition"),
    NumericField("$.price", as_name="price"),
    VectorField(
        "$.description_embeddings",
        "FLAT",
        {
            "TYPE": "FLOAT32",
            "DIM": 384,
            "DISTANCE_METRIC": "COSINE",
        },
        as_name="vector",
    ),
)

index = r.ft("idx:bicycle")
index.create_index(
    schema,
    definition=IndexDefinition(prefix=["bicycle:"], index_type=IndexType.JSON),
)

# load data
# JSON fixtures are UTF-8; be explicit instead of relying on the platform
# default encoding.
with open("data/query_vector.json", encoding="utf-8") as f:
    bicycles = json.load(f)

# Queue one JSON.SET per document (keys "bicycle:0", "bicycle:1", ...) and
# send them together in a non-transactional pipeline.
pipeline = r.pipeline(transaction=False)
for bid, bicycle in enumerate(bicycles):
    pipeline.json().set(f"bicycle:{bid}", Path.root_path(), bicycle)
pipeline.execute()
# HIDE_END

# STEP_START combined1
q = Query("@price:[500 1000] @condition:{new}")
res = index.search(q)
print(res.total)  # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END

# STEP_START combined2
q = Query("kids @price:[500 1000] @condition:{used}")
res = index.search(q)
print(res.total)  # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END

# STEP_START combined3
q = Query("(kids | small) @condition:{used}")
res = index.search(q)
print(res.total)  # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END

# STEP_START combined4
q = Query("@description:(kids | small) @condition:{used}")
res = index.search(q)
print(res.total)  # >>> 0
# REMOVE_START
assert res.total == 0
# REMOVE_END
# STEP_END

# STEP_START combined5
q = Query("@description:(kids | small) @condition:{new | used}")
res = index.search(q)
print(res.total)  # >>> 0
# REMOVE_START
assert res.total == 0
# REMOVE_END
# STEP_END

# STEP_START combined6
q = Query("@price:[500 1000] -@condition:{new}")
res = index.search(q)
print(res.total)  # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END

# STEP_START combined7
q = Query(
    "(@price:[500 1000] -@condition:{new})=>[KNN 3 @vector $query_vector]"
).dialect(2)
res = index.search(q, {"query_vector": query_vector})
print(res.total)  # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END

# REMOVE_START
# destroy index and data
r.ft("idx:bicycle").dropindex(delete_documents=True)
# REMOVE_END