Skip to content

Commit e40703a

Browse files
authored
Flaky test fixes (#328)
- Centralize HF vectorizers - Use worker ID in search index names and keys
1 parent b44ac7b commit e40703a

12 files changed

+174
-93
lines changed

redisvl/extensions/cache/llm/schema.py

-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ class SemanticCacheIndexSchema(IndexSchema):
114114

115115
@classmethod
116116
def from_params(cls, name: str, prefix: str, vector_dims: int, dtype: str):
117-
118117
return cls(
119118
index={"name": name, "prefix": prefix}, # type: ignore
120119
fields=[ # type: ignore

tests/conftest.py

+40-17
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,32 @@
1010
from redisvl.utils.vectorize import HFTextVectorizer
1111

1212

13+
@pytest.fixture(scope="session")
14+
def worker_id(request):
15+
"""
16+
Get the worker ID for the current test.
17+
18+
In pytest-xdist, the config has "workerid" in workerinput.
19+
This fixture abstracts that logic to provide a consistent worker_id
20+
across all tests.
21+
"""
22+
workerinput = getattr(request.config, "workerinput", {})
23+
return workerinput.get("workerid", "master")
24+
25+
1326
@pytest.fixture(autouse=True)
1427
def set_tokenizers_parallelism():
1528
"""Disable tokenizers parallelism in tests to avoid deadlocks"""
1629
os.environ["TOKENIZERS_PARALLELISM"] = "false"
1730

1831

1932
@pytest.fixture(scope="session", autouse=True)
20-
def redis_container(request):
33+
def redis_container(worker_id):
2134
"""
2235
If using xdist, create a unique Compose project for each xdist worker by
2336
setting COMPOSE_PROJECT_NAME. That prevents collisions on container/volume
2437
names.
2538
"""
26-
# In xdist, the config has "workerid" in workerinput
27-
workerinput = getattr(request.config, "workerinput", {})
28-
worker_id = workerinput.get("workerid", "master")
29-
3039
# Set the Compose project name so containers do not clash across workers
3140
os.environ["COMPOSE_PROJECT_NAME"] = f"redis_test_{worker_id}"
3241
os.environ.setdefault("REDIS_IMAGE", "redis/redis-stack-server:latest")
@@ -80,6 +89,16 @@ def hf_vectorizer():
8089
)
8190

8291

92+
@pytest.fixture(scope="session")
93+
def hf_vectorizer_float16():
94+
return HFTextVectorizer(dtype="float16")
95+
96+
97+
@pytest.fixture(scope="session")
98+
def hf_vectorizer_with_model():
99+
return HFTextVectorizer("sentence-transformers/all-mpnet-base-v2")
100+
101+
83102
@pytest.fixture
84103
def sample_datetimes():
85104
return {
@@ -196,16 +215,17 @@ def pytest_collection_modifyitems(
196215

197216

198217
@pytest.fixture
199-
def flat_index(sample_data, redis_url):
218+
def flat_index(sample_data, redis_url, worker_id):
200219
"""
201220
A fixture that uses the "flat" algorithm for its vector field.
202221
"""
222+
203223
# construct a search index from the schema
204224
index = SearchIndex.from_dict(
205225
{
206226
"index": {
207-
"name": "user_index",
208-
"prefix": "v1",
227+
"name": f"user_index_{worker_id}",
228+
"prefix": f"v1_{worker_id}",
209229
"storage_type": "hash",
210230
},
211231
"fields": [
@@ -250,16 +270,17 @@ def hash_preprocess(item: dict) -> dict:
250270

251271

252272
@pytest.fixture
253-
async def async_flat_index(sample_data, redis_url):
273+
async def async_flat_index(sample_data, redis_url, worker_id):
254274
"""
255275
A fixture that uses the "flat" algorithm for its vector field.
256276
"""
277+
257278
# construct a search index from the schema
258279
index = AsyncSearchIndex.from_dict(
259280
{
260281
"index": {
261-
"name": "user_index",
262-
"prefix": "v1",
282+
"name": f"user_index_{worker_id}",
283+
"prefix": f"v1_{worker_id}",
263284
"storage_type": "hash",
264285
},
265286
"fields": [
@@ -304,15 +325,16 @@ def hash_preprocess(item: dict) -> dict:
304325

305326

306327
@pytest.fixture
307-
async def async_hnsw_index(sample_data, redis_url):
328+
async def async_hnsw_index(sample_data, redis_url, worker_id):
308329
"""
309330
A fixture that uses the "hnsw" algorithm for its vector field.
310331
"""
332+
311333
index = AsyncSearchIndex.from_dict(
312334
{
313335
"index": {
314-
"name": "user_index",
315-
"prefix": "v1",
336+
"name": f"user_index_{worker_id}",
337+
"prefix": f"v1_{worker_id}",
316338
"storage_type": "hash",
317339
},
318340
"fields": [
@@ -354,15 +376,16 @@ def hash_preprocess(item: dict) -> dict:
354376

355377

356378
@pytest.fixture
357-
def hnsw_index(sample_data, redis_url):
379+
def hnsw_index(sample_data, redis_url, worker_id):
358380
"""
359381
A fixture that uses the "hnsw" algorithm for its vector field.
360382
"""
383+
361384
index = SearchIndex.from_dict(
362385
{
363386
"index": {
364-
"name": "user_index",
365-
"prefix": "v1",
387+
"name": f"user_index_{worker_id}",
388+
"prefix": f"v1_{worker_id}",
366389
"storage_type": "hash",
367390
},
368391
"fields": [

tests/integration/test_aggregation.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010

1111

1212
@pytest.fixture
13-
def index(sample_data, redis_url):
13+
def index(sample_data, redis_url, worker_id):
1414
index = SearchIndex.from_dict(
1515
{
1616
"index": {
17-
"name": "user_index",
18-
"prefix": "v1",
17+
"name": f"user_index_{worker_id}",
18+
"prefix": f"v1_{worker_id}",
1919
"storage_type": "hash",
2020
},
2121
"fields": [

tests/integration/test_async_search_index.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,24 @@ def async_index(index_schema, async_client):
3232

3333

3434
@pytest.fixture
35-
def async_index_from_dict():
36-
return AsyncSearchIndex.from_dict({"index": {"name": "my_index"}, "fields": fields})
35+
def async_index_from_dict(worker_id):
36+
37+
return AsyncSearchIndex.from_dict(
38+
{
39+
"index": {"name": f"my_index_{worker_id}", "prefix": f"rvl_{worker_id}"},
40+
"fields": fields,
41+
}
42+
)
3743

3844

3945
@pytest.fixture
40-
def async_index_from_yaml():
41-
return AsyncSearchIndex.from_yaml("schemas/test_json_schema.yaml")
46+
def async_index_from_yaml(worker_id):
47+
48+
index = AsyncSearchIndex.from_yaml("schemas/test_json_schema.yaml")
49+
# Update the index name and prefix to include worker_id
50+
index.schema.index.name = f"{index.schema.index.name}_{worker_id}"
51+
index.schema.index.prefix = f"{index.schema.index.prefix}_{worker_id}"
52+
return index
4253

4354

4455
def test_search_index_properties(index_schema, async_index):
@@ -56,18 +67,18 @@ def test_search_index_properties(index_schema, async_index):
5667

5768

5869
def test_search_index_from_yaml(async_index_from_yaml):
59-
assert async_index_from_yaml.name == "json-test"
70+
assert async_index_from_yaml.name.startswith("json-test")
6071
assert async_index_from_yaml.client is None
61-
assert async_index_from_yaml.prefix == "json"
72+
assert async_index_from_yaml.prefix.startswith("json_")
6273
assert async_index_from_yaml.key_separator == ":"
6374
assert async_index_from_yaml.storage_type == StorageType.JSON
6475
assert async_index_from_yaml.key("foo").startswith(async_index_from_yaml.prefix)
6576

6677

6778
def test_search_index_from_dict(async_index_from_dict):
68-
assert async_index_from_dict.name == "my_index"
79+
assert async_index_from_dict.name.startswith("my_index")
6980
assert async_index_from_dict.client is None
70-
assert async_index_from_dict.prefix == "rvl"
81+
assert async_index_from_dict.prefix.startswith("rvl_")
7182
assert async_index_from_dict.key_separator == ":"
7283
assert async_index_from_dict.storage_type == StorageType.HASH
7384
assert len(async_index_from_dict.schema.fields) == len(fields)

tests/integration/test_flow.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,12 @@
4242

4343

4444
@pytest.mark.parametrize("schema", [hash_schema, json_schema])
45-
def test_simple(client, schema, sample_data):
45+
def test_simple(client, schema, sample_data, worker_id):
46+
# Update schema with worker_id
47+
schema = schema.copy()
48+
schema["index"] = schema["index"].copy()
49+
schema["index"]["name"] = f"{schema['index']['name']}_{worker_id}"
50+
schema["index"]["prefix"] = f"{schema['index']['prefix']}_{worker_id}"
4651
index = SearchIndex.from_dict(schema, redis_client=client)
4752
# create the index
4853
index.create(overwrite=True, drop=True)

tests/integration/test_flow_async.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,12 @@
4646

4747
@pytest.mark.asyncio
4848
@pytest.mark.parametrize("schema", [hash_schema, json_schema])
49-
async def test_simple(async_client, schema, sample_data):
49+
async def test_simple(async_client, schema, sample_data, worker_id):
50+
# Update schema with worker_id
51+
schema = schema.copy()
52+
schema["index"] = schema["index"].copy()
53+
schema["index"]["name"] = f"{schema['index']['name']}_{worker_id}"
54+
schema["index"]["prefix"] = f"{schema['index']['prefix']}_{worker_id}"
5055
index = AsyncSearchIndex.from_dict(schema, redis_client=async_client)
5156
# create the index
5257
await index.create(overwrite=True, drop=True)

0 commit comments

Comments
 (0)