@@ -131,6 +131,8 @@ class Query(pydantic.BaseModel):
131
131
132
132
fast_search : bool = False
133
133
134
+ ef : Optional [int ] = None
135
+
134
136
135
137
class LanceQueryBuilder (ABC ):
136
138
"""An abstract query builder. Subclasses are defined for vector search,
@@ -257,6 +259,7 @@ def __init__(self, table: "Table"):
257
259
self ._with_row_id = False
258
260
self ._vector = None
259
261
self ._text = None
262
+ self ._ef = None
260
263
261
264
@deprecation .deprecated (
262
265
deprecated_in = "0.3.1" ,
@@ -638,6 +641,28 @@ def nprobes(self, nprobes: int) -> LanceVectorQueryBuilder:
638
641
self ._nprobes = nprobes
639
642
return self
640
643
644
def ef(self, ef: int) -> LanceVectorQueryBuilder:
    """Choose how many candidates the search examines.

    Raising this number improves recall (existing matches are more likely
    to be found) and costs additional latency.

    The setting is only relevant for HNSW-related indices. When it is not
    set, the default is 1.5 * limit.

    Parameters
    ----------
    ef: int
        Candidate count to examine while searching.

    Returns
    -------
    LanceVectorQueryBuilder
        This builder, so that calls can be chained.
    """
    # Only recorded here; the value is handed to the query when it is built.
    self._ef = ef
    return self
665
+
641
666
def refine_factor (self , refine_factor : int ) -> LanceVectorQueryBuilder :
642
667
"""Set the refine factor to use, increasing the number of vectors sampled.
643
668
@@ -700,6 +725,7 @@ def to_batches(self, /, batch_size: Optional[int] = None) -> pa.RecordBatchReade
700
725
with_row_id = self ._with_row_id ,
701
726
offset = self ._offset ,
702
727
fast_search = self ._fast_search ,
728
+ ef = self ._ef ,
703
729
)
704
730
result_set = self ._table ._execute_query (query , batch_size )
705
731
if self ._reranker is not None :
@@ -1071,6 +1097,8 @@ def to_arrow(self) -> pa.Table:
1071
1097
self ._vector_query .nprobes (self ._nprobes )
1072
1098
if self ._refine_factor :
1073
1099
self ._vector_query .refine_factor (self ._refine_factor )
1100
+ if self ._ef :
1101
+ self ._vector_query .ef (self ._ef )
1074
1102
1075
1103
with ThreadPoolExecutor () as executor :
1076
1104
fts_future = executor .submit (self ._fts_query .with_row_id (True ).to_arrow )
@@ -1197,6 +1225,29 @@ def nprobes(self, nprobes: int) -> LanceHybridQueryBuilder:
1197
1225
self ._nprobes = nprobes
1198
1226
return self
1199
1227
1228
def ef(self, ef: int) -> LanceHybridQueryBuilder:
    """Choose how many candidates the search examines.

    Raising this number improves recall (existing matches are more likely
    to be found) and costs additional latency.

    The setting is only relevant for HNSW-related indices. When it is not
    set, the default is 1.5 * limit.

    Parameters
    ----------
    ef: int
        Candidate count to examine while searching.

    Returns
    -------
    LanceHybridQueryBuilder
        This hybrid builder, so that calls can be chained.
    """
    # Only recorded here; it is applied later when the query is executed.
    self._ef = ef
    return self
1250
+
1200
1251
def metric (self , metric : Literal ["L2" , "cosine" , "dot" ]) -> LanceHybridQueryBuilder :
1201
1252
"""Set the distance metric to use.
1202
1253
@@ -1644,6 +1695,21 @@ def nprobes(self, nprobes: int) -> AsyncVectorQuery:
1644
1695
self ._inner .nprobes (nprobes )
1645
1696
return self
1646
1697
1698
def ef(self, ef: int) -> AsyncVectorQuery:
    """
    Choose how many candidates the search examines.

    The value is consulted only when the vector column carries an HNSW
    index; without an index it is ignored.

    A larger value raises the recall of the query and also raises its
    latency. The default is 1.5 * limit, which suits many workloads, but
    the best setting depends on your data and on the recall you need.
    """
    # Delegate to the wrapped query object, then hand back this wrapper so
    # that calls can be chained.
    self._inner.ef(ef)
    return self
1712
+
1647
1713
def refine_factor (self , refine_factor : int ) -> AsyncVectorQuery :
1648
1714
"""
1649
1715
A multiplier to control how many additional rows are taken during the refine
0 commit comments