Skip to content

Commit

Permalink
Warmup queries
Browse files Browse the repository at this point in the history
  • Loading branch information
LuQQiu committed Nov 13, 2024
1 parent 76a94a0 commit b0694d6
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ def add_benchmark_args(parser: argparse.ArgumentParser):
action=argparse.BooleanOptionalAction,
help="drop tables before starting",
)
parser.add_argument(
"-w",
"--warmup-queries",
type=int,
help="Number of warmup queries to run against each table to help the cluster warm up. Defaults to the number of queries.",
)


class Benchmark:
Expand All @@ -100,6 +106,7 @@ def __init__(
index: bool,
prefix: str,
reset: bool,
warmup_queries: int,
):
self.dataset = dataset
self.num_tables = num_tables
Expand All @@ -109,6 +116,7 @@ def __init__(
self.index = index
self.prefix = prefix
self.reset = reset
self.warmup_queries = warmup_queries

self.db = lancedb.connect(
uri=os.environ["LANCEDB_DB_URI"],
Expand Down Expand Up @@ -340,7 +348,7 @@ def _convert_dataset(self, schema) -> Iterable[pa.RecordBatch]:
for b in buffer:
yield b

def _query_table(self, table: RemoteTable, warmup_queries=100):
def _query_table(self, table: RemoteTable):
# log a warning if data is not fully indexed
try:
total_rows = table.count_rows()
Expand All @@ -355,9 +363,9 @@ def _query_table(self, table: RemoteTable, warmup_queries=100):
print(f"{table.name}: failed to check index status: {e}")

print(
f"{table.name}: starting query test. {self.num_queries=} {warmup_queries=} {total_rows=}"
f"{table.name}: starting query test. {self.num_queries=} {self.warmup_queries=} {total_rows=}"
)
for _ in range(warmup_queries):
for _ in range(self.warmup_queries):
self._query(table)

diffs = []
Expand Down Expand Up @@ -425,12 +433,13 @@ def run_multi_benchmark(
index: bool,
prefix: str,
reset: bool,
warmup_queries: int,
) -> BenchmarkResults:
total_processes = num_processes * (
query_processes if not ingest and not index else 1
)
print(f"Starting {total_processes} benchmark processes...")

warmup_queries = min(warmup_queries, num_queries)
bench_kwargs = {
"dataset": dataset,
"num_tables": num_tables,
Expand All @@ -441,6 +450,7 @@ def run_multi_benchmark(
"index": index,
"prefix": prefix, # Base prefix, will be modified per process
"reset": reset,
"warmup_queries": warmup_queries,
}

process_args = []
Expand Down Expand Up @@ -506,6 +516,8 @@ def main():
add_benchmark_args(parser)
args = parser.parse_args()
validate_args(args)
if args.warmup_queries is None:
args.warmup_queries = args.queries
print(args)

result = run_multi_benchmark(
Expand All @@ -520,6 +532,7 @@ def main():
args.index,
args.prefix,
args.reset,
args.warmup_queries,
)

result.print()
Expand Down

0 comments on commit b0694d6

Please sign in to comment.