Skip to content

Commit

Permalink
notebook runs
Browse files Browse the repository at this point in the history
  • Loading branch information
sdan committed Apr 1, 2024
1 parent 1f22c35 commit 85ab088
Show file tree
Hide file tree
Showing 5 changed files with 363 additions and 115 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='vlite',
- version='1.0.1',
+ version='1.1.1',
author='Surya Dantuluri',
author_email='[email protected]',
description='A simple vector database that stores vectors in a numpy array.',
Expand Down
70 changes: 3 additions & 67 deletions tests/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,69 +257,9 @@ def main(query, corpuss, top_k, token_counts) -> pd.DataFrame:
)

#################################################
- # tinyvector #
+ # vespa #
#################################################
print("Begin tinyvector benchmark.")
print("Initializing tinyvector instance...")
base_url = "http://localhost:5001"
table_name = "test"
index_types = "brute_force"
vector_dim = 384
t0 = time.time()
print("Creating tinyvector instance")
response = requests.post(f"{base_url}/create_table", json={
"table_name": table_name,
"index_types": index_types,
"dimension": 384,
"use_uuid": True
})
print("Status code: ", response.status_code)
print("Creating tinyvector index")
try:
response = requests.post(f"{base_url}/create_index", json={
"table_name": table_name,
"index_types": index_types
})
print("Status code: ", response.status_code)
except:
print("Error tinyvector index already exists")
t0 = time.time()

t1 = time.time()
print(f"Took {t1 - t0:.3f}s to initialize")

print("Adding vectors to tinyvector instance...")
model = SentenceTransformer('all-MiniLM-L6-v2')
t0 = time.time()
for i in range(len(corpus)):
embeddings = model.encode([corpus[i]]).tolist()
print("[tinyvec] vector: ", i)
print("[tinyvec] vector: ", embeddings[0])
print("[tinyvec] corpus: ", corpus[i])
try:
response = requests.post(f"{base_url}/insert", json={
"table_name": table_name,
"ids": [i],
"vectors": embeddings[0],
"content": corpus[i]
})
except:
print("Error inserting vector")
t0 = time.time()
break

t1 = time.time()
print(f"Took {t1 - t0:.3f}s to add vectors.")
indexing_times.append(
{
"num_tokens": token_count,
"lib": "tinyvector",
"num_embeddings": len(corpus),
"indexing_time": t1 - t0,
}
)

# no query for now


#################################################
# qdrant #
Expand Down Expand Up @@ -411,11 +351,7 @@ def main(query, corpuss, top_k, token_counts) -> pd.DataFrame:
# milvus #
#################################################

# too complicated docs
temp_results = pd.DataFrame(results)
temp_indexing_times = pd.DataFrame(indexing_times)
temp_results.to_csv("temp_vlite_benchmark_query.csv", index=False)
temp_indexing_times.to_csv("temp_vlite_benchmark_index.csv", index=False)


results = pd.DataFrame(results)
indexing_times = pd.DataFrame(indexing_times)
Expand Down
Loading

0 comments on commit 85ab088

Please sign in to comment.