Skip to content

Commit

Permalink
add query filter by distance threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
potofo committed Jan 2, 2024
1 parent f793331 commit 8c65975
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 5 deletions.
28 changes: 24 additions & 4 deletions query_livedoor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@
print('connecting collection of vecor database spendding {0} seconds.'.format(str(round(time_diff,3))))


#prompt = "宮崎あおい"
prompt = "宮崎あおいが出演する映画を教えてください。"
#prompt = "ヴェネチア国際映画祭で金獅子賞を受賞した映画はなんですか?"
#prompt = "兎が出てくるホラー映画はなんですか?"
#prompt = "映画三銃士の主演女優の名前を教えてください。"
#prompt = "スリリングでかっこいいスパイ映画の名前とその映画の見どころを教えてください。"
#prompt = "子供向けのファンタジー映画で人気のある映画の名前とその映画の見どころを教えてください。"
prompt = "子供向けのアニメ映画のおすすめのタイトルと、その映画の見どころを教えてください。"
#prompt = "子供向けのアニメ映画のおすすめのタイトルと、その映画の見どころを教えてください。"
#prompt = "アップルの最新のOSの特徴を教えてください。"

start = time.time()
results = collection.query(
Expand All @@ -60,9 +61,28 @@
end = time.time()
time_diff = end - start
spendding_time = round(time_diff,2)
print('query documents spendding {0} seconds.'.format(str(round(time_diff,3))))

# distance_threshold
# The meaning of distance_threshold depends on the distance metric configured for the collection.
# For example, with cosine distance (1 - cosine similarity), identical vectors score 0,
# whereas unrelated (orthogonal) vectors score 1.
# With other distance metrics, such as Euclidean distance, the scores can range from 0 (the same point) to infinity.
# For example, with cosine distance a distance_threshold of 0.35 keeps only results whose similarity to the query is at least 65%; anything farther away is removed.
distance_threshold = 0.35

# Drop every hit whose distance exceeds distance_threshold.
# results['ids'], ['documents'], ['distances'] and ['metadatas'] are walked in
# lockstep, one group of parallel lists per zip step. Each list is rewritten
# in place via slice assignment, so `results` keeps referring to the same
# (now shorter) list objects.
for hit_ids, hit_docs, hit_distances, hit_metas in zip(
    results['ids'],
    results['documents'],
    results['distances'],
    results['metadatas'],
):
    # Positions to keep; `not (d > t)` mirrors the original removal test exactly.
    keep = [
        pos
        for pos, dist in enumerate(hit_distances)
        if not (dist > distance_threshold)
    ]
    hit_ids[:] = [hit_ids[pos] for pos in keep]
    hit_docs[:] = [hit_docs[pos] for pos in keep]
    hit_metas[:] = [hit_metas[pos] for pos in keep]
    # Rewritten last: `keep` was derived from this list.
    hit_distances[:] = [hit_distances[pos] for pos in keep]



print(results['ids'])
print(results['distances'])
print(results['documents'])
#print(results['documents'])
print(results['metadatas'])
print('query documents spendding {0} seconds.'.format(str(round(time_diff,3))))
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ chromadb==0.4.17
langchain==0.0.334
unstructured==0.10.30
torch==2.0.0
sentence-transformers==2.2.2
sentence-transformers==2.2.2
requests==2.31.0
beautifulsoup4==4.12.2

0 comments on commit 8c65975

Please sign in to comment.