-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_context.py
57 lines (45 loc) · 1.67 KB
/
create_context.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from chromadb.api.types import IncludeEnum
from models import embed_func
from logger import setup_logger
from alkemio_virtual_contributor_engine.chromadb_client import chromadb_client
# Module-level logger, built by the project's setup_logger helper from this module's __name__.
logger = setup_logger(__name__)
def combine_documents(docs, document_separator="\n\n"):
    """Join the first batch of retrieved documents into one context string.

    Each document is prefixed with a ``[source:N]`` tag, where ``N`` is its
    zero-based position in the batch, and the tagged chunks are joined with
    ``document_separator``.

    Args:
        docs: Mapping with a ``"documents"`` entry holding a list of document
            batches; only the first batch (``docs["documents"][0]``) is used.
        document_separator: String placed between consecutive tagged chunks.

    Returns:
        The combined string, or ``""`` when ``"documents"`` is ``None``,
        an empty list, or its first batch contains no documents.

    Raises:
        KeyError: If ``docs`` is a dict without a ``"documents"`` entry.
    """
    batches = docs["documents"]
    # A missing or empty result previously failed on ``[0]`` with
    # TypeError/IndexError; treat it as "nothing retrieved" instead.
    first_batch = batches[0] if batches else []
    return document_separator.join(
        f"[source:{index}] {document}"
        for index, document in enumerate(first_batch)
    )
def get_documents(message: str):
    """Query each known knowledge collection and merge the hits.

    Every collection named below is opened through ``chromadb_client`` with
    ``embed_func`` as its embedding function and queried with ``message``
    using ``n_results=3``. Hits are appended in collection order; no
    re-ranking across collections is performed.

    Args:
        message: Text used as the single query for each collection.

    Returns:
        A dict with ``"documents"``, ``"metadatas"`` and ``"distances"``
        keys, each holding a one-element list whose inner list accumulates
        the corresponding values from all queried collections.
    """
    collection_names = (
        "alkem.io-knowledge",
        "welcome.alkem.io-knowledge",
        "www.alkemio.org-knowledge",
    )
    merged = {"documents": [[]], "metadatas": [[]], "distances": [[]]}

    for name in collection_names:
        store = chromadb_client.get_collection(
            name, embedding_function=embed_func
        )
        hits = store.query(
            query_texts=[message],
            include=[
                IncludeEnum.documents,
                IncludeEnum.metadatas,
                IncludeEnum.distances,
            ],
            n_results=3,
        )

        # Skip a collection whose response lacks any of the three fields, so
        # the merged lists always stay aligned with one another.
        has_all_fields = (
            hits
            and hits["documents"]
            and hits["distances"]
            and hits["metadatas"]
        )
        if not has_all_fields:
            continue

        for field in ("distances", "documents", "metadatas"):
            merged[field][0].extend(hits[field][0])

    return merged
def create_context(message):
    """Retrieve knowledge documents for ``message`` and build the context text.

    Args:
        message: Query text passed to ``get_documents``.

    Returns:
        A 2-tuple ``(documents, combined)``: the raw result returned by
        ``get_documents`` and the string ``combine_documents`` builds from it.
    """
    retrieved = get_documents(message)
    logger.info("Context retrieved.")
    logger.debug(f"Context is {retrieved}")
    # Combine only after logging, so both log lines are emitted first.
    combined_text = combine_documents(retrieved)
    return retrieved, combined_text