Skip to content

Commit

Permalink
chore: updated migrations + added indices support
Browse files Browse the repository at this point in the history
  • Loading branch information
Vedantsahai18 committed Dec 21, 2024
1 parent 74add36 commit 249513d
Show file tree
Hide file tree
Showing 15 changed files with 349 additions and 280 deletions.
9 changes: 5 additions & 4 deletions agents-api/agents_api/queries/developers/get_developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
SELECT * FROM developers WHERE developer_id = $1 -- developer_id
""").sql(pretty=True)

ModelT = TypeVar("ModelT", bound=Any)
T = TypeVar("T")


@rewrap_exceptions(
{
Expand All @@ -37,7 +34,11 @@
)
}
)
@wrap_in_class(Developer, one=True, transform=lambda d: {**d, "id": d["developer_id"]})
@wrap_in_class(
Developer,
one=True,
transform=lambda d: {**d, "id": d["developer_id"]},
)
@pg_query
@beartype
async def get_developer(
Expand Down
6 changes: 4 additions & 2 deletions agents-api/agents_api/queries/docs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Listing documents based on various criteria, including ownership and metadata filters.
- Deleting documents by their unique identifiers.
- Embedding document snippets for retrieval purposes.
- Searching documents by text.
The module interacts with other parts of the application, such as the agents and users modules, to provide a comprehensive document management system. Its role is crucial in enabling document search, retrieval, and management features within the context of agents and users.
Expand All @@ -22,12 +23,13 @@
from .list_docs import list_docs

# from .search_docs_by_embedding import search_docs_by_embedding
# from .search_docs_by_text import search_docs_by_text
from .search_docs_by_text import search_docs_by_text

__all__ = [
"create_doc",
"delete_doc",
"get_doc",
"list_docs",
# "search_docs_by_embct",
# "search_docs_by_embedding",
"search_docs_by_text",
]
141 changes: 103 additions & 38 deletions agents-api/agents_api/queries/docs/create_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,38 @@
INSERT INTO doc_owners (
developer_id,
doc_id,
index,
owner_type,
owner_id
)
VALUES ($1, $2, $3, $4)
VALUES ($1, $2, $3, $4, $5)
RETURNING doc_id
)
SELECT d.*
SELECT DISTINCT ON (docs.doc_id)
docs.doc_id,
docs.developer_id,
docs.title,
array_agg(docs.content ORDER BY docs.index) as content,
array_agg(docs.index ORDER BY docs.index) as indices,
docs.modality,
docs.embedding_model,
docs.embedding_dimensions,
docs.language,
docs.metadata,
docs.created_at
FROM inserted_owner io
JOIN docs d ON d.doc_id = io.doc_id;
JOIN docs ON docs.doc_id = io.doc_id
GROUP BY
docs.doc_id,
docs.developer_id,
docs.title,
docs.modality,
docs.embedding_model,
docs.embedding_dimensions,
docs.language,
docs.metadata,
docs.created_at;
""").sql(pretty=True)


Expand All @@ -82,11 +105,10 @@
Doc,
one=True,
transform=lambda d: {
**d,
"id": d["doc_id"],
"content": ast.literal_eval(d["content"])[0]
if len(ast.literal_eval(d["content"])) == 1
else ast.literal_eval(d["content"]),
"index": d["indices"][0],
"content": d["content"][0] if len(d["content"]) == 1 else d["content"],
**d,
},
)
@increase_counter("create_doc")
Expand All @@ -97,56 +119,99 @@ async def create_doc(
developer_id: UUID,
doc_id: UUID | None = None,
data: CreateDocRequest,
owner_type: Literal["user", "agent"] | None = None,
owner_id: UUID | None = None,
owner_type: Literal["user", "agent"],
owner_id: UUID,
modality: Literal["text", "image", "mixed"] | None = "text",
embedding_model: str | None = "voyage-3",
embedding_dimensions: int | None = 1024,
language: str | None = "english",
index: int | None = 0,
) -> list[tuple[str, list] | tuple[str, list, str]]:
) -> list[tuple[str, list, Literal["fetch", "fetchmany", "fetchrow"]]]:
"""
Insert a new doc record into Timescale and optionally associate it with an owner.
Insert a new doc record into Timescale and associate it with an owner.
Parameters:
owner_type (Literal["user", "agent"]): The type of the owner of the documents.
owner_id (UUID): The ID of the owner of the documents.
developer_id (UUID): The ID of the developer.
doc_id (UUID | None): Optional custom UUID for the document. If not provided, one will be generated.
data (CreateDocRequest): The data for the document.
owner_type (Literal["user", "agent"]): The type of the owner (required).
owner_id (UUID): The ID of the owner (required).
modality (Literal["text", "image", "mixed"]): The modality of the documents.
embedding_model (str): The model used for embedding.
embedding_dimensions (int): The dimensions of the embedding.
language (str): The language of the documents.
index (int): The index of the documents.
data (CreateDocRequest): The data for the document.
Returns:
list[tuple[str, list, Literal["fetch", "fetchmany", "fetchrow"]]]: SQL queries, their parameters, and the fetch method to use for each.
"""
queries = []
# Generate a UUID if not provided
doc_id = doc_id or uuid7()
current_doc_id = uuid7() if doc_id is None else doc_id

# check if content is a string
if isinstance(data.content, str):
data.content = [data.content]
# Check if content is a list
if isinstance(data.content, list):
final_params_doc = []
final_params_owner = []

for idx, content in enumerate(data.content):
doc_params = [
developer_id,
current_doc_id,
data.title,
content,
idx,
modality,
embedding_model,
embedding_dimensions,
language,
data.metadata or {},
]
final_params_doc.append(doc_params)

# Create the doc record
doc_params = [
developer_id,
doc_id,
data.title,
str(data.content),
index,
modality,
embedding_model,
embedding_dimensions,
language,
data.metadata or {},
]

queries = [(doc_query, doc_params)]

# If an owner is specified, associate it:
if owner_type and owner_id:
owner_params = [developer_id, doc_id, owner_type, owner_id]
queries.append((doc_owner_query, owner_params))
owner_params = [
developer_id,
current_doc_id,
idx,
owner_type,
owner_id,
]
final_params_owner.append(owner_params)

# Add the doc query for each content
queries.append((doc_query, final_params_doc, "fetchmany"))

# Add the owner query
queries.append((doc_owner_query, final_params_owner, "fetchmany"))

else:

# Create the doc record
doc_params = [
developer_id,
current_doc_id,
data.title,
data.content,
index,
modality,
embedding_model,
embedding_dimensions,
language,
data.metadata or {},
]

owner_params = [
developer_id,
current_doc_id,
index,
owner_type,
owner_id,
]

# Add the doc query for single content
queries.append((doc_query, doc_params, "fetch"))

# Add the owner query
queries.append((doc_owner_query, owner_params, "fetch"))

return queries
24 changes: 10 additions & 14 deletions agents-api/agents_api/queries/docs/delete_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,18 @@
DELETE FROM doc_owners
WHERE developer_id = $1
AND doc_id = $2
AND (
($3::text IS NULL AND $4::uuid IS NULL)
OR (owner_type = $3 AND owner_id = $4)
)
AND owner_type = $3
AND owner_id = $4
)
DELETE FROM docs
WHERE developer_id = $1
AND doc_id = $2
AND (
$3::text IS NULL OR EXISTS (
SELECT 1 FROM doc_owners
WHERE developer_id = $1
AND doc_id = $2
AND owner_type = $3
AND owner_id = $4
)
AND EXISTS (
SELECT 1 FROM doc_owners
WHERE developer_id = $1
AND doc_id = $2
AND owner_type = $3
AND owner_id = $4
)
RETURNING doc_id;
""").sql(pretty=True)
Expand Down Expand Up @@ -61,8 +57,8 @@ async def delete_doc(
*,
developer_id: UUID,
doc_id: UUID,
owner_type: Literal["user", "agent"] | None = None,
owner_id: UUID | None = None,
owner_type: Literal["user", "agent"],
owner_id: UUID,
) -> tuple[str, list]:
"""
Deletes a doc (and its associated doc_owners rows) for the given developer, doc_id, and owner.
Expand Down
37 changes: 0 additions & 37 deletions agents-api/agents_api/queries/docs/embed_snippets.py

This file was deleted.

Loading

0 comments on commit 249513d

Please sign in to comment.