Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(memory-store, agents-api): Fix distance and confidence values for vector search #1035

Merged
merged 3 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def search_docs_by_embedding(
owner_types,
owner_ids,
k,
1.0 - confidence,
confidence,
metadata_filter,
],
)
2 changes: 1 addition & 1 deletion agents-api/agents_api/queries/docs/search_docs_hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ async def search_docs_hybrid(
owner_ids,
k,
alpha,
1.0 - confidence,
confidence,
metadata_filter,
search_language,
],
Expand Down
5 changes: 5 additions & 0 deletions memory-store/migrations/000022_vector_search.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
BEGIN;

-- Revert migration 000022: remove the search_by_vector function.
-- The argument types are spelled out so the statement targets exactly the
-- signature created by the matching up migration and stays valid even if
-- another overload of the same name exists (an argument-less DROP FUNCTION
-- fails when the name is not unique).
-- NOTE(review): the up migration uses CREATE OR REPLACE; if an earlier
-- migration already defined this function, dropping it here does not restore
-- that earlier definition -- confirm this is the intended rollback behavior.
DROP FUNCTION IF EXISTS search_by_vector (
    UUID,
    vector,
    TEXT[],
    UUID[],
    integer,
    float,
    jsonb
);

COMMIT;
91 changes: 91 additions & 0 deletions memory-store/migrations/000022_vector_search.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
BEGIN;

-- search_by_vector: return up to k documents owned by the given owners whose
-- embedding is most similar to query_embedding.
--
-- Parameters:
--   developer_id     only rows of docs_embeddings with this developer_id are searched
--   query_embedding  1024-dimensional query vector
--   owner_types      owner types to match against doc_owners.owner_type
--   owner_ids        owner ids to match against doc_owners.owner_id
--   k                maximum number of documents returned (must be > 0)
--   confidence       minimum score a row must reach, in [0, 1]
--   metadata_filter  optional jsonb; when given, d.metadata must contain it (@>)
--
-- Returns: SETOF doc_search_result, one row per doc_id, best score first.
-- The "distance" column holds ((1 - (embedding <=> query)) + 1) * 0.5, i.e. a
-- score where larger means closer (assuming <=> is pgvector's cosine-distance
-- operator, this maps cosine similarity from [-1, 1] onto [0, 1]).
--
-- Raises an exception when k <= 0, when confidence is outside [0, 1], or when
-- owner_types and owner_ids are both given but differ in length.
CREATE OR REPLACE FUNCTION search_by_vector (
    developer_id UUID,
    query_embedding vector (1024),
    owner_types TEXT[],
    owner_ids UUID [],
    k integer DEFAULT 3,
    confidence float DEFAULT 0.5,
    metadata_filter jsonb DEFAULT NULL
) RETURNS SETOF doc_search_result LANGUAGE plpgsql AS $$
DECLARE
    metadata_filter_sql text;
BEGIN
    -- Input validation
    IF k <= 0 THEN
        RAISE EXCEPTION 'k must be greater than 0';
    END IF;

    IF confidence < 0 OR confidence > 1 THEN
        RAISE EXCEPTION 'confidence must be between 0 and 1';
    END IF;

    -- array_length() yields NULL for an empty array, so COALESCE to 0 to make
    -- the comparison well-defined. (The previous condition additionally
    -- required array_length(...) <= 0, which is never true, so the check
    -- could not fire.)
    IF owner_types IS NOT NULL AND owner_ids IS NOT NULL AND
        COALESCE(array_length(owner_types, 1), 0) != COALESCE(array_length(owner_ids, 1), 0) THEN
        RAISE EXCEPTION 'owner_types and owner_ids arrays must have the same length';
    END IF;

    -- Build metadata filter SQL if provided; $6 is only referenced when set.
    IF metadata_filter IS NOT NULL THEN
        metadata_filter_sql := 'AND d.metadata @> $6';
    ELSE
        metadata_filter_sql := '';
    END IF;

    -- Only the fixed fragment above is spliced in with format(); every value
    -- is bound through USING ($1..$7), never interpolated into the SQL text.
    RETURN QUERY EXECUTE format(
        'WITH ranked_docs AS (
            SELECT
                d.developer_id,
                d.doc_id,
                d.index,
                d.title,
                d.content,
                ((1 - (d.embedding <=> $1)) + 1) * 0.5 AS distance,
                d.embedding,
                d.metadata,
                doc_owners.owner_type,
                doc_owners.owner_id
            FROM docs_embeddings d
            -- The owner predicate below rejects rows without an owner match,
            -- so an inner join expresses what the query actually does.
            INNER JOIN doc_owners ON d.doc_id = doc_owners.doc_id
            WHERE d.developer_id = $7
                AND ((1 - (d.embedding <=> $1)) + 1) * 0.5 >= $2
                AND (
                    doc_owners.owner_id = ANY($5::uuid[]) AND doc_owners.owner_type = ANY($4::text[])
                )
                %s
            ORDER BY ((1 - (d.embedding <=> $1)) + 1) * 0.5 DESC
            LIMIT ($3 * 4) -- Get more candidates than needed
        ),
        best_per_doc AS (
            -- Keep the highest-scoring row for each document.
            SELECT DISTINCT ON (doc_id) *
            FROM ranked_docs
            ORDER BY doc_id, distance DESC
        )
        SELECT
            b.developer_id,
            b.doc_id,
            b.index,
            b.title,
            b.content,
            b.distance,
            b.embedding,
            b.metadata,
            b.owner_type,
            b.owner_id
        FROM best_per_doc b
        -- Apply the final limit by score; limiting while still ordered by
        -- doc_id would return the k lowest doc_ids instead of the k best hits.
        ORDER BY b.distance DESC, b.doc_id
        LIMIT $3',
        metadata_filter_sql
    )
    USING
        query_embedding,
        confidence,
        k,
        owner_types,
        owner_ids,
        metadata_filter,
        developer_id;

END;
$$;

COMMIT;
Loading