Skip to content

Commit

Permalink
optimize getting just the id as well
Browse files Browse the repository at this point in the history
  • Loading branch information
LostVector committed Feb 19, 2025
1 parent 00071fc commit 39f137a
Showing 1 changed file with 13 additions and 17 deletions.
30 changes: 13 additions & 17 deletions backend/onyx/db/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,28 +104,24 @@ def construct_document_select_for_connector_credential_pair_by_needs_sync(
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
    connector_id: int, credential_id: int
) -> Select:
    """Build a SELECT of document ids that need syncing for one cc-pair.

    The statement joins ``DbDocument`` to ``DocumentByConnectorCredentialPair``
    on the document id and applies every filter in a single ``WHERE`` clause,
    so no separate ``IN (subquery)`` over the cc-pair table is needed.

    Args:
        connector_id: Value matched against
            ``DocumentByConnectorCredentialPair.connector_id``.
        credential_id: Value matched against
            ``DocumentByConnectorCredentialPair.credential_id``.

    Returns:
        An unexecuted ``Select`` yielding distinct ``DbDocument.id`` values
        for documents linked to the given connector/credential pair whose
        ``last_synced`` is NULL or older than ``last_modified``.
    """
    return (
        select(DbDocument.id)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
        )
        .where(
            and_(
                DocumentByConnectorCredentialPair.connector_id == connector_id,
                DocumentByConnectorCredentialPair.credential_id == credential_id,
                or_(
                    # last_modified is newer than last_synced
                    DbDocument.last_modified > DbDocument.last_synced,
                    # never synced
                    DbDocument.last_synced.is_(None),
                ),
            )
        )
        .distinct()
    )


def get_all_documents_needing_vespa_sync_for_cc_pair(
db_session: Session, cc_pair_id: int
Expand Down

0 comments on commit 39f137a

Please sign in to comment.