Entities #405 (Open)

Wants to merge 21 commits into base: mwp_v1 from entities.

Changes from 12 commits (21 commits total)

Commits
3f2cd2d
added backend (entity management)
Jun 25, 2024
60d90db
added frontend (entities)
Jun 25, 2024
e6ab5f1
update backend
Jun 25, 2024
31b49c8
update frontend
Jun 25, 2024
a4eb087
BugFix span text create_multi now doesnt create duplicates
Jun 26, 2024
615f6f3
Update entity to include KnowledgeBase and IsHuman (backend)
Jun 26, 2024
2340280
Update entity to include KnowledgeBase and IsHuman (frontend)
Jun 26, 2024
2b93d9a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 27, 2024
02ac98a
update db and openapi
Jun 27, 2024
a03a895
update, more efficient entity cleanup, cleanup enpoint code
Jul 4, 2024
80da369
Merge branch 'entities' of github.com:uhh-lt/dats into entities
Jul 4, 2024
9fb795a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 4, 2024
26e9791
Activated SubRow select, and removed entity_ids
Aug 6, 2024
0eb2052
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 6, 2024
7040ba1
Added Typing projectEntitiesRows
Aug 6, 2024
91d2502
Merge branch 'entities' of github.com:uhh-lt/dats into entities
Aug 6, 2024
030d92a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 6, 2024
7f7ab0a
Missed files/console.log
Aug 6, 2024
6258a27
Merge branch 'entities' of github.com:uhh-lt/dats into entities
Aug 6, 2024
4344024
Changes: fixed error in entity multi create (db objs and dtos were no…
Aug 8, 2024
ca824be
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 8, 2024
@@ -0,0 +1,112 @@
"""add entity and span text entity link

Revision ID: 13cc78f77731
Revises: 2b91203d1bb6
Create Date: 2024-06-27 16:05:14.589423

"""

from typing import Sequence, Union

import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "13cc78f77731"
down_revision: Union[str, None] = "2b91203d1bb6"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"entity",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column(
"created", sa.DateTime(), server_default=sa.text("now()"), nullable=True
),
sa.Column(
"updated", sa.DateTime(), server_default=sa.text("now()"), nullable=True
),
sa.Column("is_human", sa.Boolean(), nullable=False),
sa.Column("knowledge_base_id", sa.String(), nullable=False),
sa.Column("project_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(op.f("ix_entity_created"), "entity", ["created"], unique=False)
op.create_index(op.f("ix_entity_id"), "entity", ["id"], unique=False)
op.create_index(op.f("ix_entity_is_human"), "entity", ["is_human"], unique=False)
op.create_index(
op.f("ix_entity_knowledge_base_id"),
"entity",
["knowledge_base_id"],
unique=False,
)
op.create_index(op.f("ix_entity_name"), "entity", ["name"], unique=False)
op.create_index(
op.f("ix_entity_project_id"), "entity", ["project_id"], unique=False
)
op.create_table(
"spantextentitylink",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("linked_entity_id", sa.Integer(), nullable=True),
sa.Column("linked_span_text_id", sa.Integer(), nullable=True),
sa.Column("is_human", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(
["linked_entity_id"], ["entity.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(
["linked_span_text_id"], ["spantext.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
op.f("ix_spantextentitylink_id"), "spantextentitylink", ["id"], unique=False
)
op.create_index(
op.f("ix_spantextentitylink_is_human"),
"spantextentitylink",
["is_human"],
unique=False,
)
op.create_index(
op.f("ix_spantextentitylink_linked_entity_id"),
"spantextentitylink",
["linked_entity_id"],
unique=False,
)
op.create_index(
op.f("ix_spantextentitylink_linked_span_text_id"),
"spantextentitylink",
["linked_span_text_id"],
unique=False,
)
# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(
op.f("ix_spantextentitylink_linked_span_text_id"),
table_name="spantextentitylink",
)
op.drop_index(
op.f("ix_spantextentitylink_linked_entity_id"), table_name="spantextentitylink"
)
op.drop_index(
op.f("ix_spantextentitylink_is_human"), table_name="spantextentitylink"
)
op.drop_index(op.f("ix_spantextentitylink_id"), table_name="spantextentitylink")
op.drop_table("spantextentitylink")
op.drop_index(op.f("ix_entity_project_id"), table_name="entity")
op.drop_index(op.f("ix_entity_name"), table_name="entity")
op.drop_index(op.f("ix_entity_knowledge_base_id"), table_name="entity")
op.drop_index(op.f("ix_entity_is_human"), table_name="entity")
op.drop_index(op.f("ix_entity_id"), table_name="entity")
op.drop_index(op.f("ix_entity_created"), table_name="entity")
op.drop_table("entity")
# ### end Alembic commands ###
71 changes: 71 additions & 0 deletions backend/src/api/endpoints/entity.py
@@ -0,0 +1,71 @@
from typing import List

from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

from api.dependencies import get_current_user, get_db_session
from app.core.authorization.authz_user import AuthzUser
from app.core.data.crud import Crud
from app.core.data.crud.entity import crud_entity
from app.core.data.dto.entity import (
EntityMerge,
EntityRead,
EntityRelease,
EntityUpdate,
)

router = APIRouter(
prefix="/entity", dependencies=[Depends(get_current_user)], tags=["entity"]
)


@router.patch(
"/{entity_id}",
response_model=EntityRead,
summary="Updates the Entity with the given ID.",
)
def update_by_id(
*,
db: Session = Depends(get_db_session),
entity_id: int,
entity: EntityUpdate,
authz_user: AuthzUser = Depends(),
) -> EntityRead:
authz_user.assert_in_same_project_as(Crud.ENTITY, entity_id)
entity.is_human = True
db_obj = crud_entity.update(db=db, id=entity_id, update_dto=entity)
return EntityRead.model_validate(db_obj)


# add merge endpoint
@router.put(
"/merge",
response_model=EntityRead,
summary="Merges entities and/or span texts with given IDs.",
)
def merge_entities(
*,
db: Session = Depends(get_db_session),
entity_merge: EntityMerge,
authz_user: AuthzUser = Depends(),
) -> EntityRead:
authz_user.assert_in_same_project_as_many(Crud.ENTITY, entity_merge.entity_ids)
db_obj = crud_entity.merge(db, entity_merge=entity_merge)
return EntityRead.model_validate(db_obj)


# add release endpoint
@router.put(
"/release",
response_model=List[EntityRead],
summary="Releases entities and/or span texts with given IDs.",
)
def release_entities(
*,
db: Session = Depends(get_db_session),
entity_release: EntityRelease,
authz_user: AuthzUser = Depends(),
) -> List[EntityRead]:
authz_user.assert_in_same_project_as_many(Crud.ENTITY, entity_release.entity_ids)
db_objs = crud_entity.release(db=db, entity_release=entity_release)
return [EntityRead.model_validate(db_obj) for db_obj in db_objs]
21 changes: 21 additions & 0 deletions backend/src/api/endpoints/project.py
@@ -15,13 +15,15 @@
from app.core.data.crud.code import crud_code
from app.core.data.crud.crud_base import NoSuchElementError
from app.core.data.crud.document_tag import crud_document_tag
from app.core.data.crud.entity import crud_entity
from app.core.data.crud.memo import crud_memo
from app.core.data.crud.project import crud_project
from app.core.data.crud.project_metadata import crud_project_meta
from app.core.data.crud.source_document import crud_sdoc
from app.core.data.dto.action import ActionQueryParameters, ActionRead
from app.core.data.dto.code import CodeRead
from app.core.data.dto.document_tag import DocumentTagRead
from app.core.data.dto.entity import EntityRead
from app.core.data.dto.memo import AttachedObjectType, MemoCreate, MemoInDB, MemoRead
from app.core.data.dto.preprocessing_job import PreprocessingJobRead
from app.core.data.dto.project import ProjectCreate, ProjectRead, ProjectUpdate
@@ -530,3 +532,22 @@ def find_duplicate_text_sdocs(
return DuplicateFinderService().find_duplicate_text_sdocs(
project_id=proj_id, max_different_words=max_different_words
)


@router.get(
"/{proj_id}/entity",
response_model=List[EntityRead],
summary="Returns all Entities of the Project with the given ID",
)
def get_project_entities(
*,
proj_id: int,
db: Session = Depends(get_db_session),
authz_user: AuthzUser = Depends(),
) -> List[EntityRead]:
authz_user.assert_in_project(proj_id)

result = crud_entity.read_by_project(db=db, proj_id=proj_id)
result = [EntityRead.model_validate(entity) for entity in result]
result.sort(key=lambda c: c.id)
return result
4 changes: 4 additions & 0 deletions backend/src/app/core/data/crud/__init__.py
@@ -8,6 +8,7 @@
from app.core.data.crud.concept_over_time_analysis import crud_cota
from app.core.data.crud.current_code import crud_current_code
from app.core.data.crud.document_tag import crud_document_tag
from app.core.data.crud.entity import crud_entity
from app.core.data.crud.memo import crud_memo
from app.core.data.crud.object_handle import crud_object_handle
from app.core.data.crud.preprocessing_job import crud_prepro_job
@@ -21,6 +22,7 @@
from app.core.data.crud.span_annotation import crud_span_anno
from app.core.data.crud.span_group import crud_span_group
from app.core.data.crud.span_text import crud_span_text
from app.core.data.crud.span_text_entity_link import crud_span_text_entity_link
from app.core.data.crud.timeline_analysis import crud_timeline_analysis
from app.core.data.crud.user import crud_user
from app.core.data.crud.whiteboard import crud_whiteboard
@@ -51,3 +53,5 @@ class Crud(Enum):
COTA_ANALYSIS = crud_cota
USER = crud_user
WHITEBOARD = crud_whiteboard
ENTITY = crud_entity
SPAN_TEXT_ENTITY_LINK = crud_span_text_entity_link
153 changes: 153 additions & 0 deletions backend/src/app/core/data/crud/entity.py
@@ -0,0 +1,153 @@
from itertools import chain
from typing import List

from fastapi.encoders import jsonable_encoder
from sqlalchemy.orm import Session

from app.core.data.crud.crud_base import CRUDBase
from app.core.data.crud.span_text import crud_span_text
from app.core.data.crud.span_text_entity_link import crud_span_text_entity_link
from app.core.data.dto.entity import (
EntityCreate,
EntityMerge,
EntityRelease,
EntityUpdate,
)
from app.core.data.dto.span_text_entity_link import (
SpanTextEntityLinkCreate,
)
from app.core.data.orm.entity import EntityORM
from app.core.data.orm.span_text_entity_link import SpanTextEntityLinkORM


class CRUDEntity(CRUDBase[EntityORM, EntityCreate, EntityUpdate]):
def create(
self, db: Session, *, create_dto: EntityCreate, force: bool = True
) -> EntityORM:
result = self.create_multi(db=db, create_dtos=[create_dto], force=force)
return result[0] if len(result) > 0 else None

def create_multi(
Review comment (Member Author): create_multi must be implemented with the help of a hash map, span_text_id -> entity, and then all entities created. (A sketch of this approach follows this file's diff below.)

self, db: Session, *, create_dtos: List[EntityCreate], force: bool = True
) -> List[EntityORM]:
if len(create_dtos) == 0:
return []

# assumption all entities belong to the same project
project_id = create_dtos[0].project_id

# duplicate assignments to the same span text are filtered out here
span_text_dict = {}
for i, create_dto in enumerate(create_dtos):
for span_text_id in create_dto.span_text_ids:
span_text_dict[span_text_id] = i

ids = list(span_text_dict.keys())
existing_links = crud_span_text_entity_link.read_multi_span_text_and_project_id(
db=db, span_text_ids=ids, project_id=project_id
)
existing_link_ids = [link.linked_span_text_id for link in existing_links]
old_entities = [link.linked_entity_id for link in existing_links]

if not force:
# if a span text is already assigned it should not be reassigned
for id in existing_link_ids:
del span_text_dict[id]

indexes_to_use = list(set(span_text_dict.values()))
create_dtos = [c for i, c in enumerate(create_dtos) if i in indexes_to_use]
dto_objs_data = [
jsonable_encoder(dto, exclude={"span_text_ids"}) for dto in create_dtos
]
db_objs = [self.model(**data) for data in dto_objs_data]
db.add_all(db_objs)
db.flush()
db.commit()

links = []
for db_obj, create_dto in zip(db_objs, create_dtos):
for span_text_id in create_dto.span_text_ids:
links.append(
SpanTextEntityLinkCreate(
linked_entity_id=db_obj.id, linked_span_text_id=span_text_id
)
)
crud_span_text_entity_link.create_multi(db=db, create_dtos=links)
db.commit()
if force:
existing_links = (
crud_span_text_entity_link.read_multi_span_text_and_project_id(
db=db, span_text_ids=ids, project_id=project_id
)
)
new_entities = [x.linked_entity_id for x in existing_links]
to_check = list(set(old_entities) - set(new_entities))
self.remove_unused_entites(db=db, ids=to_check)
return db_objs

def read_by_project(self, db: Session, proj_id: int) -> List[EntityORM]:
return db.query(self.model).filter(self.model.project_id == proj_id).all()

def remove_multi(self, db: Session, *, ids: List[int]) -> List[EntityORM]:
removed = db.query(EntityORM).filter(EntityORM.id.in_(ids)).all()
db.query(EntityORM).filter(EntityORM.id.in_(ids)).delete(
synchronize_session=False
Review comment (Member Author): What is this?

)
db.commit()
return removed

def remove_unused_entites(self, db: Session, ids: List[int]) -> List[EntityORM]:
linked_ids_result = (
db.query(SpanTextEntityLinkORM.linked_entity_id)
.filter(SpanTextEntityLinkORM.linked_entity_id.in_(ids))
.distinct()
.all()
)
linked_ids = {item[0] for item in linked_ids_result}
ids = list(set(ids) - set(linked_ids))
return self.remove_multi(db=db, ids=ids)

def merge(self, db: Session, entity_merge: EntityMerge) -> EntityORM:
all_span_texts = (
list(
chain.from_iterable(
[st.id for st in crud_entity.read(db=db, id=id).span_texts]
for id in entity_merge.entity_ids
)
)
+ entity_merge.spantext_ids
)
new_entity = EntityCreate(
name=entity_merge.name,
project_id=entity_merge.project_id,
span_text_ids=all_span_texts,
is_human=True,
knowledge_base_id=entity_merge.knowledge_base_id,
)
return self.create(db=db, create_dto=new_entity, force=True)

def release(self, db: Session, entity_release: EntityRelease) -> List[EntityORM]:
all_span_texts = (
list(
chain.from_iterable(
[st.id for st in self.read(db=db, id=id).span_texts]
for id in entity_release.entity_ids
)
)
+ entity_release.spantext_ids
)
new_entities = []
for span_text_id in all_span_texts:
span_text = crud_span_text.read(db=db, id=span_text_id)
new_entity = EntityCreate(
name=span_text.text,
project_id=entity_release.project_id,
span_text_ids=[span_text_id],
)
new_entities.append(new_entity)
db_objs = self.create_multi(db=db, create_dtos=new_entities, force=True)
self.remove_unused_entites(db=db, ids=entity_release.entity_ids)
return db_objs


crud_entity = CRUDEntity(EntityORM)
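
For reference, a minimal sketch of the hash-map approach suggested in the review comment on create_multi above. The EntityCreateSketch stand-in and the plan_entity_creation helper are illustrative only and not part of this PR; they merely mirror the span_text_ids handling of the EntityCreate DTO in this diff.

# Illustrative sketch only (not part of this PR): build the suggested
# span_text_id -> entity map, then create only the entities that still own
# at least one span text.
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class EntityCreateSketch:
    # stand-in mirroring the fields of the EntityCreate DTO used above
    name: str
    project_id: int
    span_text_ids: List[int] = field(default_factory=list)


def plan_entity_creation(
    create_dtos: List[EntityCreateSketch],
) -> List[EntityCreateSketch]:
    # span_text_id -> index of the create DTO that should own it;
    # a span text assigned to several entities keeps only the last assignment
    owner_by_span_text: Dict[int, int] = {}
    for i, dto in enumerate(create_dtos):
        for span_text_id in dto.span_text_ids:
            owner_by_span_text[span_text_id] = i

    # only the DTOs that still own at least one span text need to be created
    surviving = sorted(set(owner_by_span_text.values()))
    return [create_dtos[i] for i in surviving]

Applied to create_multi above, this is essentially the deduplication step that span_text_dict performs before the DTOs are turned into EntityORM rows.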