From 611e5ca398f9ee56d3c9febfc8976d5bc1865e82 Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Fri, 18 Oct 2024 11:32:17 +0000 Subject: [PATCH 1/3] removed SourceDocumentWithDataRead --- backend/src/api/endpoints/source_document.py | 48 +++++++++++- .../cota/pipeline/steps/init_search_space.py | 18 +++-- .../src/app/core/data/crud/source_document.py | 78 +++---------------- .../src/app/core/data/dto/source_document.py | 5 -- .../app/core/data/dto/source_document_data.py | 11 +-- backend/src/app/core/data/llm/llm_service.py | 27 +++++-- .../app/core/data/orm/source_document_data.py | 14 +++- 7 files changed, 105 insertions(+), 96 deletions(-) diff --git a/backend/src/api/endpoints/source_document.py b/backend/src/api/endpoints/source_document.py index 6e34786bc..c02e1c29b 100644 --- a/backend/src/api/endpoints/source_document.py +++ b/backend/src/api/endpoints/source_document.py @@ -34,8 +34,8 @@ from app.core.data.dto.source_document import ( SourceDocumentRead, SourceDocumentUpdate, - SourceDocumentWithDataRead, ) +from app.core.data.dto.source_document_data import SourceDocumentDataRead from app.core.data.dto.source_document_metadata import ( SourceDocumentMetadataReadResolved, ) @@ -54,7 +54,7 @@ @router.get( "/{sdoc_id}", - response_model=SourceDocumentWithDataRead, + response_model=SourceDocumentRead, summary="Returns the SourceDocument with the given ID if it exists", ) def get_by_id( @@ -63,13 +63,53 @@ def get_by_id( sdoc_id: int, only_if_finished: bool = True, authz_user: AuthzUser = Depends(), -) -> SourceDocumentWithDataRead: +) -> SourceDocumentRead: authz_user.assert_in_same_project_as(Crud.SOURCE_DOCUMENT, sdoc_id) if not only_if_finished: crud_sdoc.get_status(db=db, sdoc_id=sdoc_id, raise_error_on_unfinished=True) - return crud_sdoc.read_with_data(db=db, id=sdoc_id) + return SourceDocumentRead.model_validate(crud_sdoc.read(db=db, id=sdoc_id)) + + +@router.get( + "/data/{sdoc_id}", + response_model=SourceDocumentDataRead, + summary="Returns the SourceDocumentData with the given ID if it exists", +) +def get_by_id_with_data( + *, + db: Session = Depends(get_db_session), + sdoc_id: int, + only_if_finished: bool = True, + authz_user: AuthzUser = Depends(), +) -> SourceDocumentDataRead: + authz_user.assert_in_same_project_as(Crud.SOURCE_DOCUMENT, sdoc_id) + + if not only_if_finished: + crud_sdoc.get_status(db=db, sdoc_id=sdoc_id, raise_error_on_unfinished=True) + + sdoc_data = crud_sdoc.read_data(db=db, id=sdoc_id) + if sdoc_data is None: + # if data is none, that means the document is not a text document + # instead of returning html, we return the URL to the image / video / audio file + sdoc = SourceDocumentRead.model_validate(crud_sdoc.read(db=db, id=sdoc_id)) + url = RepoService().get_sdoc_url( + sdoc=sdoc, + relative=True, + webp=True, + thumbnail=False, + ) + return SourceDocumentDataRead( + id=sdoc_id, + project_id=sdoc.project_id, + token_character_offsets=[], + tokens=[], + sentences=[], + html=url, + ) + else: + return SourceDocumentDataRead.model_validate(sdoc_data) @router.delete( diff --git a/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py b/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py index 62ddb1ca7..54eeb4f19 100644 --- a/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py +++ b/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py @@ -8,8 +8,8 @@ COTASentence, ) from app.core.data.dto.search import SearchColumns, SimSearchQuery -from app.core.data.dto.source_document import SourceDocumentWithDataRead from app.core.data.orm.source_document import SourceDocumentORM +from app.core.data.orm.source_document_data import SourceDocumentDataORM from app.core.data.orm.source_document_metadata import SourceDocumentMetadataORM from app.core.db.sql_service import SQLService from app.core.filters.filtering import Filter, LogicalOperator @@ -91,24 +91,26 @@ def add_sentences_to_search_space( # get the data from the database with sqls.db_session() as db: - sdoc_data = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids) + sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids) # map the data - sdoc_id2sdocreadwithdata: Dict[int, SourceDocumentWithDataRead] = { - sdoc_data_read.id: sdoc_data_read for sdoc_data_read in sdoc_data + sdoc_id2sdocdata: Dict[int, SourceDocumentDataORM] = { + sdoc_data_read.id: sdoc_data_read + for sdoc_data_read in sdoc_datas + if sdoc_data_read is not None } sentences = [] for cota_sent in search_space: - if cota_sent.sdoc_id not in sdoc_id2sdocreadwithdata: + if cota_sent.sdoc_id not in sdoc_id2sdocdata: raise ValueError( - f"Could not find SourceDocumentWithDataRead for sdoc_id {cota_sent.sdoc_id}!" + f"Could not find SourceDocumentDataORM for sdoc_id {cota_sent.sdoc_id}!" ) - sdoc_data_read = sdoc_id2sdocreadwithdata[cota_sent.sdoc_id] + sdoc_data_read = sdoc_id2sdocdata[cota_sent.sdoc_id] if cota_sent.sentence_id >= len(sdoc_data_read.sentences): raise ValueError( - f"Could not find sentence with id {cota_sent.sentence_id} in SourceDocumentWithDataRead with id {sdoc_data_read.id}!" + f"Could not find sentence with id {cota_sent.sentence_id} in SourceDocumentDataORM with id {sdoc_data_read.id}!" ) sentences.append(sdoc_data_read.sentences[cota_sent.sentence_id]) diff --git a/backend/src/app/core/data/crud/source_document.py b/backend/src/app/core/data/crud/source_document.py index 1bae16bc4..467432106 100644 --- a/backend/src/app/core/data/crud/source_document.py +++ b/backend/src/app/core/data/crud/source_document.py @@ -4,7 +4,7 @@ from sqlalchemy import and_, desc, func, or_ from sqlalchemy.orm import Session -from app.core.data.crud.crud_base import CRUDBase, NoSuchElementError +from app.core.data.crud.crud_base import CRUDBase from app.core.data.crud.source_document_metadata import crud_sdoc_meta from app.core.data.dto.action import ActionType from app.core.data.dto.document_tag import DocumentTagRead @@ -14,7 +14,6 @@ SourceDocumentRead, SourceDocumentReadAction, SourceDocumentUpdate, - SourceDocumentWithDataRead, ) from app.core.data.dto.source_document_data import SourceDocumentDataRead from app.core.data.dto.source_document_metadata import SourceDocumentMetadataRead @@ -57,78 +56,25 @@ def get_status( raise SourceDocumentPreprocessingUnfinishedError(sdoc_id=sdoc_id) return status - def read_with_data(self, db: Session, *, id: int) -> SourceDocumentWithDataRead: + def read_data(self, db: Session, *, id: int) -> Optional[SourceDocumentDataRead]: db_obj = ( - db.query(self.model, SourceDocumentDataORM) - .join(SourceDocumentDataORM, isouter=True) - .filter(self.model.id == id) + db.query(SourceDocumentDataORM) + .filter(SourceDocumentDataORM.id == id) .first() ) - if not db_obj: - raise NoSuchElementError(self.model, id=id) - sdoc, data = db_obj.tuple() - sdoc_read = SourceDocumentRead.model_validate(sdoc) - - # sdoc data is None for audio and video documents - if data is None: - sdoc_data_read = SourceDocumentDataRead( - id=sdoc.id, - content="", - html="", - token_starts=[], - token_ends=[], - sentence_starts=[], - sentence_ends=[], - tokens=[], - token_character_offsets=[], - sentences=[], - sentence_character_offsets=[], - ) - else: - sdoc_data_read = SourceDocumentDataRead.model_validate(data) - return SourceDocumentWithDataRead( - **(sdoc_read.model_dump() | sdoc_data_read.model_dump()) - ) + return SourceDocumentDataRead.model_validate(db_obj) if db_obj else None - def read_with_data_batch( + def read_data_batch( self, db: Session, *, ids: List[int] - ) -> List[SourceDocumentWithDataRead]: + ) -> List[Optional[SourceDocumentDataORM]]: db_objs = ( - db.query(SourceDocumentORM, SourceDocumentDataORM) - .join(SourceDocumentDataORM, isouter=True) - .filter(SourceDocumentORM.id.in_(ids)) + db.query(SourceDocumentDataORM) + .filter(SourceDocumentDataORM.id.in_(ids)) .all() ) - - results = [] - for db_obj in db_objs: - sdoc, data = db_obj - sdoc_read = SourceDocumentRead.model_validate(sdoc) - - if data is None: - sdoc_data_read = SourceDocumentDataRead( - id=sdoc.id, - content="", - html="", - token_starts=[], - token_ends=[], - sentence_starts=[], - sentence_ends=[], - tokens=[], - token_character_offsets=[], - sentences=[], - sentence_character_offsets=[], - ) - else: - sdoc_data_read = SourceDocumentDataRead.model_validate(data) - - results.append( - SourceDocumentWithDataRead( - **(sdoc_read.model_dump() | sdoc_data_read.model_dump()) - ) - ) - - return results + # create id, data map + id2data = {db_obj.id: db_obj for db_obj in db_objs} + return [id2data.get(id) for id in ids] def remove(self, db: Session, *, id: int) -> SourceDocumentORM: # Import SimSearchService here to prevent a cyclic dependency diff --git a/backend/src/app/core/data/dto/source_document.py b/backend/src/app/core/data/dto/source_document.py index eae28ac6e..d3131fefa 100644 --- a/backend/src/app/core/data/dto/source_document.py +++ b/backend/src/app/core/data/dto/source_document.py @@ -7,7 +7,6 @@ from app.core.data.doc_type import DocType from app.core.data.dto.document_tag import DocumentTagRead from app.core.data.dto.dto_base import UpdateDTOBase -from app.core.data.dto.source_document_data import SourceDocumentDataRead from app.core.data.dto.source_document_metadata import SourceDocumentMetadataRead SDOC_FILENAME_MAX_LENGTH = 200 @@ -57,7 +56,3 @@ class SourceDocumentReadAction(SourceDocumentRead): class SourceDocumentCreate(SourceDocumentBaseDTO): pass - - -class SourceDocumentWithDataRead(SourceDocumentRead, SourceDocumentDataRead): - pass diff --git a/backend/src/app/core/data/dto/source_document_data.py b/backend/src/app/core/data/dto/source_document_data.py index d10a292ae..ae8cf9874 100644 --- a/backend/src/app/core/data/dto/source_document_data.py +++ b/backend/src/app/core/data/dto/source_document_data.py @@ -21,16 +21,17 @@ class SourceDocumentDataBase(BaseModel): ) -class SourceDocumentDataRead(SourceDocumentDataBase): +class SourceDocumentDataRead(BaseModel): + id: int = Field(description="ID of the SourceDocument") + project_id: int = Field( + description="ID of the Project the SourceDocument belongs to" + ) + html: str = Field(description="Processed HTML of the SourceDocument") tokens: List[str] = Field(description="List of tokens in the SourceDocument") token_character_offsets: List[Tuple[int, int]] = Field( description="List of character offsets of each token" ) - sentences: List[str] = Field(description="List of sentences in the SourceDocument") - sentence_character_offsets: List[Tuple[int, int]] = Field( - description="List of character offsets of each sentence" - ) model_config = ConfigDict(from_attributes=True) diff --git a/backend/src/app/core/data/llm/llm_service.py b/backend/src/app/core/data/llm/llm_service.py index 5ff7de49b..117af1048 100644 --- a/backend/src/app/core/data/llm/llm_service.py +++ b/backend/src/app/core/data/llm/llm_service.py @@ -226,11 +226,16 @@ def _llm_document_tagging( ) # read sdocs - sdoc_datas = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids) + sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids) # automatic document tagging result: List[DocumentTaggingResult] = [] - for idx, sdoc_data in enumerate(sdoc_datas): + for idx, (sdoc_id, sdoc_data) in enumerate(zip(sdoc_ids, sdoc_datas)): + if sdoc_data is None: + raise ValueError( + f"Could not find SourceDocumentDataORM for sdoc_id {sdoc_id}!" + ) + # get current tag ids current_tag_ids = [ tag.id for tag in crud_sdoc.read(db=db, id=sdoc_data.id).document_tags @@ -316,10 +321,15 @@ def _llm_metadata_extraction( ) # read sdocs - sdoc_datas = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids) + sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids) # automatic metadata extraction result: List[MetadataExtractionResult] = [] - for idx, sdoc_data in enumerate(sdoc_datas): + for idx, (sdoc_id, sdoc_data) in enumerate(zip(sdoc_ids, sdoc_datas)): + if sdoc_data is None: + raise ValueError( + f"Could not find SourceDocumentDataORM for sdoc_id {sdoc_id}!" + ) + # get current metadata values current_metadata = [ SourceDocumentMetadataReadResolved.model_validate(metadata) @@ -426,12 +436,17 @@ def _llm_annotation( ) # read sdocs - sdoc_datas = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids) + sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids) # automatic annotation annotation_id = 0 result: List[AnnotationResult] = [] - for idx, sdoc_data in enumerate(sdoc_datas): + for idx, (sdoc_id, sdoc_data) in enumerate(zip(sdoc_ids, sdoc_datas)): + if sdoc_data is None: + raise ValueError( + f"Could not find SourceDocumentDataORM for sdoc_id {sdoc_id}!" + ) + # get language language = crud_sdoc_meta.read_by_sdoc_and_key( db=db, sdoc_id=sdoc_data.id, key="language" diff --git a/backend/src/app/core/data/orm/source_document_data.py b/backend/src/app/core/data/orm/source_document_data.py index 1ad322b94..e0659817e 100644 --- a/backend/src/app/core/data/orm/source_document_data.py +++ b/backend/src/app/core/data/orm/source_document_data.py @@ -1,11 +1,14 @@ -from typing import List +from typing import TYPE_CHECKING, List from sqlalchemy import ForeignKey, Integer, String from sqlalchemy.dialects.postgresql import ARRAY -from sqlalchemy.orm import Mapped, mapped_column +from sqlalchemy.orm import Mapped, mapped_column, relationship from app.core.data.orm.orm_base import ORMBase +if TYPE_CHECKING: + from app.core.data.orm.source_document import SourceDocumentORM + class SourceDocumentDataORM(ORMBase): id: Mapped[int] = mapped_column( @@ -15,6 +18,9 @@ class SourceDocumentDataORM(ORMBase): nullable=False, index=True, ) + source_document: Mapped["SourceDocumentORM"] = relationship( + "SourceDocumentORM", back_populates="data" + ) content: Mapped[str] = mapped_column(String, nullable=False, index=False) html: Mapped[str] = mapped_column(String, nullable=False, index=False) token_starts: Mapped[List[int]] = mapped_column( @@ -30,6 +36,10 @@ class SourceDocumentDataORM(ORMBase): ARRAY(Integer), nullable=False, index=False ) + @property + def project_id(self) -> int: + return self.source_document.project_id + @property def tokens(self): return [self.content[s:e] for s, e in zip(self.token_starts, self.token_ends)] From ee704fcd130832b88290e3efb17b6ee60b7377b1 Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Fri, 18 Oct 2024 11:33:08 +0000 Subject: [PATCH 2/3] updated api --- frontend/src/api/QueryKey.ts | 2 + frontend/src/api/SdocHooks.ts | 44 ++-- .../openapi/models/SourceDocumentDataRead.ts | 30 +++ .../models/SourceDocumentWithDataRead.ts | 80 -------- .../openapi/services/SourceDocumentService.ts | 32 ++- frontend/src/openapi.json | 194 ++++++------------ 6 files changed, 139 insertions(+), 243 deletions(-) create mode 100644 frontend/src/api/openapi/models/SourceDocumentDataRead.ts delete mode 100644 frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts diff --git a/frontend/src/api/QueryKey.ts b/frontend/src/api/QueryKey.ts index 0ff379d82..d05598cac 100644 --- a/frontend/src/api/QueryKey.ts +++ b/frontend/src/api/QueryKey.ts @@ -53,6 +53,8 @@ export const QueryKey = { // a single document (by sdoc id) SDOC: "sdoc", + // a single document's data (by sdoc id) + SDOC_DATA: "sdocData", // all tags of a document (by sdoc id) SDOC_TAGS: "sdocTags", // Count how many source documents each tag has diff --git a/frontend/src/api/SdocHooks.ts b/frontend/src/api/SdocHooks.ts index 40298acb5..1df16b2d4 100644 --- a/frontend/src/api/SdocHooks.ts +++ b/frontend/src/api/SdocHooks.ts @@ -3,11 +3,11 @@ import { useMutation, useQuery } from "@tanstack/react-query"; import queryClient from "../plugins/ReactQueryClient.ts"; import { QueryKey } from "./QueryKey.ts"; import { BBoxAnnotationReadResolved } from "./openapi/models/BBoxAnnotationReadResolved.ts"; -import { DocType } from "./openapi/models/DocType.ts"; import { DocumentTagRead } from "./openapi/models/DocumentTagRead.ts"; import { MemoRead } from "./openapi/models/MemoRead.ts"; +import { SourceDocumentDataRead } from "./openapi/models/SourceDocumentDataRead.ts"; import { SourceDocumentMetadataReadResolved } from "./openapi/models/SourceDocumentMetadataReadResolved.ts"; -import { SourceDocumentWithDataRead } from "./openapi/models/SourceDocumentWithDataRead.ts"; +import { SourceDocumentRead } from "./openapi/models/SourceDocumentRead.ts"; import { SpanAnnotationReadResolved } from "./openapi/models/SpanAnnotationReadResolved.ts"; import { DocumentTagService } from "./openapi/services/DocumentTagService.ts"; import { ProjectService } from "./openapi/services/ProjectService.ts"; @@ -15,38 +15,19 @@ import { SourceDocumentService } from "./openapi/services/SourceDocumentService. import { useSelectEnabledBboxAnnotations, useSelectEnabledSpanAnnotations } from "./utils.ts"; // sdoc -const fetchSdoc = async (sdocId: number) => { - const sdoc = await SourceDocumentService.getById({ - sdocId: sdocId!, +const useGetDocument = (sdocId: number | null | undefined) => + useQuery({ + queryKey: [QueryKey.SDOC, sdocId], + queryFn: () => SourceDocumentService.getById({ sdocId: sdocId! }), + enabled: !!sdocId, }); - switch (sdoc.doctype) { - case DocType.TEXT: - // dont do anything - break; - case DocType.IMAGE: { - const url = await SourceDocumentService.getFileUrl({ - sdocId: sdocId, - webp: true, - }); - sdoc.content = encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + url); - break; - } - case DocType.VIDEO: - case DocType.AUDIO: { - const url2 = await SourceDocumentService.getFileUrl({ sdocId: sdocId }); - sdoc.content = encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + url2); - break; - } - } +// encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + url2) - return sdoc; -}; - -const useGetDocument = (sdocId: number | null | undefined) => - useQuery({ - queryKey: [QueryKey.SDOC, sdocId], - queryFn: () => fetchSdoc(sdocId!), +const useGetDocumentData = (sdocId: number | null | undefined) => + useQuery({ + queryKey: [QueryKey.SDOC_DATA, sdocId], + queryFn: () => SourceDocumentService.getByIdWithData({ sdocId: sdocId! }), enabled: !!sdocId, staleTime: Infinity, }); @@ -266,6 +247,7 @@ const useGetBBoxAnnotationsBatch = (sdocId: number | null | undefined, userIds: const SdocHooks = { // sdoc useGetDocument, + useGetDocumentData, useGetLinkedSdocIds, useDeleteDocuments, useGetDocumentIdByFilename, diff --git a/frontend/src/api/openapi/models/SourceDocumentDataRead.ts b/frontend/src/api/openapi/models/SourceDocumentDataRead.ts new file mode 100644 index 000000000..560337d2f --- /dev/null +++ b/frontend/src/api/openapi/models/SourceDocumentDataRead.ts @@ -0,0 +1,30 @@ +/* generated using openapi-typescript-codegen -- do not edit */ +/* istanbul ignore file */ +/* tslint:disable */ +/* eslint-disable */ +export type SourceDocumentDataRead = { + /** + * ID of the SourceDocument + */ + id: number; + /** + * ID of the Project the SourceDocument belongs to + */ + project_id: number; + /** + * Processed HTML of the SourceDocument + */ + html: string; + /** + * List of tokens in the SourceDocument + */ + tokens: Array; + /** + * List of character offsets of each token + */ + token_character_offsets: Array; + /** + * List of sentences in the SourceDocument + */ + sentences: Array; +}; diff --git a/frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts b/frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts deleted file mode 100644 index 065df6a76..000000000 --- a/frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts +++ /dev/null @@ -1,80 +0,0 @@ -/* generated using openapi-typescript-codegen -- do not edit */ -/* istanbul ignore file */ -/* tslint:disable */ -/* eslint-disable */ -import type { DocType } from "./DocType"; -import type { SDocStatus } from "./SDocStatus"; -export type SourceDocumentWithDataRead = { - /** - * ID of the SourceDocument - */ - id: number; - /** - * Raw,original content of the SourceDocument - */ - content: string; - /** - * Processed HTML of the SourceDocument - */ - html: string; - /** - * Start of each token in character offsets in content - */ - token_starts: Array; - /** - * End of each token in character offsets in content - */ - token_ends: Array; - /** - * Start of each sentence in character offsets in content - */ - sentence_starts: Array; - /** - * End of each sentence in character offsets in content - */ - sentence_ends: Array; - /** - * List of tokens in the SourceDocument - */ - tokens: Array; - /** - * List of character offsets of each token - */ - token_character_offsets: Array; - /** - * List of sentences in the SourceDocument - */ - sentences: Array; - /** - * List of character offsets of each sentence - */ - sentence_character_offsets: Array; - /** - * Filename of the SourceDocument - */ - filename: string; - /** - * User-defined name of the document - */ - name?: string | null; - /** - * DOCTYPE of the SourceDocument - */ - doctype: DocType; - /** - * Status of the SourceDocument - */ - status: SDocStatus; - /** - * Project the SourceDocument belongs to - */ - project_id: number; - /** - * The created timestamp of the SourceDocument - */ - created: string; - /** - * Updated timestamp of the Memo - */ - updated: string; -}; diff --git a/frontend/src/api/openapi/services/SourceDocumentService.ts b/frontend/src/api/openapi/services/SourceDocumentService.ts index 05dfb6f4d..551ad2f48 100644 --- a/frontend/src/api/openapi/services/SourceDocumentService.ts +++ b/frontend/src/api/openapi/services/SourceDocumentService.ts @@ -7,10 +7,10 @@ import type { BBoxAnnotationReadResolved } from "../models/BBoxAnnotationReadRes import type { DocumentTagRead } from "../models/DocumentTagRead"; import type { MemoCreate } from "../models/MemoCreate"; import type { MemoRead } from "../models/MemoRead"; +import type { SourceDocumentDataRead } from "../models/SourceDocumentDataRead"; import type { SourceDocumentMetadataReadResolved } from "../models/SourceDocumentMetadataReadResolved"; import type { SourceDocumentRead } from "../models/SourceDocumentRead"; import type { SourceDocumentUpdate } from "../models/SourceDocumentUpdate"; -import type { SourceDocumentWithDataRead } from "../models/SourceDocumentWithDataRead"; import type { SpanAnnotationRead } from "../models/SpanAnnotationRead"; import type { SpanAnnotationReadResolved } from "../models/SpanAnnotationReadResolved"; import type { SpanGroupRead } from "../models/SpanGroupRead"; @@ -21,7 +21,7 @@ import { request as __request } from "../core/request"; export class SourceDocumentService { /** * Returns the SourceDocument with the given ID if it exists - * @returns SourceDocumentWithDataRead Successful Response + * @returns SourceDocumentRead Successful Response * @throws ApiError */ public static getById({ @@ -30,7 +30,7 @@ export class SourceDocumentService { }: { sdocId: number; onlyIfFinished?: boolean; - }): CancelablePromise { + }): CancelablePromise { return __request(OpenAPI, { method: "GET", url: "/sdoc/{sdoc_id}", @@ -87,6 +87,32 @@ export class SourceDocumentService { }, }); } + /** + * Returns the SourceDocumentData with the given ID if it exists + * @returns SourceDocumentDataRead Successful Response + * @throws ApiError + */ + public static getByIdWithData({ + sdocId, + onlyIfFinished = true, + }: { + sdocId: number; + onlyIfFinished?: boolean; + }): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/sdoc/data/{sdoc_id}", + path: { + sdoc_id: sdocId, + }, + query: { + only_if_finished: onlyIfFinished, + }, + errors: { + 422: `Validation Error`, + }, + }); + } /** * Returns the ids of SourceDocuments linked to the SourceDocument with the given id. * @returns number Successful Response diff --git a/frontend/src/openapi.json b/frontend/src/openapi.json index 72bc424a5..2ec4f5888 100644 --- a/frontend/src/openapi.json +++ b/frontend/src/openapi.json @@ -913,9 +913,7 @@ "responses": { "200": { "description": "Successful Response", - "content": { - "application/json": { "schema": { "$ref": "#/components/schemas/SourceDocumentWithDataRead" } } - } + "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SourceDocumentRead" } } } }, "422": { "description": "Validation Error", @@ -966,6 +964,33 @@ } } }, + "/sdoc/data/{sdoc_id}": { + "get": { + "tags": ["sourceDocument"], + "summary": "Returns the SourceDocumentData with the given ID if it exists", + "operationId": "get_by_id_with_data", + "security": [{ "OAuth2PasswordBearer": [] }], + "parameters": [ + { "name": "sdoc_id", "in": "path", "required": true, "schema": { "type": "integer", "title": "Sdoc Id" } }, + { + "name": "only_if_finished", + "in": "query", + "required": false, + "schema": { "type": "boolean", "default": true, "title": "Only If Finished" } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SourceDocumentDataRead" } } } + }, + "422": { + "description": "Validation Error", + "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } + } + } + } + }, "/sdoc/{sdoc_id}/linked_sdocs": { "get": { "tags": ["sourceDocument"], @@ -8631,6 +8656,43 @@ "required": ["column", "direction"], "title": "Sort[WordFrequencyColumns]" }, + "SourceDocumentDataRead": { + "properties": { + "id": { "type": "integer", "title": "Id", "description": "ID of the SourceDocument" }, + "project_id": { + "type": "integer", + "title": "Project Id", + "description": "ID of the Project the SourceDocument belongs to" + }, + "html": { "type": "string", "title": "Html", "description": "Processed HTML of the SourceDocument" }, + "tokens": { + "items": { "type": "string" }, + "type": "array", + "title": "Tokens", + "description": "List of tokens in the SourceDocument" + }, + "token_character_offsets": { + "items": { + "prefixItems": [{ "type": "integer" }, { "type": "integer" }], + "type": "array", + "maxItems": 2, + "minItems": 2 + }, + "type": "array", + "title": "Token Character Offsets", + "description": "List of character offsets of each token" + }, + "sentences": { + "items": { "type": "string" }, + "type": "array", + "title": "Sentences", + "description": "List of sentences in the SourceDocument" + } + }, + "type": "object", + "required": ["id", "project_id", "html", "tokens", "token_character_offsets", "sentences"], + "title": "SourceDocumentDataRead" + }, "SourceDocumentDocumentTagLinks": { "properties": { "source_document_id": { @@ -8942,132 +9004,6 @@ "required": ["name"], "title": "SourceDocumentUpdate" }, - "SourceDocumentWithDataRead": { - "properties": { - "id": { "type": "integer", "title": "Id", "description": "ID of the SourceDocument" }, - "content": { - "type": "string", - "title": "Content", - "description": "Raw,original content of the SourceDocument" - }, - "html": { "type": "string", "title": "Html", "description": "Processed HTML of the SourceDocument" }, - "token_starts": { - "items": { "type": "integer" }, - "type": "array", - "title": "Token Starts", - "description": "Start of each token in character offsets in content" - }, - "token_ends": { - "items": { "type": "integer" }, - "type": "array", - "title": "Token Ends", - "description": "End of each token in character offsets in content" - }, - "sentence_starts": { - "items": { "type": "integer" }, - "type": "array", - "title": "Sentence Starts", - "description": "Start of each sentence in character offsets in content" - }, - "sentence_ends": { - "items": { "type": "integer" }, - "type": "array", - "title": "Sentence Ends", - "description": "End of each sentence in character offsets in content" - }, - "tokens": { - "items": { "type": "string" }, - "type": "array", - "title": "Tokens", - "description": "List of tokens in the SourceDocument" - }, - "token_character_offsets": { - "items": { - "prefixItems": [{ "type": "integer" }, { "type": "integer" }], - "type": "array", - "maxItems": 2, - "minItems": 2 - }, - "type": "array", - "title": "Token Character Offsets", - "description": "List of character offsets of each token" - }, - "sentences": { - "items": { "type": "string" }, - "type": "array", - "title": "Sentences", - "description": "List of sentences in the SourceDocument" - }, - "sentence_character_offsets": { - "items": { - "prefixItems": [{ "type": "integer" }, { "type": "integer" }], - "type": "array", - "maxItems": 2, - "minItems": 2 - }, - "type": "array", - "title": "Sentence Character Offsets", - "description": "List of character offsets of each sentence" - }, - "filename": { - "type": "string", - "maxLength": 230, - "title": "Filename", - "description": "Filename of the SourceDocument" - }, - "name": { - "anyOf": [{ "type": "string" }, { "type": "null" }], - "title": "Name", - "description": "User-defined name of the document" - }, - "doctype": { - "allOf": [{ "$ref": "#/components/schemas/DocType" }], - "description": "DOCTYPE of the SourceDocument" - }, - "status": { - "allOf": [{ "$ref": "#/components/schemas/SDocStatus" }], - "description": "Status of the SourceDocument" - }, - "project_id": { - "type": "integer", - "title": "Project Id", - "description": "Project the SourceDocument belongs to" - }, - "created": { - "type": "string", - "format": "date-time", - "title": "Created", - "description": "The created timestamp of the SourceDocument" - }, - "updated": { - "type": "string", - "format": "date-time", - "title": "Updated", - "description": "Updated timestamp of the Memo" - } - }, - "type": "object", - "required": [ - "id", - "content", - "html", - "token_starts", - "token_ends", - "sentence_starts", - "sentence_ends", - "tokens", - "token_character_offsets", - "sentences", - "sentence_character_offsets", - "filename", - "doctype", - "status", - "project_id", - "created", - "updated" - ], - "title": "SourceDocumentWithDataRead" - }, "SpanAnnotationCreate": { "properties": { "begin": { "type": "integer", "title": "Begin", "description": "Begin of the SpanAnnotation in characters" }, From f3dd09a784d391dd3b6040f39e6af5af7349bfbe Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Fri, 18 Oct 2024 11:34:00 +0000 Subject: [PATCH 3/3] updated to use SourceDocumentDataRead when necessary --- .../TextAnnotationValidator.tsx | 18 ++++++------- .../SourceDocument/SdocSentenceRenderer.tsx | 26 ++++++++++++------- .../HighlightTokenRenderer.tsx | 26 +++++++++++++++++++ .../AnnotatedSegments/SpanAnnotationCard.tsx | 18 ++++++++----- .../annotatedSegmentsSlice.ts | 2 +- frontend/src/views/annotation/Annotation.tsx | 20 +++++++++----- .../AnnotationExploer/BBoxAnnotationCard.tsx | 6 ++--- .../DocumentRenderer/useComputeTokenData.ts | 16 ++++++------ .../useComputeTokenDataWithAnnotations.ts | 12 ++++----- .../DocumentViewer/AudioVideoViewer.tsx | 10 +++---- .../annotation/DocumentViewer/ImageViewer.tsx | 22 +++++++++------- .../annotation/DocumentViewer/TextViewer.tsx | 16 ++++++------ .../ImageAnnotator/ImageAnnotator.tsx | 20 ++++++++------ .../TextAnnotator/TextAnnotator.tsx | 16 ++++++------ .../whiteboard/nodes/BboxAnnotationNode.tsx | 8 +++--- .../src/views/whiteboard/nodes/SdocNode.tsx | 26 +++++++++++++------ 16 files changed, 161 insertions(+), 101 deletions(-) create mode 100644 frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx diff --git a/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx b/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx index cd254f112..fa8308ccd 100644 --- a/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx +++ b/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx @@ -1,6 +1,6 @@ import { MouseEventHandler, useRef } from "react"; import { CodeRead } from "../../../../api/openapi/models/CodeRead.ts"; -import { SourceDocumentWithDataRead } from "../../../../api/openapi/models/SourceDocumentWithDataRead.ts"; +import { SourceDocumentDataRead } from "../../../../api/openapi/models/SourceDocumentDataRead.ts"; import { SpanAnnotationReadResolved } from "../../../../api/openapi/models/SpanAnnotationReadResolved.ts"; import SdocHooks from "../../../../api/SdocHooks.ts"; import DocumentRenderer from "../../../../views/annotation/DocumentRenderer/DocumentRenderer.tsx"; @@ -27,12 +27,12 @@ function TextAnnotationValidator({ annotations, handleChangeAnnotations, }: TextAnnotatorValidatorProps) { - const sdoc = SdocHooks.useGetDocument(sdocId); + const sdocData = SdocHooks.useGetDocumentData(sdocId); - if (sdoc.isSuccess) { + if (sdocData.isSuccess) { return ( diff --git a/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx b/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx index 1462b9dbe..a7c3c070a 100644 --- a/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx +++ b/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx @@ -1,8 +1,8 @@ +import { SourceDocumentDataRead } from "../../api/openapi/models/SourceDocumentDataRead.ts"; import SdocHooks from "../../api/SdocHooks.ts"; -import { SourceDocumentWithDataRead } from "../../api/openapi/models/SourceDocumentWithDataRead.ts"; interface SdocSentenceRendererProps { - sdoc: number | SourceDocumentWithDataRead; + sdoc: number | SourceDocumentDataRead; sentenceId: number; } @@ -10,23 +10,29 @@ function SdocSentenceRenderer({ sdoc, sentenceId }: SdocSentenceRendererProps) { if (typeof sdoc === "number") { return ; } - return ; + return ; } function SdocSentenceRendererWithoutData({ sdocId, sentenceId }: { sdocId: number; sentenceId: number }) { - const sdoc = SdocHooks.useGetDocument(sdocId); + const sdocData = SdocHooks.useGetDocumentData(sdocId); - if (sdoc.isSuccess) { - return ; - } else if (sdoc.isError) { - return
{sdoc.error.message}
; + if (sdocData.isSuccess) { + return ; + } else if (sdocData.isError) { + return
{sdocData.error.message}
; } else { return
Loading...
; } } -function SdocSentenceRendererWithData({ sdoc, sentenceId }: { sdoc: SourceDocumentWithDataRead; sentenceId: number }) { - return <>{sdoc.sentences[sentenceId]}; +function SdocSentenceRendererWithData({ + sdocData, + sentenceId, +}: { + sdocData: SourceDocumentDataRead; + sentenceId: number; +}) { + return <>{sdocData.sentences[sentenceId]}; } export default SdocSentenceRenderer; diff --git a/frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx b/frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx new file mode 100644 index 000000000..4772650b6 --- /dev/null +++ b/frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx @@ -0,0 +1,26 @@ +import { useMemo } from "react"; +import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts"; + +interface HighlightTokenRendererProps { + beginToken: number; + endToken: number; + contextSize: number; + sdocData: SourceDocumentDataRead; +} + +function HighlightTokenRenderer({ beginToken, endToken, contextSize, sdocData }: HighlightTokenRendererProps) { + const tokens = sdocData.tokens; + const { textBefore, textHighlight, textAfter } = useMemo(() => { + const textBefore = tokens.slice(beginToken - contextSize, beginToken).join(" "); + const textHighlight = tokens.slice(beginToken, endToken).join(" "); + const textAfter = tokens.slice(endToken, endToken + contextSize).join(" "); + return { textBefore, textHighlight, textAfter }; + }, [tokens, beginToken, endToken, contextSize]); + return ( + <> + {textBefore} {textHighlight} {textAfter} + + ); +} + +export default HighlightTokenRenderer; diff --git a/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx b/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx index 706aadc58..79b4452ae 100644 --- a/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx +++ b/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx @@ -15,6 +15,7 @@ import { AttachedObjectType } from "../../../api/openapi/models/AttachedObjectTy import MemoButton from "../../../components/Memo/MemoButton.tsx"; import { CRUDDialogActions } from "../../../components/dialogSlice.ts"; import { useAppDispatch, useAppSelector } from "../../../plugins/ReduxHooks.ts"; +import HighlightTokenRenderer from "./HighlightTokenRenderer.tsx"; interface SpanAnnotationCardProps { annotationId: number | undefined; @@ -23,7 +24,7 @@ interface SpanAnnotationCardProps { function SpanAnnotationCard({ annotationId, ...props }: SpanAnnotationCardProps & Omit) { // global server state (react-query) const spanAnnotation = SpanAnnotationHooks.useGetAnnotation(annotationId); - const sdoc = SdocHooks.useGetDocument(spanAnnotation.data?.sdoc_id); + const sdocData = SdocHooks.useGetDocumentData(spanAnnotation.data?.sdoc_id); // global client state (redux) const contextSize = useAppSelector((state) => state.annotatedSegments.contextSize); @@ -42,18 +43,21 @@ function SpanAnnotationCard({ annotationId, ...props }: SpanAnnotationCardProps Select an annotation to view it & it's context :) - ) : spanAnnotation.isSuccess && sdoc.isSuccess ? ( + ) : spanAnnotation.isSuccess && sdocData.isSuccess ? ( - {sdoc.data.content.substring(spanAnnotation.data.begin - contextSize, spanAnnotation.data.begin)} - {sdoc.data.content.substring(spanAnnotation.data.begin, spanAnnotation.data.end)} - {sdoc.data.content.substring(spanAnnotation.data.end, spanAnnotation.data.end + contextSize)} + - ) : spanAnnotation.isLoading || sdoc.isLoading ? ( + ) : spanAnnotation.isLoading || sdocData.isLoading ? ( ) : ( {spanAnnotation.error?.message} - {sdoc.error?.message} + {sdocData.error?.message} )} diff --git a/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts b/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts index a6e19e774..966a21f11 100644 --- a/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts +++ b/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts @@ -13,7 +13,7 @@ const initialState: TableState & AnnotatedSegmentsState = { ...initialTableState, // app state: isSplitView: false, - contextSize: 100, + contextSize: 10, }; export const AnnotatedSegmentsSlice = createSlice({ diff --git a/frontend/src/views/annotation/Annotation.tsx b/frontend/src/views/annotation/Annotation.tsx index e60d2cbe0..a5470ff05 100644 --- a/frontend/src/views/annotation/Annotation.tsx +++ b/frontend/src/views/annotation/Annotation.tsx @@ -55,6 +55,7 @@ function Annotation() { // global server state (react query) const sdoc = SdocHooks.useGetDocument(sdocId); + const sdocData = SdocHooks.useGetDocumentData(sdocId); // rename document const openSnackbar = useOpenSnackbar(); @@ -190,7 +191,7 @@ function Annotation() { {sdocId ? ( <> - {sdoc.isSuccess ? ( + {sdoc.isSuccess && sdocData.isSuccess ? (
{sdoc.data.doctype === DocType.IMAGE ? ( isAnnotationMode ? ( - + ) : ( - + ) ) : sdoc.data.doctype === DocType.TEXT ? ( isAnnotationMode ? ( - + ) : ( - + ) ) : sdoc.data.doctype === DocType.AUDIO ? ( isAnnotationMode ? (
Annotation is not (yet) supported for Audio Documents.
) : ( - + ) ) : sdoc.data.doctype === DocType.VIDEO ? ( isAnnotationMode ? (
Annotation is not (yet) supported for Video Documents.
) : ( - + ) ) : (
ERROR! This DocType is not (yet) supported!
diff --git a/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx b/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx index e14fa8f1f..d88ad4461 100644 --- a/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx +++ b/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx @@ -9,7 +9,7 @@ import AnnotationCardActionsMenu from "./AnnotationCardActionMenu.tsx"; import { AnnotationCardProps } from "./AnnotationCardProps.ts"; function BBoxAnnotationCard({ annotation, onClick, cardProps }: AnnotationCardProps) { - const sdoc = SdocHooks.useGetDocument(annotation.sdoc_id); + const sdocData = SdocHooks.useGetDocumentData(annotation.sdoc_id); return ( @@ -32,9 +32,9 @@ function BBoxAnnotationCard({ annotation, onClick, cardProps }: AnnotationCardPr /> - {sdoc.isSuccess ? ( + {sdocData.isSuccess ? ( { - if (!sdoc.data) return undefined; - if (!sdoc.data.token_character_offsets) return undefined; + if (!sdocData) return undefined; + if (!sdocData.token_character_offsets) return undefined; - const offsets = sdoc.data.token_character_offsets; - const texts = sdoc.data.tokens; + const offsets = sdocData.token_character_offsets; + const texts = sdocData.tokens; const result = texts.map((text, index) => ({ beginChar: offsets[index][0], endChar: offsets[index][1], @@ -25,7 +25,7 @@ function useComputeTokenData({ sdocId, userIds }: { sdocId: number; userIds: num newLine: text.split("\n").length - 1, })); return result; - }, [sdoc.data]); + }, [sdocData]); // annotationMap stores annotationId -> SpanAnnotationReadResolved // annotationsPerToken map stores tokenId -> spanAnnotationId[] diff --git a/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts b/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts index a55e97c1d..f3fc3bf9f 100644 --- a/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts +++ b/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts @@ -1,20 +1,20 @@ import { useMemo } from "react"; -import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts"; +import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts"; import { SpanAnnotationReadResolved } from "../../../api/openapi/models/SpanAnnotationReadResolved.ts"; import { IToken } from "./IToken.ts"; function useComputeTokenDataWithAnnotations({ - sdoc, + sdocData, annotations, }: { - sdoc: SourceDocumentWithDataRead; + sdocData: SourceDocumentDataRead; annotations: SpanAnnotationReadResolved[]; }) { // computed // todo: maybe implement with selector? const tokenData: IToken[] | undefined = useMemo(() => { - const offsets = sdoc.token_character_offsets; - const texts = sdoc.tokens; + const offsets = sdocData.token_character_offsets; + const texts = sdocData.tokens; const result = texts.map((text, index) => ({ beginChar: offsets[index][0], endChar: offsets[index][1], @@ -24,7 +24,7 @@ function useComputeTokenDataWithAnnotations({ newLine: text.split("\n").length - 1, })); return result; - }, [sdoc]); + }, [sdocData]); // todo: maybe implement with selector? // this map stores annotationId -> SpanAnnotationReadResolved diff --git a/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx b/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx index 0e3dcb4fe..0eb6db7ba 100644 --- a/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx +++ b/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx @@ -3,23 +3,23 @@ import { useMemo, useRef, useState } from "react"; import ReactPlayer from "react-player"; import type { OnProgressProps } from "react-player/base.d.ts"; import SdocHooks from "../../../api/SdocHooks.ts"; -import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts"; +import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts"; interface AudioVideoViewerProps { - sdoc: SourceDocumentWithDataRead; + sdocData: SourceDocumentDataRead; showEntities: boolean; width?: number; height?: number; } -function AudioVideoViewer({ sdoc, width, height }: AudioVideoViewerProps) { +function AudioVideoViewer({ sdocData, width, height }: AudioVideoViewerProps) { // local client state const [highlightedWordId, setHighlightedWordId] = useState(-1); const playerRef = useRef(null); const currentHighlightedWordSpanRef = useRef(null); // global server state (react-query) - const transcriptWords = SdocHooks.useGetWordLevelTranscriptions(sdoc.id); + const transcriptWords = SdocHooks.useGetWordLevelTranscriptions(sdocData.id); // ui events const handleProgress = (state: OnProgressProps) => { @@ -71,7 +71,7 @@ function AudioVideoViewer({ sdoc, width, height }: AudioVideoViewerProps) { <> (null); const gRef = useRef(null); const bboxRef = useRef(null); @@ -46,7 +46,7 @@ function ImageViewerWithData({ sdoc, height, width }: ImageViewerProps & { heigh const hiddenCodeIds = useAppSelector((state) => state.annotations.hiddenCodeIds); // global server state (react query) - const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdoc.id, visibleUserIds); + const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdocData.id, visibleUserIds); const annotationData = useMemo(() => { return (annotations.data || []).filter((bbox) => !hiddenCodeIds.includes(bbox.code.id)); @@ -114,13 +114,13 @@ function ImageViewerWithData({ sdoc, height, width }: ImageViewerProps & { heigh (update) => update.attr("x", (d) => scaledRatio * (d.x_min + 3) + xCentering), (exit) => exit.remove(), ); - }, [width, height, annotationData, sdoc.content]); + }, [width, height, annotationData, sdocData.html]); // find similar images const dispatch = useAppDispatch(); const navigate = useNavigate(); const handleImageSimilaritySearch = () => { - dispatch(ImageSearchActions.onChangeSearchQuery(sdoc.id)); + dispatch(ImageSearchActions.onChangeSearchQuery(sdocData.id)); navigate("../imagesearch"); }; @@ -131,7 +131,11 @@ function ImageViewerWithData({ sdoc, height, width }: ImageViewerProps & { heigh - + diff --git a/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx b/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx index 9abdbef6d..dbbeb4010 100644 --- a/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx +++ b/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx @@ -1,19 +1,19 @@ import React, { useRef } from "react"; -import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts"; +import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts"; import { useAppSelector } from "../../../plugins/ReduxHooks.ts"; import { TagStyle } from "../annoSlice.ts"; import DocumentRenderer from "../DocumentRenderer/DocumentRenderer.tsx"; import useComputeTokenData from "../DocumentRenderer/useComputeTokenData.ts"; import SentenceMenu, { SentenceMenuHandle } from "./SentenceMenu.tsx"; -interface AnnotationVisualizerProps { - sdoc: SourceDocumentWithDataRead; +interface TextViewerProps { + sdocData: SourceDocumentDataRead; } /** * Super simple annotation rendering, does not work for overlapping annotations!!! */ -function TextViewer({ sdoc }: AnnotationVisualizerProps) { +function TextViewer({ sdocData: sdocData }: TextViewerProps) { // local state const sentenceMenuRef = useRef(null); @@ -22,9 +22,9 @@ function TextViewer({ sdoc }: AnnotationVisualizerProps) { const tagStyle = useAppSelector((state) => state.annotations.tagStyle); // global server state (react-query) - const sentences = sdoc.sentences; + const sentences = sdocData.sentences; const { tokenData, annotationsPerToken, annotationMap } = useComputeTokenData({ - sdocId: sdoc.id, + sdocData, userIds: visibleUserIds, }); @@ -107,8 +107,8 @@ function TextViewer({ sdoc }: AnnotationVisualizerProps) { annotationMap={annotationMap} onClick={handleClick} isViewer={true} - html={sdoc.html} - projectId={sdoc.project_id} + html={sdocData.html} + projectId={sdocData.project_id} style={{ zIndex: 1, overflowY: "auto", diff --git a/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx b/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx index 9a52a1aed..c17e616e8 100644 --- a/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx +++ b/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx @@ -4,7 +4,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import SdocHooks from "../../../api/SdocHooks.ts"; import { BBoxAnnotationReadResolved } from "../../../api/openapi/models/BBoxAnnotationReadResolved.ts"; -import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts"; +import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts"; import { SpanAnnotationReadResolved } from "../../../api/openapi/models/SpanAnnotationReadResolved.ts"; import ConfirmationAPI from "../../../components/ConfirmationDialog/ConfirmationAPI.ts"; import { useOpenSnackbar } from "../../../components/SnackbarDialog/useOpenSnackbar.ts"; @@ -16,14 +16,14 @@ import SVGBBoxText from "./SVGBBoxText.tsx"; import { useCreateBBoxAnnotation, useDeleteBBoxAnnotation, useUpdateBBoxAnnotation } from "./imageAnnotationHooks.ts"; interface ImageAnnotatorProps { - sdoc: SourceDocumentWithDataRead; + sdocData: SourceDocumentDataRead; } function ImageAnnotator(props: ImageAnnotatorProps) { - const heightMetadata = SdocHooks.useGetMetadataByKey(props.sdoc.id, "height"); + const heightMetadata = SdocHooks.useGetMetadataByKey(props.sdocData.id, "height"); if (heightMetadata.isSuccess) { - return ; + return ; } else if (heightMetadata.isError) { return
{heightMetadata.error.message}
; } else if (heightMetadata.isLoading) { @@ -33,7 +33,7 @@ function ImageAnnotator(props: ImageAnnotatorProps) { } } -function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { height: number }) { +function ImageAnnotatorWithHeight({ sdocData, height }: ImageAnnotatorProps & { height: number }) { // references to svg elements const svgRef = useRef(null); const gZoomRef = useRef(null); @@ -47,7 +47,7 @@ function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { heig const hiddenCodeIds = useAppSelector((state) => state.annotations.hiddenCodeIds); // global server state (react query) - const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdoc.id, visibleUserIds); + const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdocData.id, visibleUserIds); // snackbar const openSnackbar = useOpenSnackbar(); @@ -216,7 +216,7 @@ function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { heig { requestBody: { code_id: code.id, - sdoc_id: sdoc.id, + sdoc_id: sdocData.id, x_min: x, x_max: x + width, y_min: y, @@ -327,7 +327,11 @@ function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { heig > - + { return selection.toString().trim().length === 0; }; -interface AnnotatorRemasteredProps { - sdoc: SourceDocumentWithDataRead; +interface TextAnnotatorProps { + sdocData: SourceDocumentDataRead; } -function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) { +function TextAnnotator({ sdocData }: TextAnnotatorProps) { // local state const spanMenuRef = useRef(null); const [fakeAnnotation, setFakeAnnotation] = useState(undefined); @@ -42,7 +42,7 @@ function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) { // computed / custom hooks const { tokenData, annotationsPerToken, annotationMap } = useComputeTokenData({ - sdocId: sdoc.id, + sdocData, userIds: visibleUserIds, }); @@ -139,7 +139,7 @@ function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) { const requestBody: SpanAnnotationCreate = { code_id: mostRecentCodeId || -1, - sdoc_id: sdoc.id, + sdoc_id: sdocData.id, begin: tokenData[begin_token].beginChar, end: tokenData[end_token].endChar, begin_token: begin_token, @@ -308,12 +308,12 @@ function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) { ) { // global server state (react-query) const annotation = BboxAnnotationHooks.useGetAnnotation(props.data.bboxAnnotationId); const code = CodeHooks.useGetCode(annotation.data?.code.id); - const sdoc = SdocHooks.useGetDocument(annotation.data?.sdoc_id); + const sdocData = SdocHooks.useGetDocumentData(annotation.data?.sdoc_id); const memo = BboxAnnotationHooks.useGetUserMemo(props.data.bboxAnnotationId); // effects @@ -198,9 +198,9 @@ function BboxAnnotationNode(props: NodeProps) { } /> - {annotation.isSuccess && sdoc.isSuccess ? ( + {annotation.isSuccess && sdocData.isSuccess ? ( ) { border: "4px solid " + annotation.data.code.color, }} /> - ) : annotation.isError || sdoc.isError ? ( + ) : annotation.isError || sdocData.isError ? ( Error! ) : ( Loading ... diff --git a/frontend/src/views/whiteboard/nodes/SdocNode.tsx b/frontend/src/views/whiteboard/nodes/SdocNode.tsx index cbd341933..045d096fc 100644 --- a/frontend/src/views/whiteboard/nodes/SdocNode.tsx +++ b/frontend/src/views/whiteboard/nodes/SdocNode.tsx @@ -1,11 +1,11 @@ -import { CardContent, CardHeader, CardMedia, Divider, MenuItem, Typography } from "@mui/material"; +import { CardContent, CardHeader, CardMedia, CircularProgress, Divider, MenuItem, Typography } from "@mui/material"; import { intersection } from "lodash"; import { useEffect, useRef } from "react"; import { NodeProps, useReactFlow } from "reactflow"; import SdocHooks from "../../../api/SdocHooks.ts"; import { AttachedObjectType } from "../../../api/openapi/models/AttachedObjectType.ts"; import { DocType } from "../../../api/openapi/models/DocType.ts"; -import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts"; +import { SourceDocumentRead } from "../../../api/openapi/models/SourceDocumentRead.ts"; import GenericPositionMenu, { GenericPositionMenuHandle } from "../../../components/GenericPositionMenu.tsx"; import MemoDialogAPI from "../../../components/Memo/MemoDialog/MemoDialogAPI.ts"; import SdocRenderer from "../../../components/SourceDocument/SdocRenderer.tsx"; @@ -141,10 +141,8 @@ function SdocNode(props: NodeProps) { {sdoc.isSuccess ? ( <> {docType === DocType.IMAGE ? ( - - ) : docType === DocType.TEXT ? ( - - ) : ( + + ) : docType === DocType.TEXT ? null : ( DOC TYPE IS NOT SUPPORTED @@ -172,8 +170,20 @@ function SdocNode(props: NodeProps) { ); } -function TextPreview({ sdoc }: { sdoc: SourceDocumentWithDataRead }) { - return {sdoc.content}; +function SdocNodeImageContent({ sdoc }: { sdoc: SourceDocumentRead }) { + const sdocData = SdocHooks.useGetDocumentData(sdoc.id); + + if (!sdocData.isSuccess) { + return ; + } + + return ( + + ); } export default SdocNode;