From 611e5ca398f9ee56d3c9febfc8976d5bc1865e82 Mon Sep 17 00:00:00 2001
From: Tim Fischer <tim.fischer@uni-hamburg.de>
Date: Fri, 18 Oct 2024 11:32:17 +0000
Subject: [PATCH 1/3] removed SourceDocumentWithDataRead

---
 backend/src/api/endpoints/source_document.py  | 48 +++++++++++-
 .../cota/pipeline/steps/init_search_space.py  | 18 +++--
 .../src/app/core/data/crud/source_document.py | 78 +++----------------
 .../src/app/core/data/dto/source_document.py  |  5 --
 .../app/core/data/dto/source_document_data.py | 11 +--
 backend/src/app/core/data/llm/llm_service.py  | 27 +++++--
 .../app/core/data/orm/source_document_data.py | 14 +++-
 7 files changed, 105 insertions(+), 96 deletions(-)

diff --git a/backend/src/api/endpoints/source_document.py b/backend/src/api/endpoints/source_document.py
index 6e34786bc..c02e1c29b 100644
--- a/backend/src/api/endpoints/source_document.py
+++ b/backend/src/api/endpoints/source_document.py
@@ -34,8 +34,8 @@
 from app.core.data.dto.source_document import (
     SourceDocumentRead,
     SourceDocumentUpdate,
-    SourceDocumentWithDataRead,
 )
+from app.core.data.dto.source_document_data import SourceDocumentDataRead
 from app.core.data.dto.source_document_metadata import (
     SourceDocumentMetadataReadResolved,
 )
@@ -54,7 +54,7 @@
 
 @router.get(
     "/{sdoc_id}",
-    response_model=SourceDocumentWithDataRead,
+    response_model=SourceDocumentRead,
     summary="Returns the SourceDocument with the given ID if it exists",
 )
 def get_by_id(
@@ -63,13 +63,53 @@ def get_by_id(
     sdoc_id: int,
     only_if_finished: bool = True,
     authz_user: AuthzUser = Depends(),
-) -> SourceDocumentWithDataRead:
+) -> SourceDocumentRead:
     authz_user.assert_in_same_project_as(Crud.SOURCE_DOCUMENT, sdoc_id)
 
     if not only_if_finished:
         crud_sdoc.get_status(db=db, sdoc_id=sdoc_id, raise_error_on_unfinished=True)
 
-    return crud_sdoc.read_with_data(db=db, id=sdoc_id)
+    return SourceDocumentRead.model_validate(crud_sdoc.read(db=db, id=sdoc_id))
+
+
+@router.get(
+    "/data/{sdoc_id}",
+    response_model=SourceDocumentDataRead,
+    summary="Returns the SourceDocumentData with the given ID if it exists",
+)
+def get_by_id_with_data(
+    *,
+    db: Session = Depends(get_db_session),
+    sdoc_id: int,
+    only_if_finished: bool = True,
+    authz_user: AuthzUser = Depends(),
+) -> SourceDocumentDataRead:
+    authz_user.assert_in_same_project_as(Crud.SOURCE_DOCUMENT, sdoc_id)
+
+    if not only_if_finished:
+        crud_sdoc.get_status(db=db, sdoc_id=sdoc_id, raise_error_on_unfinished=True)
+
+    sdoc_data = crud_sdoc.read_data(db=db, id=sdoc_id)
+    if sdoc_data is None:
+        # If data is None, the document is not a text document.
+        # Instead of HTML, `html` then holds the relative URL of the image / video / audio file.
+        sdoc = SourceDocumentRead.model_validate(crud_sdoc.read(db=db, id=sdoc_id))
+        url = RepoService().get_sdoc_url(
+            sdoc=sdoc,
+            relative=True,
+            webp=True,
+            thumbnail=False,
+        )
+        return SourceDocumentDataRead(
+            id=sdoc_id,
+            project_id=sdoc.project_id,
+            token_character_offsets=[],
+            tokens=[],
+            sentences=[],
+            html=url,
+        )
+    else:
+        return SourceDocumentDataRead.model_validate(sdoc_data)
 
 
 @router.delete(
diff --git a/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py b/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py
index 62ddb1ca7..54eeb4f19 100644
--- a/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py
+++ b/backend/src/app/core/analysis/cota/pipeline/steps/init_search_space.py
@@ -8,8 +8,8 @@
     COTASentence,
 )
 from app.core.data.dto.search import SearchColumns, SimSearchQuery
-from app.core.data.dto.source_document import SourceDocumentWithDataRead
 from app.core.data.orm.source_document import SourceDocumentORM
+from app.core.data.orm.source_document_data import SourceDocumentDataORM
 from app.core.data.orm.source_document_metadata import SourceDocumentMetadataORM
 from app.core.db.sql_service import SQLService
 from app.core.filters.filtering import Filter, LogicalOperator
@@ -91,24 +91,26 @@ def add_sentences_to_search_space(
 
     # get the data from the database
     with sqls.db_session() as db:
-        sdoc_data = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids)
+        sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids)
 
     # map the data
-    sdoc_id2sdocreadwithdata: Dict[int, SourceDocumentWithDataRead] = {
-        sdoc_data_read.id: sdoc_data_read for sdoc_data_read in sdoc_data
+    sdoc_id2sdocdata: Dict[int, SourceDocumentDataORM] = {
+        sdoc_data_read.id: sdoc_data_read
+        for sdoc_data_read in sdoc_datas
+        if sdoc_data_read is not None
     }
 
     sentences = []
     for cota_sent in search_space:
-        if cota_sent.sdoc_id not in sdoc_id2sdocreadwithdata:
+        if cota_sent.sdoc_id not in sdoc_id2sdocdata:
             raise ValueError(
-                f"Could not find SourceDocumentWithDataRead for sdoc_id {cota_sent.sdoc_id}!"
+                f"Could not find SourceDocumentDataORM for sdoc_id {cota_sent.sdoc_id}!"
             )
-        sdoc_data_read = sdoc_id2sdocreadwithdata[cota_sent.sdoc_id]
+        sdoc_data_read = sdoc_id2sdocdata[cota_sent.sdoc_id]
 
         if cota_sent.sentence_id >= len(sdoc_data_read.sentences):
             raise ValueError(
-                f"Could not find sentence with id {cota_sent.sentence_id} in SourceDocumentWithDataRead with id {sdoc_data_read.id}!"
+                f"Could not find sentence with id {cota_sent.sentence_id} in SourceDocumentDataORM with id {sdoc_data_read.id}!"
             )
         sentences.append(sdoc_data_read.sentences[cota_sent.sentence_id])
 
diff --git a/backend/src/app/core/data/crud/source_document.py b/backend/src/app/core/data/crud/source_document.py
index 1bae16bc4..467432106 100644
--- a/backend/src/app/core/data/crud/source_document.py
+++ b/backend/src/app/core/data/crud/source_document.py
@@ -4,7 +4,7 @@
 from sqlalchemy import and_, desc, func, or_
 from sqlalchemy.orm import Session
 
-from app.core.data.crud.crud_base import CRUDBase, NoSuchElementError
+from app.core.data.crud.crud_base import CRUDBase
 from app.core.data.crud.source_document_metadata import crud_sdoc_meta
 from app.core.data.dto.action import ActionType
 from app.core.data.dto.document_tag import DocumentTagRead
@@ -14,7 +14,6 @@
     SourceDocumentRead,
     SourceDocumentReadAction,
     SourceDocumentUpdate,
-    SourceDocumentWithDataRead,
 )
 from app.core.data.dto.source_document_data import SourceDocumentDataRead
 from app.core.data.dto.source_document_metadata import SourceDocumentMetadataRead
@@ -57,78 +56,25 @@ def get_status(
             raise SourceDocumentPreprocessingUnfinishedError(sdoc_id=sdoc_id)
         return status
 
-    def read_with_data(self, db: Session, *, id: int) -> SourceDocumentWithDataRead:
+    def read_data(self, db: Session, *, id: int) -> Optional[SourceDocumentDataRead]:
         db_obj = (
-            db.query(self.model, SourceDocumentDataORM)
-            .join(SourceDocumentDataORM, isouter=True)
-            .filter(self.model.id == id)
+            db.query(SourceDocumentDataORM)
+            .filter(SourceDocumentDataORM.id == id)
             .first()
         )
-        if not db_obj:
-            raise NoSuchElementError(self.model, id=id)
-        sdoc, data = db_obj.tuple()
-        sdoc_read = SourceDocumentRead.model_validate(sdoc)
-
-        # sdoc data is None for audio and video documents
-        if data is None:
-            sdoc_data_read = SourceDocumentDataRead(
-                id=sdoc.id,
-                content="",
-                html="",
-                token_starts=[],
-                token_ends=[],
-                sentence_starts=[],
-                sentence_ends=[],
-                tokens=[],
-                token_character_offsets=[],
-                sentences=[],
-                sentence_character_offsets=[],
-            )
-        else:
-            sdoc_data_read = SourceDocumentDataRead.model_validate(data)
-        return SourceDocumentWithDataRead(
-            **(sdoc_read.model_dump() | sdoc_data_read.model_dump())
-        )
+        return SourceDocumentDataRead.model_validate(db_obj) if db_obj else None
 
-    def read_with_data_batch(
+    def read_data_batch(
         self, db: Session, *, ids: List[int]
-    ) -> List[SourceDocumentWithDataRead]:
+    ) -> List[Optional[SourceDocumentDataORM]]:
         db_objs = (
-            db.query(SourceDocumentORM, SourceDocumentDataORM)
-            .join(SourceDocumentDataORM, isouter=True)
-            .filter(SourceDocumentORM.id.in_(ids))
+            db.query(SourceDocumentDataORM)
+            .filter(SourceDocumentDataORM.id.in_(ids))
             .all()
         )
-
-        results = []
-        for db_obj in db_objs:
-            sdoc, data = db_obj
-            sdoc_read = SourceDocumentRead.model_validate(sdoc)
-
-            if data is None:
-                sdoc_data_read = SourceDocumentDataRead(
-                    id=sdoc.id,
-                    content="",
-                    html="",
-                    token_starts=[],
-                    token_ends=[],
-                    sentence_starts=[],
-                    sentence_ends=[],
-                    tokens=[],
-                    token_character_offsets=[],
-                    sentences=[],
-                    sentence_character_offsets=[],
-                )
-            else:
-                sdoc_data_read = SourceDocumentDataRead.model_validate(data)
-
-            results.append(
-                SourceDocumentWithDataRead(
-                    **(sdoc_read.model_dump() | sdoc_data_read.model_dump())
-                )
-            )
-
-        return results
+        # map id -> data so the result follows the order of `ids` (None for ids without data)
+        id2data = {db_obj.id: db_obj for db_obj in db_objs}
+        return [id2data.get(id) for id in ids]
 
     def remove(self, db: Session, *, id: int) -> SourceDocumentORM:
         # Import SimSearchService here to prevent a cyclic dependency
diff --git a/backend/src/app/core/data/dto/source_document.py b/backend/src/app/core/data/dto/source_document.py
index eae28ac6e..d3131fefa 100644
--- a/backend/src/app/core/data/dto/source_document.py
+++ b/backend/src/app/core/data/dto/source_document.py
@@ -7,7 +7,6 @@
 from app.core.data.doc_type import DocType
 from app.core.data.dto.document_tag import DocumentTagRead
 from app.core.data.dto.dto_base import UpdateDTOBase
-from app.core.data.dto.source_document_data import SourceDocumentDataRead
 from app.core.data.dto.source_document_metadata import SourceDocumentMetadataRead
 
 SDOC_FILENAME_MAX_LENGTH = 200
@@ -57,7 +56,3 @@ class SourceDocumentReadAction(SourceDocumentRead):
 
 class SourceDocumentCreate(SourceDocumentBaseDTO):
     pass
-
-
-class SourceDocumentWithDataRead(SourceDocumentRead, SourceDocumentDataRead):
-    pass
diff --git a/backend/src/app/core/data/dto/source_document_data.py b/backend/src/app/core/data/dto/source_document_data.py
index d10a292ae..ae8cf9874 100644
--- a/backend/src/app/core/data/dto/source_document_data.py
+++ b/backend/src/app/core/data/dto/source_document_data.py
@@ -21,16 +21,17 @@ class SourceDocumentDataBase(BaseModel):
     )
 
 
-class SourceDocumentDataRead(SourceDocumentDataBase):
+class SourceDocumentDataRead(BaseModel):
+    id: int = Field(description="ID of the SourceDocument")
+    project_id: int = Field(
+        description="ID of the Project the SourceDocument belongs to"
+    )
+    html: str = Field(description="Processed HTML of the SourceDocument")
     tokens: List[str] = Field(description="List of tokens in the SourceDocument")
     token_character_offsets: List[Tuple[int, int]] = Field(
         description="List of character offsets of each token"
     )
-
     sentences: List[str] = Field(description="List of sentences in the SourceDocument")
-    sentence_character_offsets: List[Tuple[int, int]] = Field(
-        description="List of character offsets of each sentence"
-    )
 
     model_config = ConfigDict(from_attributes=True)
 
diff --git a/backend/src/app/core/data/llm/llm_service.py b/backend/src/app/core/data/llm/llm_service.py
index 5ff7de49b..117af1048 100644
--- a/backend/src/app/core/data/llm/llm_service.py
+++ b/backend/src/app/core/data/llm/llm_service.py
@@ -226,11 +226,16 @@ def _llm_document_tagging(
         )
 
         # read sdocs
-        sdoc_datas = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids)
+        sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids)
 
         # automatic document tagging
         result: List[DocumentTaggingResult] = []
-        for idx, sdoc_data in enumerate(sdoc_datas):
+        for idx, (sdoc_id, sdoc_data) in enumerate(zip(sdoc_ids, sdoc_datas)):
+            if sdoc_data is None:
+                raise ValueError(
+                    f"Could not find SourceDocumentDataORM for sdoc_id {sdoc_id}!"
+                )
+
             # get current tag ids
             current_tag_ids = [
                 tag.id for tag in crud_sdoc.read(db=db, id=sdoc_data.id).document_tags
@@ -316,10 +321,15 @@ def _llm_metadata_extraction(
         )
 
         # read sdocs
-        sdoc_datas = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids)
+        sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids)
         # automatic metadata extraction
         result: List[MetadataExtractionResult] = []
-        for idx, sdoc_data in enumerate(sdoc_datas):
+        for idx, (sdoc_id, sdoc_data) in enumerate(zip(sdoc_ids, sdoc_datas)):
+            if sdoc_data is None:
+                raise ValueError(
+                    f"Could not find SourceDocumentDataORM for sdoc_id {sdoc_id}!"
+                )
+
             # get current metadata values
             current_metadata = [
                 SourceDocumentMetadataReadResolved.model_validate(metadata)
@@ -426,12 +436,17 @@ def _llm_annotation(
         )
 
         # read sdocs
-        sdoc_datas = crud_sdoc.read_with_data_batch(db=db, ids=sdoc_ids)
+        sdoc_datas = crud_sdoc.read_data_batch(db=db, ids=sdoc_ids)
 
         # automatic annotation
         annotation_id = 0
         result: List[AnnotationResult] = []
-        for idx, sdoc_data in enumerate(sdoc_datas):
+        for idx, (sdoc_id, sdoc_data) in enumerate(zip(sdoc_ids, sdoc_datas)):
+            if sdoc_data is None:
+                raise ValueError(
+                    f"Could not find SourceDocumentDataORM for sdoc_id {sdoc_id}!"
+                )
+
             # get language
             language = crud_sdoc_meta.read_by_sdoc_and_key(
                 db=db, sdoc_id=sdoc_data.id, key="language"
diff --git a/backend/src/app/core/data/orm/source_document_data.py b/backend/src/app/core/data/orm/source_document_data.py
index 1ad322b94..e0659817e 100644
--- a/backend/src/app/core/data/orm/source_document_data.py
+++ b/backend/src/app/core/data/orm/source_document_data.py
@@ -1,11 +1,14 @@
-from typing import List
+from typing import TYPE_CHECKING, List
 
 from sqlalchemy import ForeignKey, Integer, String
 from sqlalchemy.dialects.postgresql import ARRAY
-from sqlalchemy.orm import Mapped, mapped_column
+from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from app.core.data.orm.orm_base import ORMBase
 
+if TYPE_CHECKING:
+    from app.core.data.orm.source_document import SourceDocumentORM
+
 
 class SourceDocumentDataORM(ORMBase):
     id: Mapped[int] = mapped_column(
@@ -15,6 +18,9 @@ class SourceDocumentDataORM(ORMBase):
         nullable=False,
         index=True,
     )
+    source_document: Mapped["SourceDocumentORM"] = relationship(
+        "SourceDocumentORM", back_populates="data"
+    )
     content: Mapped[str] = mapped_column(String, nullable=False, index=False)
     html: Mapped[str] = mapped_column(String, nullable=False, index=False)
     token_starts: Mapped[List[int]] = mapped_column(
@@ -30,6 +36,10 @@ class SourceDocumentDataORM(ORMBase):
         ARRAY(Integer), nullable=False, index=False
     )
 
+    @property
+    def project_id(self) -> int:
+        return self.source_document.project_id
+
     @property
     def tokens(self):
         return [self.content[s:e] for s, e in zip(self.token_starts, self.token_ends)]

From ee704fcd130832b88290e3efb17b6ee60b7377b1 Mon Sep 17 00:00:00 2001
From: Tim Fischer <tim.fischer@uni-hamburg.de>
Date: Fri, 18 Oct 2024 11:33:08 +0000
Subject: [PATCH 2/3] updated frontend api client and hooks for SourceDocumentDataRead

---
 frontend/src/api/QueryKey.ts                  |   2 +
 frontend/src/api/SdocHooks.ts                 |  44 ++--
 .../openapi/models/SourceDocumentDataRead.ts  |  30 +++
 .../models/SourceDocumentWithDataRead.ts      |  80 --------
 .../openapi/services/SourceDocumentService.ts |  32 ++-
 frontend/src/openapi.json                     | 194 ++++++------------
 6 files changed, 139 insertions(+), 243 deletions(-)
 create mode 100644 frontend/src/api/openapi/models/SourceDocumentDataRead.ts
 delete mode 100644 frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts

diff --git a/frontend/src/api/QueryKey.ts b/frontend/src/api/QueryKey.ts
index 0ff379d82..d05598cac 100644
--- a/frontend/src/api/QueryKey.ts
+++ b/frontend/src/api/QueryKey.ts
@@ -53,6 +53,8 @@ export const QueryKey = {
 
   // a single document (by sdoc id)
   SDOC: "sdoc",
+  // a single document's data (by sdoc id)
+  SDOC_DATA: "sdocData",
   // all tags of a document (by sdoc id)
   SDOC_TAGS: "sdocTags",
   // Count how many source documents each tag has
diff --git a/frontend/src/api/SdocHooks.ts b/frontend/src/api/SdocHooks.ts
index 40298acb5..1df16b2d4 100644
--- a/frontend/src/api/SdocHooks.ts
+++ b/frontend/src/api/SdocHooks.ts
@@ -3,11 +3,11 @@ import { useMutation, useQuery } from "@tanstack/react-query";
 import queryClient from "../plugins/ReactQueryClient.ts";
 import { QueryKey } from "./QueryKey.ts";
 import { BBoxAnnotationReadResolved } from "./openapi/models/BBoxAnnotationReadResolved.ts";
-import { DocType } from "./openapi/models/DocType.ts";
 import { DocumentTagRead } from "./openapi/models/DocumentTagRead.ts";
 import { MemoRead } from "./openapi/models/MemoRead.ts";
+import { SourceDocumentDataRead } from "./openapi/models/SourceDocumentDataRead.ts";
 import { SourceDocumentMetadataReadResolved } from "./openapi/models/SourceDocumentMetadataReadResolved.ts";
-import { SourceDocumentWithDataRead } from "./openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentRead } from "./openapi/models/SourceDocumentRead.ts";
 import { SpanAnnotationReadResolved } from "./openapi/models/SpanAnnotationReadResolved.ts";
 import { DocumentTagService } from "./openapi/services/DocumentTagService.ts";
 import { ProjectService } from "./openapi/services/ProjectService.ts";
@@ -15,38 +15,19 @@ import { SourceDocumentService } from "./openapi/services/SourceDocumentService.
 import { useSelectEnabledBboxAnnotations, useSelectEnabledSpanAnnotations } from "./utils.ts";
 
 // sdoc
-const fetchSdoc = async (sdocId: number) => {
-  const sdoc = await SourceDocumentService.getById({
-    sdocId: sdocId!,
+const useGetDocument = (sdocId: number | null | undefined) =>
+  useQuery<SourceDocumentRead, Error>({
+    queryKey: [QueryKey.SDOC, sdocId],
+    queryFn: () => SourceDocumentService.getById({ sdocId: sdocId! }),
+    enabled: !!sdocId,
   });
 
-  switch (sdoc.doctype) {
-    case DocType.TEXT:
-      // dont do anything
-      break;
-    case DocType.IMAGE: {
-      const url = await SourceDocumentService.getFileUrl({
-        sdocId: sdocId,
-        webp: true,
-      });
-      sdoc.content = encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + url);
-      break;
-    }
-    case DocType.VIDEO:
-    case DocType.AUDIO: {
-      const url2 = await SourceDocumentService.getFileUrl({ sdocId: sdocId });
-      sdoc.content = encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + url2);
-      break;
-    }
-  }
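+// NOTE(review): fetchSdoc used to build encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + url) for non-text documents; presumably consumers must do this now — confirm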
 
-  return sdoc;
-};
-
-const useGetDocument = (sdocId: number | null | undefined) =>
-  useQuery<SourceDocumentWithDataRead, Error>({
-    queryKey: [QueryKey.SDOC, sdocId],
-    queryFn: () => fetchSdoc(sdocId!),
+const useGetDocumentData = (sdocId: number | null | undefined) =>
+  useQuery<SourceDocumentDataRead, Error>({
+    queryKey: [QueryKey.SDOC_DATA, sdocId],
+    queryFn: () => SourceDocumentService.getByIdWithData({ sdocId: sdocId! }),
     enabled: !!sdocId,
     staleTime: Infinity,
   });
@@ -266,6 +247,7 @@ const useGetBBoxAnnotationsBatch = (sdocId: number | null | undefined, userIds:
 const SdocHooks = {
   // sdoc
   useGetDocument,
+  useGetDocumentData,
   useGetLinkedSdocIds,
   useDeleteDocuments,
   useGetDocumentIdByFilename,
diff --git a/frontend/src/api/openapi/models/SourceDocumentDataRead.ts b/frontend/src/api/openapi/models/SourceDocumentDataRead.ts
new file mode 100644
index 000000000..560337d2f
--- /dev/null
+++ b/frontend/src/api/openapi/models/SourceDocumentDataRead.ts
@@ -0,0 +1,30 @@
+/* generated using openapi-typescript-codegen -- do not edit */
+/* istanbul ignore file */
+/* tslint:disable */
+/* eslint-disable */
+export type SourceDocumentDataRead = {
+  /**
+   * ID of the SourceDocument
+   */
+  id: number;
+  /**
+   * ID of the Project the SourceDocument belongs to
+   */
+  project_id: number;
+  /**
+   * Processed HTML of the SourceDocument
+   */
+  html: string;
+  /**
+   * List of tokens in the SourceDocument
+   */
+  tokens: Array<string>;
+  /**
+   * List of character offsets of each token
+   */
+  token_character_offsets: Array<any[]>;
+  /**
+   * List of sentences in the SourceDocument
+   */
+  sentences: Array<string>;
+};
diff --git a/frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts b/frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts
deleted file mode 100644
index 065df6a76..000000000
--- a/frontend/src/api/openapi/models/SourceDocumentWithDataRead.ts
+++ /dev/null
@@ -1,80 +0,0 @@
-/* generated using openapi-typescript-codegen -- do not edit */
-/* istanbul ignore file */
-/* tslint:disable */
-/* eslint-disable */
-import type { DocType } from "./DocType";
-import type { SDocStatus } from "./SDocStatus";
-export type SourceDocumentWithDataRead = {
-  /**
-   * ID of the SourceDocument
-   */
-  id: number;
-  /**
-   * Raw,original content of the SourceDocument
-   */
-  content: string;
-  /**
-   * Processed HTML of the SourceDocument
-   */
-  html: string;
-  /**
-   * Start of each token in character offsets in content
-   */
-  token_starts: Array<number>;
-  /**
-   * End of each token in character offsets in content
-   */
-  token_ends: Array<number>;
-  /**
-   * Start of each sentence in character offsets in content
-   */
-  sentence_starts: Array<number>;
-  /**
-   * End of each sentence in character offsets in content
-   */
-  sentence_ends: Array<number>;
-  /**
-   * List of tokens in the SourceDocument
-   */
-  tokens: Array<string>;
-  /**
-   * List of character offsets of each token
-   */
-  token_character_offsets: Array<any[]>;
-  /**
-   * List of sentences in the SourceDocument
-   */
-  sentences: Array<string>;
-  /**
-   * List of character offsets of each sentence
-   */
-  sentence_character_offsets: Array<any[]>;
-  /**
-   * Filename of the SourceDocument
-   */
-  filename: string;
-  /**
-   * User-defined name of the document
-   */
-  name?: string | null;
-  /**
-   * DOCTYPE of the SourceDocument
-   */
-  doctype: DocType;
-  /**
-   * Status of the SourceDocument
-   */
-  status: SDocStatus;
-  /**
-   * Project the SourceDocument belongs to
-   */
-  project_id: number;
-  /**
-   * The created timestamp of the SourceDocument
-   */
-  created: string;
-  /**
-   * Updated timestamp of the Memo
-   */
-  updated: string;
-};
diff --git a/frontend/src/api/openapi/services/SourceDocumentService.ts b/frontend/src/api/openapi/services/SourceDocumentService.ts
index 05dfb6f4d..551ad2f48 100644
--- a/frontend/src/api/openapi/services/SourceDocumentService.ts
+++ b/frontend/src/api/openapi/services/SourceDocumentService.ts
@@ -7,10 +7,10 @@ import type { BBoxAnnotationReadResolved } from "../models/BBoxAnnotationReadRes
 import type { DocumentTagRead } from "../models/DocumentTagRead";
 import type { MemoCreate } from "../models/MemoCreate";
 import type { MemoRead } from "../models/MemoRead";
+import type { SourceDocumentDataRead } from "../models/SourceDocumentDataRead";
 import type { SourceDocumentMetadataReadResolved } from "../models/SourceDocumentMetadataReadResolved";
 import type { SourceDocumentRead } from "../models/SourceDocumentRead";
 import type { SourceDocumentUpdate } from "../models/SourceDocumentUpdate";
-import type { SourceDocumentWithDataRead } from "../models/SourceDocumentWithDataRead";
 import type { SpanAnnotationRead } from "../models/SpanAnnotationRead";
 import type { SpanAnnotationReadResolved } from "../models/SpanAnnotationReadResolved";
 import type { SpanGroupRead } from "../models/SpanGroupRead";
@@ -21,7 +21,7 @@ import { request as __request } from "../core/request";
 export class SourceDocumentService {
   /**
    * Returns the SourceDocument with the given ID if it exists
-   * @returns SourceDocumentWithDataRead Successful Response
+   * @returns SourceDocumentRead Successful Response
    * @throws ApiError
    */
   public static getById({
@@ -30,7 +30,7 @@ export class SourceDocumentService {
   }: {
     sdocId: number;
     onlyIfFinished?: boolean;
-  }): CancelablePromise<SourceDocumentWithDataRead> {
+  }): CancelablePromise<SourceDocumentRead> {
     return __request(OpenAPI, {
       method: "GET",
       url: "/sdoc/{sdoc_id}",
@@ -87,6 +87,32 @@ export class SourceDocumentService {
       },
     });
   }
+  /**
+   * Returns the SourceDocumentData with the given ID if it exists
+   * @returns SourceDocumentDataRead Successful Response
+   * @throws ApiError
+   */
+  public static getByIdWithData({
+    sdocId,
+    onlyIfFinished = true,
+  }: {
+    sdocId: number;
+    onlyIfFinished?: boolean;
+  }): CancelablePromise<SourceDocumentDataRead> {
+    return __request(OpenAPI, {
+      method: "GET",
+      url: "/sdoc/data/{sdoc_id}",
+      path: {
+        sdoc_id: sdocId,
+      },
+      query: {
+        only_if_finished: onlyIfFinished,
+      },
+      errors: {
+        422: `Validation Error`,
+      },
+    });
+  }
   /**
    * Returns the ids of SourceDocuments linked to the SourceDocument with the given id.
    * @returns number Successful Response
diff --git a/frontend/src/openapi.json b/frontend/src/openapi.json
index 72bc424a5..2ec4f5888 100644
--- a/frontend/src/openapi.json
+++ b/frontend/src/openapi.json
@@ -913,9 +913,7 @@
         "responses": {
           "200": {
             "description": "Successful Response",
-            "content": {
-              "application/json": { "schema": { "$ref": "#/components/schemas/SourceDocumentWithDataRead" } }
-            }
+            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SourceDocumentRead" } } }
           },
           "422": {
             "description": "Validation Error",
@@ -966,6 +964,33 @@
         }
       }
     },
+    "/sdoc/data/{sdoc_id}": {
+      "get": {
+        "tags": ["sourceDocument"],
+        "summary": "Returns the SourceDocumentData with the given ID if it exists",
+        "operationId": "get_by_id_with_data",
+        "security": [{ "OAuth2PasswordBearer": [] }],
+        "parameters": [
+          { "name": "sdoc_id", "in": "path", "required": true, "schema": { "type": "integer", "title": "Sdoc Id" } },
+          {
+            "name": "only_if_finished",
+            "in": "query",
+            "required": false,
+            "schema": { "type": "boolean", "default": true, "title": "Only If Finished" }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SourceDocumentDataRead" } } }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } }
+          }
+        }
+      }
+    },
     "/sdoc/{sdoc_id}/linked_sdocs": {
       "get": {
         "tags": ["sourceDocument"],
@@ -8631,6 +8656,43 @@
         "required": ["column", "direction"],
         "title": "Sort[WordFrequencyColumns]"
       },
+      "SourceDocumentDataRead": {
+        "properties": {
+          "id": { "type": "integer", "title": "Id", "description": "ID of the SourceDocument" },
+          "project_id": {
+            "type": "integer",
+            "title": "Project Id",
+            "description": "ID of the Project the SourceDocument belongs to"
+          },
+          "html": { "type": "string", "title": "Html", "description": "Processed HTML of the SourceDocument" },
+          "tokens": {
+            "items": { "type": "string" },
+            "type": "array",
+            "title": "Tokens",
+            "description": "List of tokens in the SourceDocument"
+          },
+          "token_character_offsets": {
+            "items": {
+              "prefixItems": [{ "type": "integer" }, { "type": "integer" }],
+              "type": "array",
+              "maxItems": 2,
+              "minItems": 2
+            },
+            "type": "array",
+            "title": "Token Character Offsets",
+            "description": "List of character offsets of each token"
+          },
+          "sentences": {
+            "items": { "type": "string" },
+            "type": "array",
+            "title": "Sentences",
+            "description": "List of sentences in the SourceDocument"
+          }
+        },
+        "type": "object",
+        "required": ["id", "project_id", "html", "tokens", "token_character_offsets", "sentences"],
+        "title": "SourceDocumentDataRead"
+      },
       "SourceDocumentDocumentTagLinks": {
         "properties": {
           "source_document_id": {
@@ -8942,132 +9004,6 @@
         "required": ["name"],
         "title": "SourceDocumentUpdate"
       },
-      "SourceDocumentWithDataRead": {
-        "properties": {
-          "id": { "type": "integer", "title": "Id", "description": "ID of the SourceDocument" },
-          "content": {
-            "type": "string",
-            "title": "Content",
-            "description": "Raw,original content of the SourceDocument"
-          },
-          "html": { "type": "string", "title": "Html", "description": "Processed HTML of the SourceDocument" },
-          "token_starts": {
-            "items": { "type": "integer" },
-            "type": "array",
-            "title": "Token Starts",
-            "description": "Start of each token in character offsets in content"
-          },
-          "token_ends": {
-            "items": { "type": "integer" },
-            "type": "array",
-            "title": "Token Ends",
-            "description": "End of each token in character offsets in content"
-          },
-          "sentence_starts": {
-            "items": { "type": "integer" },
-            "type": "array",
-            "title": "Sentence Starts",
-            "description": "Start of each sentence in character offsets in content"
-          },
-          "sentence_ends": {
-            "items": { "type": "integer" },
-            "type": "array",
-            "title": "Sentence Ends",
-            "description": "End of each sentence in character offsets in content"
-          },
-          "tokens": {
-            "items": { "type": "string" },
-            "type": "array",
-            "title": "Tokens",
-            "description": "List of tokens in the SourceDocument"
-          },
-          "token_character_offsets": {
-            "items": {
-              "prefixItems": [{ "type": "integer" }, { "type": "integer" }],
-              "type": "array",
-              "maxItems": 2,
-              "minItems": 2
-            },
-            "type": "array",
-            "title": "Token Character Offsets",
-            "description": "List of character offsets of each token"
-          },
-          "sentences": {
-            "items": { "type": "string" },
-            "type": "array",
-            "title": "Sentences",
-            "description": "List of sentences in the SourceDocument"
-          },
-          "sentence_character_offsets": {
-            "items": {
-              "prefixItems": [{ "type": "integer" }, { "type": "integer" }],
-              "type": "array",
-              "maxItems": 2,
-              "minItems": 2
-            },
-            "type": "array",
-            "title": "Sentence Character Offsets",
-            "description": "List of character offsets of each sentence"
-          },
-          "filename": {
-            "type": "string",
-            "maxLength": 230,
-            "title": "Filename",
-            "description": "Filename of the SourceDocument"
-          },
-          "name": {
-            "anyOf": [{ "type": "string" }, { "type": "null" }],
-            "title": "Name",
-            "description": "User-defined name of the document"
-          },
-          "doctype": {
-            "allOf": [{ "$ref": "#/components/schemas/DocType" }],
-            "description": "DOCTYPE of the SourceDocument"
-          },
-          "status": {
-            "allOf": [{ "$ref": "#/components/schemas/SDocStatus" }],
-            "description": "Status of the SourceDocument"
-          },
-          "project_id": {
-            "type": "integer",
-            "title": "Project Id",
-            "description": "Project the SourceDocument belongs to"
-          },
-          "created": {
-            "type": "string",
-            "format": "date-time",
-            "title": "Created",
-            "description": "The created timestamp of the SourceDocument"
-          },
-          "updated": {
-            "type": "string",
-            "format": "date-time",
-            "title": "Updated",
-            "description": "Updated timestamp of the Memo"
-          }
-        },
-        "type": "object",
-        "required": [
-          "id",
-          "content",
-          "html",
-          "token_starts",
-          "token_ends",
-          "sentence_starts",
-          "sentence_ends",
-          "tokens",
-          "token_character_offsets",
-          "sentences",
-          "sentence_character_offsets",
-          "filename",
-          "doctype",
-          "status",
-          "project_id",
-          "created",
-          "updated"
-        ],
-        "title": "SourceDocumentWithDataRead"
-      },
       "SpanAnnotationCreate": {
         "properties": {
           "begin": { "type": "integer", "title": "Begin", "description": "Begin of the SpanAnnotation in characters" },

From f3dd09a784d391dd3b6040f39e6af5af7349bfbe Mon Sep 17 00:00:00 2001
From: Tim Fischer <tim.fischer@uni-hamburg.de>
Date: Fri, 18 Oct 2024 11:34:00 +0000
Subject: [PATCH 3/3] updated to use SourceDocumentDataRead when necessary

---
 .../TextAnnotationValidator.tsx               | 18 ++++++-------
 .../SourceDocument/SdocSentenceRenderer.tsx   | 26 ++++++++++++-------
 .../HighlightTokenRenderer.tsx                | 26 +++++++++++++++++++
 .../AnnotatedSegments/SpanAnnotationCard.tsx  | 18 ++++++++-----
 .../annotatedSegmentsSlice.ts                 |  2 +-
 frontend/src/views/annotation/Annotation.tsx  | 20 +++++++++-----
 .../AnnotationExploer/BBoxAnnotationCard.tsx  |  6 ++---
 .../DocumentRenderer/useComputeTokenData.ts   | 16 ++++++------
 .../useComputeTokenDataWithAnnotations.ts     | 12 ++++-----
 .../DocumentViewer/AudioVideoViewer.tsx       | 10 +++----
 .../annotation/DocumentViewer/ImageViewer.tsx | 22 +++++++++-------
 .../annotation/DocumentViewer/TextViewer.tsx  | 16 ++++++------
 .../ImageAnnotator/ImageAnnotator.tsx         | 20 ++++++++------
 .../TextAnnotator/TextAnnotator.tsx           | 16 ++++++------
 .../whiteboard/nodes/BboxAnnotationNode.tsx   |  8 +++---
 .../src/views/whiteboard/nodes/SdocNode.tsx   | 26 +++++++++++++------
 16 files changed, 161 insertions(+), 101 deletions(-)
 create mode 100644 frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx

diff --git a/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx b/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx
index cd254f112..fa8308ccd 100644
--- a/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx
+++ b/frontend/src/components/LLMDialog/steps/AnnotationResultStep/TextAnnotationValidator.tsx
@@ -1,6 +1,6 @@
 import { MouseEventHandler, useRef } from "react";
 import { CodeRead } from "../../../../api/openapi/models/CodeRead.ts";
-import { SourceDocumentWithDataRead } from "../../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentDataRead } from "../../../../api/openapi/models/SourceDocumentDataRead.ts";
 import { SpanAnnotationReadResolved } from "../../../../api/openapi/models/SpanAnnotationReadResolved.ts";
 import SdocHooks from "../../../../api/SdocHooks.ts";
 import DocumentRenderer from "../../../../views/annotation/DocumentRenderer/DocumentRenderer.tsx";
@@ -27,12 +27,12 @@ function TextAnnotationValidator({
   annotations,
   handleChangeAnnotations,
 }: TextAnnotatorValidatorProps) {
-  const sdoc = SdocHooks.useGetDocument(sdocId);
+  const sdocData = SdocHooks.useGetDocumentData(sdocId);
 
-  if (sdoc.isSuccess) {
+  if (sdocData.isSuccess) {
     return (
       <TextAnnotationValidatorWithSdoc
-        sdoc={sdoc.data}
+        sdocData={sdocData.data}
         codesForSelection={codesForSelection}
         annotations={annotations}
         handleChangeAnnotations={handleChangeAnnotations}
@@ -43,11 +43,11 @@ function TextAnnotationValidator({
 }
 
 interface TextAnnotatorValidatorWithSdocProps extends TextAnnotatorValidatorSharedProps {
-  sdoc: SourceDocumentWithDataRead;
+  sdocData: SourceDocumentDataRead;
 }
 
 function TextAnnotationValidatorWithSdoc({
-  sdoc,
+  sdocData,
   codesForSelection,
   annotations,
   handleChangeAnnotations,
@@ -57,7 +57,7 @@ function TextAnnotationValidatorWithSdoc({
 
   // computed
   const { tokenData, annotationsPerToken, annotationMap } = useComputeTokenDataWithAnnotations({
-    sdoc: sdoc,
+    sdocData,
     annotations: annotations,
   });
 
@@ -136,12 +136,12 @@ function TextAnnotationValidatorWithSdoc({
       <DocumentRenderer
         className="myFlexFillAllContainer"
         onMouseUp={handleMouseUp}
-        html={sdoc.html}
+        html={sdocData.html}
         tokenData={tokenData}
         annotationsPerToken={annotationsPerToken}
         annotationMap={annotationMap}
         isViewer={false}
-        projectId={sdoc.project_id}
+        projectId={sdocData.project_id}
         style={{ zIndex: 1, overflowY: "auto" }}
       />
     </>
diff --git a/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx b/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx
index 1462b9dbe..a7c3c070a 100644
--- a/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx
+++ b/frontend/src/components/SourceDocument/SdocSentenceRenderer.tsx
@@ -1,8 +1,8 @@
+import { SourceDocumentDataRead } from "../../api/openapi/models/SourceDocumentDataRead.ts";
 import SdocHooks from "../../api/SdocHooks.ts";
-import { SourceDocumentWithDataRead } from "../../api/openapi/models/SourceDocumentWithDataRead.ts";
 
 interface SdocSentenceRendererProps {
-  sdoc: number | SourceDocumentWithDataRead;
+  sdoc: number | SourceDocumentDataRead;
   sentenceId: number;
 }
 
@@ -10,23 +10,29 @@ function SdocSentenceRenderer({ sdoc, sentenceId }: SdocSentenceRendererProps) {
   if (typeof sdoc === "number") {
     return <SdocSentenceRendererWithoutData sdocId={sdoc} sentenceId={sentenceId} />;
   }
-  return <SdocSentenceRendererWithData sdoc={sdoc} sentenceId={sentenceId} />;
+  return <SdocSentenceRendererWithData sdocData={sdoc} sentenceId={sentenceId} />;
 }
 
 function SdocSentenceRendererWithoutData({ sdocId, sentenceId }: { sdocId: number; sentenceId: number }) {
-  const sdoc = SdocHooks.useGetDocument(sdocId);
+  const sdocData = SdocHooks.useGetDocumentData(sdocId);
 
-  if (sdoc.isSuccess) {
-    return <SdocSentenceRendererWithData sdoc={sdoc.data} sentenceId={sentenceId} />;
-  } else if (sdoc.isError) {
-    return <div>{sdoc.error.message}</div>;
+  if (sdocData.isSuccess) {
+    return <SdocSentenceRendererWithData sdocData={sdocData.data} sentenceId={sentenceId} />;
+  } else if (sdocData.isError) {
+    return <div>{sdocData.error.message}</div>;
   } else {
     return <div>Loading...</div>;
   }
 }
 
-function SdocSentenceRendererWithData({ sdoc, sentenceId }: { sdoc: SourceDocumentWithDataRead; sentenceId: number }) {
-  return <>{sdoc.sentences[sentenceId]}</>;
+function SdocSentenceRendererWithData({
+  sdocData,
+  sentenceId,
+}: {
+  sdocData: SourceDocumentDataRead;
+  sentenceId: number;
+}) {
+  return <>{sdocData.sentences[sentenceId]}</>;
 }
 
 export default SdocSentenceRenderer;
diff --git a/frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx b/frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx
new file mode 100644
index 000000000..4772650b6
--- /dev/null
+++ b/frontend/src/views/analysis/AnnotatedSegments/HighlightTokenRenderer.tsx
@@ -0,0 +1,38 @@
+import { useMemo } from "react";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
+
+interface HighlightTokenRendererProps {
+  // index of the first highlighted token (inclusive)
+  beginToken: number;
+  // index one past the last highlighted token (exclusive)
+  endToken: number;
+  // maximum number of context tokens shown on each side of the highlight
+  contextSize: number;
+  // document data providing the token list
+  sdocData: SourceDocumentDataRead;
+}
+
+/**
+ * Renders the tokens in [beginToken, endToken) in bold, surrounded by up to
+ * `contextSize` tokens of context on each side. Tokens are joined with single spaces.
+ */
+function HighlightTokenRenderer({ beginToken, endToken, contextSize, sdocData }: HighlightTokenRendererProps) {
+  const tokens = sdocData.tokens;
+  const { textBefore, textHighlight, textAfter } = useMemo(() => {
+    // Clamp the context start at 0: a negative start index makes Array.prototype.slice count
+    // from the end of the array, which would drop or corrupt the leading context whenever the
+    // annotation begins within the first `contextSize` tokens.
+    const contextStart = Math.max(0, beginToken - contextSize);
+    const textBefore = tokens.slice(contextStart, beginToken).join(" ");
+    const textHighlight = tokens.slice(beginToken, endToken).join(" ");
+    const textAfter = tokens.slice(endToken, endToken + contextSize).join(" ");
+    return { textBefore, textHighlight, textAfter };
+  }, [tokens, beginToken, endToken, contextSize]);
+  return (
+    <>
+      {textBefore} <strong>{textHighlight}</strong> {textAfter}
+    </>
+  );
+}
+
+export default HighlightTokenRenderer;
diff --git a/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx b/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx
index 706aadc58..79b4452ae 100644
--- a/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx
+++ b/frontend/src/views/analysis/AnnotatedSegments/SpanAnnotationCard.tsx
@@ -15,6 +15,7 @@ import { AttachedObjectType } from "../../../api/openapi/models/AttachedObjectTy
 import MemoButton from "../../../components/Memo/MemoButton.tsx";
 import { CRUDDialogActions } from "../../../components/dialogSlice.ts";
 import { useAppDispatch, useAppSelector } from "../../../plugins/ReduxHooks.ts";
+import HighlightTokenRenderer from "./HighlightTokenRenderer.tsx";
 
 interface SpanAnnotationCardProps {
   annotationId: number | undefined;
@@ -23,7 +24,7 @@ interface SpanAnnotationCardProps {
 function SpanAnnotationCard({ annotationId, ...props }: SpanAnnotationCardProps & Omit<CardProps, "elevation">) {
   // global server state (react-query)
   const spanAnnotation = SpanAnnotationHooks.useGetAnnotation(annotationId);
-  const sdoc = SdocHooks.useGetDocument(spanAnnotation.data?.sdoc_id);
+  const sdocData = SdocHooks.useGetDocumentData(spanAnnotation.data?.sdoc_id);
 
   // global client state (redux)
   const contextSize = useAppSelector((state) => state.annotatedSegments.contextSize);
@@ -42,18 +43,21 @@ function SpanAnnotationCard({ annotationId, ...props }: SpanAnnotationCardProps
           <Typography variant="body1" component="div" sx={{ mt: 2 }}>
             <i>Select an annotation to view it & it's context :)</i>
           </Typography>
-        ) : spanAnnotation.isSuccess && sdoc.isSuccess ? (
+        ) : spanAnnotation.isSuccess && sdocData.isSuccess ? (
           <Typography variant="body1" component="div" sx={{ mt: 2 }}>
-            {sdoc.data.content.substring(spanAnnotation.data.begin - contextSize, spanAnnotation.data.begin)}
-            <b>{sdoc.data.content.substring(spanAnnotation.data.begin, spanAnnotation.data.end)}</b>
-            {sdoc.data.content.substring(spanAnnotation.data.end, spanAnnotation.data.end + contextSize)}
+            <HighlightTokenRenderer
+              beginToken={spanAnnotation.data.begin_token}
+              endToken={spanAnnotation.data.end_token}
+              contextSize={contextSize}
+              sdocData={sdocData.data}
+            />
           </Typography>
-        ) : spanAnnotation.isLoading || sdoc.isLoading ? (
+        ) : spanAnnotation.isLoading || sdocData.isLoading ? (
           <CircularProgress />
         ) : (
           <Typography variant="body1" component="div">
             {spanAnnotation.error?.message}
-            {sdoc.error?.message}
+            {sdocData.error?.message}
           </Typography>
         )}
       </CardContent>
diff --git a/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts b/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts
index a6e19e774..966a21f11 100644
--- a/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts
+++ b/frontend/src/views/analysis/AnnotatedSegments/annotatedSegmentsSlice.ts
@@ -13,7 +13,7 @@ const initialState: TableState & AnnotatedSegmentsState = {
   ...initialTableState,
   // app state:
   isSplitView: false,
-  contextSize: 100,
+  contextSize: 10,
 };
 
 export const AnnotatedSegmentsSlice = createSlice({
diff --git a/frontend/src/views/annotation/Annotation.tsx b/frontend/src/views/annotation/Annotation.tsx
index e60d2cbe0..a5470ff05 100644
--- a/frontend/src/views/annotation/Annotation.tsx
+++ b/frontend/src/views/annotation/Annotation.tsx
@@ -55,6 +55,7 @@ function Annotation() {
 
   // global server state (react query)
   const sdoc = SdocHooks.useGetDocument(sdocId);
+  const sdocData = SdocHooks.useGetDocumentData(sdocId);
 
   // rename document
   const openSnackbar = useOpenSnackbar();
@@ -190,7 +191,7 @@ function Annotation() {
                   <CardContent>
                     {sdocId ? (
                       <>
-                        {sdoc.isSuccess ? (
+                        {sdoc.isSuccess && sdocData.isSuccess ? (
                           <Stack spacing={2}>
                             <div style={{ display: "flex", alignItems: "center" }}>
                               <EditableTypography
@@ -206,27 +207,32 @@ function Annotation() {
                             </div>
                             {sdoc.data.doctype === DocType.IMAGE ? (
                               isAnnotationMode ? (
-                                <ImageAnnotator sdoc={sdoc.data} />
+                                <ImageAnnotator sdocData={sdocData.data} />
                               ) : (
-                                <ImageViewer sdoc={sdoc.data} />
+                                <ImageViewer sdocData={sdocData.data} />
                               )
                             ) : sdoc.data.doctype === DocType.TEXT ? (
                               isAnnotationMode ? (
-                                <TextAnnotator sdoc={sdoc.data} />
+                                <TextAnnotator sdocData={sdocData.data} />
                               ) : (
-                                <TextViewer sdoc={sdoc.data} />
+                                <TextViewer sdocData={sdocData.data} />
                               )
                             ) : sdoc.data.doctype === DocType.AUDIO ? (
                               isAnnotationMode ? (
                                 <div>Annotation is not (yet) supported for Audio Documents.</div>
                               ) : (
-                                <AudioVideoViewer sdoc={sdoc.data} showEntities={true} height={200} />
+                                <AudioVideoViewer sdocData={sdocData.data} showEntities={true} height={200} />
                               )
                             ) : sdoc.data.doctype === DocType.VIDEO ? (
                               isAnnotationMode ? (
                                 <div>Annotation is not (yet) supported for Video Documents.</div>
                               ) : (
-                                <AudioVideoViewer sdoc={sdoc.data} showEntities={true} width={800} height={600} />
+                                <AudioVideoViewer
+                                  sdocData={sdocData.data}
+                                  showEntities={true}
+                                  width={800}
+                                  height={600}
+                                />
                               )
                             ) : (
                               <div>ERROR! This DocType is not (yet) supported!</div>
diff --git a/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx b/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx
index e14fa8f1f..d88ad4461 100644
--- a/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx
+++ b/frontend/src/views/annotation/AnnotationExploer/BBoxAnnotationCard.tsx
@@ -9,7 +9,7 @@ import AnnotationCardActionsMenu from "./AnnotationCardActionMenu.tsx";
 import { AnnotationCardProps } from "./AnnotationCardProps.ts";
 
 function BBoxAnnotationCard({ annotation, onClick, cardProps }: AnnotationCardProps<BBoxAnnotationReadResolved>) {
-  const sdoc = SdocHooks.useGetDocument(annotation.sdoc_id);
+  const sdocData = SdocHooks.useGetDocumentData(annotation.sdoc_id);
 
   return (
     <Card {...cardProps}>
@@ -32,9 +32,9 @@ function BBoxAnnotationCard({ annotation, onClick, cardProps }: AnnotationCardPr
       />
       <CardActionArea onClick={onClick}>
         <CardContent sx={{ pt: 1, pb: "16px !important", textAlign: "center" }}>
-          {sdoc.isSuccess ? (
+          {sdocData.isSuccess ? (
             <ImageCropper
-              imageUrl={sdoc.data.content}
+              imageUrl={encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + sdocData.data.html)}
               x={annotation.x_min}
               y={annotation.y_min}
               width={annotation.x_max - annotation.x_min}
diff --git a/frontend/src/views/annotation/DocumentRenderer/useComputeTokenData.ts b/frontend/src/views/annotation/DocumentRenderer/useComputeTokenData.ts
index c59978ba8..fa3df5030 100644
--- a/frontend/src/views/annotation/DocumentRenderer/useComputeTokenData.ts
+++ b/frontend/src/views/annotation/DocumentRenderer/useComputeTokenData.ts
@@ -1,21 +1,21 @@
 import { useMemo } from "react";
 import SdocHooks from "../../../api/SdocHooks.ts";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
 import { SpanAnnotationReadResolved } from "../../../api/openapi/models/SpanAnnotationReadResolved.ts";
 import { IToken } from "./IToken.ts";
 
-function useComputeTokenData({ sdocId, userIds }: { sdocId: number; userIds: number[] }) {
+function useComputeTokenData({ sdocData, userIds }: { sdocData: SourceDocumentDataRead; userIds: number[] }) {
   // global server state (react query)
-  const sdoc = SdocHooks.useGetDocument(sdocId);
-  const annotations = SdocHooks.useGetSpanAnnotationsBatch(sdocId, userIds);
+  const annotations = SdocHooks.useGetSpanAnnotationsBatch(sdocData.id, userIds);
 
   // computed
   // todo: maybe implement with selector?
   const tokenData: IToken[] | undefined = useMemo(() => {
-    if (!sdoc.data) return undefined;
-    if (!sdoc.data.token_character_offsets) return undefined;
+    if (!sdocData) return undefined;
+    if (!sdocData.token_character_offsets) return undefined;
 
-    const offsets = sdoc.data.token_character_offsets;
-    const texts = sdoc.data.tokens;
+    const offsets = sdocData.token_character_offsets;
+    const texts = sdocData.tokens;
     const result = texts.map((text, index) => ({
       beginChar: offsets[index][0],
       endChar: offsets[index][1],
@@ -25,7 +25,7 @@ function useComputeTokenData({ sdocId, userIds }: { sdocId: number; userIds: num
       newLine: text.split("\n").length - 1,
     }));
     return result;
-  }, [sdoc.data]);
+  }, [sdocData]);
 
   // annotationMap stores annotationId -> SpanAnnotationReadResolved
   // annotationsPerToken map stores tokenId -> spanAnnotationId[]
diff --git a/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts b/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts
index a55e97c1d..f3fc3bf9f 100644
--- a/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts
+++ b/frontend/src/views/annotation/DocumentRenderer/useComputeTokenDataWithAnnotations.ts
@@ -1,20 +1,20 @@
 import { useMemo } from "react";
-import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
 import { SpanAnnotationReadResolved } from "../../../api/openapi/models/SpanAnnotationReadResolved.ts";
 import { IToken } from "./IToken.ts";
 
 function useComputeTokenDataWithAnnotations({
-  sdoc,
+  sdocData,
   annotations,
 }: {
-  sdoc: SourceDocumentWithDataRead;
+  sdocData: SourceDocumentDataRead;
   annotations: SpanAnnotationReadResolved[];
 }) {
   // computed
   // todo: maybe implement with selector?
   const tokenData: IToken[] | undefined = useMemo(() => {
-    const offsets = sdoc.token_character_offsets;
-    const texts = sdoc.tokens;
+    const offsets = sdocData.token_character_offsets;
+    const texts = sdocData.tokens;
     const result = texts.map((text, index) => ({
       beginChar: offsets[index][0],
       endChar: offsets[index][1],
@@ -24,7 +24,7 @@ function useComputeTokenDataWithAnnotations({
       newLine: text.split("\n").length - 1,
     }));
     return result;
-  }, [sdoc]);
+  }, [sdocData]);
 
   // todo: maybe implement with selector?
   // this map stores annotationId -> SpanAnnotationReadResolved
diff --git a/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx b/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx
index 0e3dcb4fe..0eb6db7ba 100644
--- a/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx
+++ b/frontend/src/views/annotation/DocumentViewer/AudioVideoViewer.tsx
@@ -3,23 +3,23 @@ import { useMemo, useRef, useState } from "react";
 import ReactPlayer from "react-player";
 import type { OnProgressProps } from "react-player/base.d.ts";
 import SdocHooks from "../../../api/SdocHooks.ts";
-import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
 
 interface AudioVideoViewerProps {
-  sdoc: SourceDocumentWithDataRead;
+  sdocData: SourceDocumentDataRead;
   showEntities: boolean;
   width?: number;
   height?: number;
 }
 
-function AudioVideoViewer({ sdoc, width, height }: AudioVideoViewerProps) {
+function AudioVideoViewer({ sdocData, width, height }: AudioVideoViewerProps) {
   // local client state
   const [highlightedWordId, setHighlightedWordId] = useState(-1);
   const playerRef = useRef<ReactPlayer>(null);
   const currentHighlightedWordSpanRef = useRef<HTMLSpanElement>(null);
 
   // global server state (react-query)
-  const transcriptWords = SdocHooks.useGetWordLevelTranscriptions(sdoc.id);
+  const transcriptWords = SdocHooks.useGetWordLevelTranscriptions(sdocData.id);
 
   // ui events
   const handleProgress = (state: OnProgressProps) => {
@@ -71,7 +71,7 @@ function AudioVideoViewer({ sdoc, width, height }: AudioVideoViewerProps) {
     <>
       <Box sx={{ display: "flex", justifyContent: "center", alignItems: "center" }}>
         <ReactPlayer
-          url={sdoc.content}
+          url={encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + sdocData.html)}
           controls={true}
           width={width ?? 640}
           height={height ?? 360}
diff --git a/frontend/src/views/annotation/DocumentViewer/ImageViewer.tsx b/frontend/src/views/annotation/DocumentViewer/ImageViewer.tsx
index 0f00633d7..becf76f54 100644
--- a/frontend/src/views/annotation/DocumentViewer/ImageViewer.tsx
+++ b/frontend/src/views/annotation/DocumentViewer/ImageViewer.tsx
@@ -5,17 +5,17 @@ import { useEffect, useMemo, useRef } from "react";
 import { useNavigate } from "react-router-dom";
 import SdocHooks from "../../../api/SdocHooks.ts";
 import { BBoxAnnotationReadResolved } from "../../../api/openapi/models/BBoxAnnotationReadResolved.ts";
-import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
 import { useAppDispatch, useAppSelector } from "../../../plugins/ReduxHooks.ts";
 import { ImageSearchActions } from "../../search/ImageSearch/imageSearchSlice.ts";
 
 interface ImageViewerProps {
-  sdoc: SourceDocumentWithDataRead;
+  sdocData: SourceDocumentDataRead;
 }
 
 function ImageViewer(props: ImageViewerProps) {
-  const heightMetadata = SdocHooks.useGetMetadataByKey(props.sdoc.id, "height");
-  const widthMetadata = SdocHooks.useGetMetadataByKey(props.sdoc.id, "width");
+  const heightMetadata = SdocHooks.useGetMetadataByKey(props.sdocData.id, "height");
+  const widthMetadata = SdocHooks.useGetMetadataByKey(props.sdocData.id, "width");
 
   if (heightMetadata.isSuccess && widthMetadata.isSuccess) {
     return (
@@ -32,7 +32,7 @@ function ImageViewer(props: ImageViewerProps) {
   }
 }
 
-function ImageViewerWithData({ sdoc, height, width }: ImageViewerProps & { height: number; width: number }) {
+function ImageViewerWithData({ sdocData, height, width }: ImageViewerProps & { height: number; width: number }) {
   const svgRef = useRef<SVGSVGElement>(null);
   const gRef = useRef<SVGGElement>(null);
   const bboxRef = useRef<SVGGElement>(null);
@@ -46,7 +46,7 @@ function ImageViewerWithData({ sdoc, height, width }: ImageViewerProps & { heigh
   const hiddenCodeIds = useAppSelector((state) => state.annotations.hiddenCodeIds);
 
   // global server state (react query)
-  const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdoc.id, visibleUserIds);
+  const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdocData.id, visibleUserIds);
 
   const annotationData = useMemo(() => {
     return (annotations.data || []).filter((bbox) => !hiddenCodeIds.includes(bbox.code.id));
@@ -114,13 +114,13 @@ function ImageViewerWithData({ sdoc, height, width }: ImageViewerProps & { heigh
         (update) => update.attr("x", (d) => scaledRatio * (d.x_min + 3) + xCentering),
         (exit) => exit.remove(),
       );
-  }, [width, height, annotationData, sdoc.content]);
+  }, [width, height, annotationData, sdocData.html]);
 
   // find similar images
   const dispatch = useAppDispatch();
   const navigate = useNavigate();
   const handleImageSimilaritySearch = () => {
-    dispatch(ImageSearchActions.onChangeSearchQuery(sdoc.id));
+    dispatch(ImageSearchActions.onChangeSearchQuery(sdocData.id));
     navigate("../imagesearch");
   };
 
@@ -131,7 +131,11 @@ function ImageViewerWithData({ sdoc, height, width }: ImageViewerProps & { heigh
       </Button>
       <svg ref={svgRef} width="100%" height={imgContainerHeight + "px"} style={{ cursor: "move" }}>
         <g ref={gRef}>
-          <image ref={imgRef} href={sdoc.content} height={imgContainerHeight} />
+          <image
+            ref={imgRef}
+            href={encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + sdocData.html)}
+            height={imgContainerHeight}
+          />
           <g ref={bboxRef}></g>
           <g ref={textRef}></g>
         </g>
diff --git a/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx b/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx
index 9abdbef6d..dbbeb4010 100644
--- a/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx
+++ b/frontend/src/views/annotation/DocumentViewer/TextViewer.tsx
@@ -1,19 +1,19 @@
 import React, { useRef } from "react";
-import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
 import { useAppSelector } from "../../../plugins/ReduxHooks.ts";
 import { TagStyle } from "../annoSlice.ts";
 import DocumentRenderer from "../DocumentRenderer/DocumentRenderer.tsx";
 import useComputeTokenData from "../DocumentRenderer/useComputeTokenData.ts";
 import SentenceMenu, { SentenceMenuHandle } from "./SentenceMenu.tsx";
 
-interface AnnotationVisualizerProps {
-  sdoc: SourceDocumentWithDataRead;
+interface TextViewerProps {
+  sdocData: SourceDocumentDataRead;
 }
 
 /**
  * Super simple annotation rendering, does not work for overlapping annotations!!!
  */
-function TextViewer({ sdoc }: AnnotationVisualizerProps) {
+function TextViewer({ sdocData: sdocData }: TextViewerProps) {
   // local state
   const sentenceMenuRef = useRef<SentenceMenuHandle>(null);
 
@@ -22,9 +22,9 @@ function TextViewer({ sdoc }: AnnotationVisualizerProps) {
   const tagStyle = useAppSelector((state) => state.annotations.tagStyle);
 
   // global server state (react-query)
-  const sentences = sdoc.sentences;
+  const sentences = sdocData.sentences;
   const { tokenData, annotationsPerToken, annotationMap } = useComputeTokenData({
-    sdocId: sdoc.id,
+    sdocData,
     userIds: visibleUserIds,
   });
 
@@ -107,8 +107,8 @@ function TextViewer({ sdoc }: AnnotationVisualizerProps) {
         annotationMap={annotationMap}
         onClick={handleClick}
         isViewer={true}
-        html={sdoc.html}
-        projectId={sdoc.project_id}
+        html={sdocData.html}
+        projectId={sdocData.project_id}
         style={{
           zIndex: 1,
           overflowY: "auto",
diff --git a/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx b/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx
index 9a52a1aed..c17e616e8 100644
--- a/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx
+++ b/frontend/src/views/annotation/ImageAnnotator/ImageAnnotator.tsx
@@ -4,7 +4,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 
 import SdocHooks from "../../../api/SdocHooks.ts";
 import { BBoxAnnotationReadResolved } from "../../../api/openapi/models/BBoxAnnotationReadResolved.ts";
-import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
 import { SpanAnnotationReadResolved } from "../../../api/openapi/models/SpanAnnotationReadResolved.ts";
 import ConfirmationAPI from "../../../components/ConfirmationDialog/ConfirmationAPI.ts";
 import { useOpenSnackbar } from "../../../components/SnackbarDialog/useOpenSnackbar.ts";
@@ -16,14 +16,14 @@ import SVGBBoxText from "./SVGBBoxText.tsx";
 import { useCreateBBoxAnnotation, useDeleteBBoxAnnotation, useUpdateBBoxAnnotation } from "./imageAnnotationHooks.ts";
 
 interface ImageAnnotatorProps {
-  sdoc: SourceDocumentWithDataRead;
+  sdocData: SourceDocumentDataRead;
 }
 
 function ImageAnnotator(props: ImageAnnotatorProps) {
-  const heightMetadata = SdocHooks.useGetMetadataByKey(props.sdoc.id, "height");
+  const heightMetadata = SdocHooks.useGetMetadataByKey(props.sdocData.id, "height");
 
   if (heightMetadata.isSuccess) {
-    return <ImageAnnotatorWithHeight sdoc={props.sdoc} height={heightMetadata.data.int_value!} />;
+    return <ImageAnnotatorWithHeight sdocData={props.sdocData} height={heightMetadata.data.int_value!} />;
   } else if (heightMetadata.isError) {
     return <div>{heightMetadata.error.message}</div>;
   } else if (heightMetadata.isLoading) {
@@ -33,7 +33,7 @@ function ImageAnnotator(props: ImageAnnotatorProps) {
   }
 }
 
-function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { height: number }) {
+function ImageAnnotatorWithHeight({ sdocData, height }: ImageAnnotatorProps & { height: number }) {
   // references to svg elements
   const svgRef = useRef<SVGSVGElement>(null);
   const gZoomRef = useRef<SVGGElement>(null);
@@ -47,7 +47,7 @@ function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { heig
   const hiddenCodeIds = useAppSelector((state) => state.annotations.hiddenCodeIds);
 
   // global server state (react query)
-  const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdoc.id, visibleUserIds);
+  const annotations = SdocHooks.useGetBBoxAnnotationsBatch(sdocData.id, visibleUserIds);
 
   // snackbar
   const openSnackbar = useOpenSnackbar();
@@ -216,7 +216,7 @@ function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { heig
       {
         requestBody: {
           code_id: code.id,
-          sdoc_id: sdoc.id,
+          sdoc_id: sdocData.id,
           x_min: x,
           x_max: x + width,
           y_min: y,
@@ -327,7 +327,11 @@ function ImageAnnotatorWithHeight({ sdoc, height }: ImageAnnotatorProps & { heig
       >
         <g ref={gZoomRef}>
           <g ref={gDragRef} style={{ cursor: isZooming ? "move" : "crosshair" }}>
-            <image ref={imgRef} href={sdoc.content} style={{ outline: "1px solid black" }} />
+            <image
+              ref={imgRef}
+              href={encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + sdocData.html)}
+              style={{ outline: "1px solid black" }}
+            />
             <rect
               ref={rectRef}
               x={0}
diff --git a/frontend/src/views/annotation/TextAnnotator/TextAnnotator.tsx b/frontend/src/views/annotation/TextAnnotator/TextAnnotator.tsx
index caa2a3f22..8b7da063e 100644
--- a/frontend/src/views/annotation/TextAnnotator/TextAnnotator.tsx
+++ b/frontend/src/views/annotation/TextAnnotator/TextAnnotator.tsx
@@ -5,7 +5,7 @@ import { FAKE_ANNOTATION_ID } from "../../../api/SpanAnnotationHooks.ts";
 
 import { BBoxAnnotationReadResolved } from "../../../api/openapi/models/BBoxAnnotationReadResolved.ts";
 import { CodeRead } from "../../../api/openapi/models/CodeRead.ts";
-import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentDataRead } from "../../../api/openapi/models/SourceDocumentDataRead.ts";
 import { SpanAnnotationCreate } from "../../../api/openapi/models/SpanAnnotationCreate.ts";
 import { SpanAnnotationReadResolved } from "../../../api/openapi/models/SpanAnnotationReadResolved.ts";
 import ConfirmationAPI from "../../../components/ConfirmationDialog/ConfirmationAPI.ts";
@@ -22,11 +22,11 @@ const selectionIsEmpty = (selection: Selection): boolean => {
   return selection.toString().trim().length === 0;
 };
 
-interface AnnotatorRemasteredProps {
-  sdoc: SourceDocumentWithDataRead;
+interface TextAnnotatorProps {
+  sdocData: SourceDocumentDataRead;
 }
 
-function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) {
+function TextAnnotator({ sdocData }: TextAnnotatorProps) {
   // local state
   const spanMenuRef = useRef<CodeSelectorHandle>(null);
   const [fakeAnnotation, setFakeAnnotation] = useState<SpanAnnotationCreate | undefined>(undefined);
@@ -42,7 +42,7 @@ function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) {
 
   // computed / custom hooks
   const { tokenData, annotationsPerToken, annotationMap } = useComputeTokenData({
-    sdocId: sdoc.id,
+    sdocData,
     userIds: visibleUserIds,
   });
 
@@ -139,7 +139,7 @@ function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) {
 
     const requestBody: SpanAnnotationCreate = {
       code_id: mostRecentCodeId || -1,
-      sdoc_id: sdoc.id,
+      sdoc_id: sdocData.id,
       begin: tokenData[begin_token].beginChar,
       end: tokenData[end_token].endChar,
       begin_token: begin_token,
@@ -308,12 +308,12 @@ function TextAnnotator({ sdoc }: AnnotatorRemasteredProps) {
       <DocumentRenderer
         className="myFlexFillAllContainer"
         onMouseUp={handleMouseUp}
-        html={sdoc.html}
+        html={sdocData.html}
         tokenData={tokenData}
         annotationsPerToken={annotationsPerToken}
         annotationMap={annotationMap}
         isViewer={false}
-        projectId={sdoc.project_id}
+        projectId={sdocData.project_id}
         style={{
           zIndex: 1,
           overflowY: "auto",
diff --git a/frontend/src/views/whiteboard/nodes/BboxAnnotationNode.tsx b/frontend/src/views/whiteboard/nodes/BboxAnnotationNode.tsx
index ed8584d4b..d9ef954d8 100644
--- a/frontend/src/views/whiteboard/nodes/BboxAnnotationNode.tsx
+++ b/frontend/src/views/whiteboard/nodes/BboxAnnotationNode.tsx
@@ -43,7 +43,7 @@ function BboxAnnotationNode(props: NodeProps<BBoxAnnotationNodeData>) {
   // global server state (react-query)
   const annotation = BboxAnnotationHooks.useGetAnnotation(props.data.bboxAnnotationId);
   const code = CodeHooks.useGetCode(annotation.data?.code.id);
-  const sdoc = SdocHooks.useGetDocument(annotation.data?.sdoc_id);
+  const sdocData = SdocHooks.useGetDocumentData(annotation.data?.sdoc_id);
   const memo = BboxAnnotationHooks.useGetUserMemo(props.data.bboxAnnotationId);
 
   // effects
@@ -198,9 +198,9 @@ function BboxAnnotationNode(props: NodeProps<BBoxAnnotationNodeData>) {
               }
             />
             <CardContent className="bbox-content" style={{ padding: 2, textAlign: "center" }}>
-              {annotation.isSuccess && sdoc.isSuccess ? (
+              {annotation.isSuccess && sdocData.isSuccess ? (
                 <ImageCropper
-                  imageUrl={sdoc.data.content}
+                  imageUrl={encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + sdocData.data.html)}
                   x={annotation.data.x_min}
                   y={annotation.data.y_min}
                   width={annotation.data.x_max - annotation.data.x_min}
@@ -211,7 +211,7 @@ function BboxAnnotationNode(props: NodeProps<BBoxAnnotationNodeData>) {
                     border: "4px solid " + annotation.data.code.color,
                   }}
                 />
-              ) : annotation.isError || sdoc.isError ? (
+              ) : annotation.isError || sdocData.isError ? (
                 <Typography variant="body2">Error!</Typography>
               ) : (
                 <Typography variant="body2">Loading ...</Typography>
diff --git a/frontend/src/views/whiteboard/nodes/SdocNode.tsx b/frontend/src/views/whiteboard/nodes/SdocNode.tsx
index cbd341933..045d096fc 100644
--- a/frontend/src/views/whiteboard/nodes/SdocNode.tsx
+++ b/frontend/src/views/whiteboard/nodes/SdocNode.tsx
@@ -1,11 +1,11 @@
-import { CardContent, CardHeader, CardMedia, Divider, MenuItem, Typography } from "@mui/material";
+import { CardContent, CardHeader, CardMedia, CircularProgress, Divider, MenuItem, Typography } from "@mui/material";
 import { intersection } from "lodash";
 import { useEffect, useRef } from "react";
 import { NodeProps, useReactFlow } from "reactflow";
 import SdocHooks from "../../../api/SdocHooks.ts";
 import { AttachedObjectType } from "../../../api/openapi/models/AttachedObjectType.ts";
 import { DocType } from "../../../api/openapi/models/DocType.ts";
-import { SourceDocumentWithDataRead } from "../../../api/openapi/models/SourceDocumentWithDataRead.ts";
+import { SourceDocumentRead } from "../../../api/openapi/models/SourceDocumentRead.ts";
 import GenericPositionMenu, { GenericPositionMenuHandle } from "../../../components/GenericPositionMenu.tsx";
 import MemoDialogAPI from "../../../components/Memo/MemoDialog/MemoDialogAPI.ts";
 import SdocRenderer from "../../../components/SourceDocument/SdocRenderer.tsx";
@@ -141,10 +141,8 @@ function SdocNode(props: NodeProps<SdocNodeData>) {
           {sdoc.isSuccess ? (
             <>
               {docType === DocType.IMAGE ? (
-                <CardMedia component="img" image={sdoc.data.content} alt="Thumbnail" />
-              ) : docType === DocType.TEXT ? (
-                <TextPreview sdoc={sdoc.data} />
-              ) : (
+                <SdocNodeImageContent sdoc={sdoc.data} />
+              ) : docType === DocType.TEXT ? null : (
                 <Typography fontSize={8} textAlign={"center"}>
                   DOC TYPE IS NOT SUPPORTED
                 </Typography>
@@ -172,8 +170,20 @@ function SdocNode(props: NodeProps<SdocNodeData>) {
   );
 }
 
-function TextPreview({ sdoc }: { sdoc: SourceDocumentWithDataRead }) {
-  return <Typography>{sdoc.content}</Typography>;
+function SdocNodeImageContent({ sdoc }: { sdoc: SourceDocumentRead }) {
+  const sdocData = SdocHooks.useGetDocumentData(sdoc.id);
+
+  if (!sdocData.isSuccess) {
+    return <CircularProgress />;
+  }
+
+  return (
+    <CardMedia
+      component="img"
+      image={encodeURI(import.meta.env.VITE_APP_CONTENT + "/" + sdocData.data.html)}
+      alt="Thumbnail"
+    />
+  );
 }
 
 export default SdocNode;