Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚗️ Extract text from base64 yjs document #270

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to

## [Unreleased]

## Added

- ⚗️(backend) Extract text from base64 yjs document #270


## [1.4.0] - 2024-09-17

Expand Down
28 changes: 27 additions & 1 deletion src/backend/core/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import pytest

from core.utils import email_invitation
from core.utils import email_invitation, yjs_base64_to_text

pytestmark = pytest.mark.django_db

Expand Down Expand Up @@ -85,3 +85,29 @@ def test_utils__email_invitation_failed(mock_logger, _mock_send_mail):

assert email == "[email protected]"
assert isinstance(exception, smtplib.SMTPException)


def test_yjs_base64_to_text():
    """
    Check that yjs_base64_to_text returns the plain text of a stored document.

    The fixture below is an example of what is saved in the database: a
    base64-encoded yjs document generated from the blocknote editor. Its
    content is a heading "*Hello*" followed by a bullet item "w**or**ld".
    """
    encoded_document = (
        "AR717vLVDgAHAQ5kb2N1bWVudC1zdG9yZQMKYmxvY2tHcm91cAcA9e7y1Q4AAw5ibG9ja0NvbnRh"
        "aW5lcgcA9e7y1Q4BAwdoZWFkaW5nBwD17vLVDgIGBgD17vLVDgMGaXRhbGljAnt9hPXu8tUOBAVI"
        "ZWxsb4b17vLVDgkGaXRhbGljBG51bGwoAPXu8tUOAg10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y"
        "1Q4CBWxldmVsAX0BKAD17vLVDgECaWQBdyQwNGQ2MjM0MS04MzI2LTQyMzYtYTA4My00ODdlMjZm"
        "YWQyMzAoAPXu8tUOAQl0ZXh0Q29sb3IBdwdkZWZhdWx0KAD17vLVDgEPYmFja2dyb3VuZENvbG9y"
        "AXcHZGVmYXVsdIf17vLVDgEDDmJsb2NrQ29udGFpbmVyBwD17vLVDhADDmJ1bGxldExpc3RJdGVt"
        "BwD17vLVDhEGBAD17vLVDhIBd4b17vLVDhMEYm9sZAJ7fYT17vLVDhQCb3KG9e7y1Q4WBGJvbGQE"
        "bnVsbIT17vLVDhcCbGQoAPXu8tUOEQ10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y1Q4QAmlkAXck"
        "ZDM1MWUwNjgtM2U1NS00MjI2LThlYTUtYWJiMjYzMTk4ZTJhKAD17vLVDhAJdGV4dENvbG9yAXcH"
        "ZGVmYXVsdCgA9e7y1Q4QD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHSH9e7y1Q4QAw5ibG9ja0Nv"
        "bnRhaW5lcgcA9e7y1Q4eAwlwYXJhZ3JhcGgoAPXu8tUOHw10ZXh0QWxpZ25tZW50AXcEbGVmdCgA"
        "9e7y1Q4eAmlkAXckODk3MDBjMDctZTBlMS00ZmUwLWFjYTItODQ5MzIwOWE3ZTQyKAD17vLVDh4J"
        "dGV4dENvbG9yAXcHZGVmYXVsdCgA9e7y1Q4eD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHQA"
    )
    expected_text = "Hello world"

    extracted_text = yjs_base64_to_text(encoded_document)

    assert extracted_text == expected_text
18 changes: 18 additions & 0 deletions src/backend/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Utilities for the core app.
"""

import base64
import smtplib
from logging import getLogger

Expand All @@ -12,6 +13,9 @@
from django.utils.translation import gettext_lazy as _
from django.utils.translation import override

import y_py as Y
from bs4 import BeautifulSoup

logger = getLogger(__name__)


Expand All @@ -38,3 +42,17 @@ def email_invitation(language, email, document_id):

except smtplib.SMTPException as exception:
logger.error("invitation to %s was not sent: %s", email, exception)


def yjs_base64_to_text(base64_string):
    """
    Extract text from base64 yjs document.

    The base64 payload is decoded and applied as an update to a fresh yjs
    document. The "document-store" XML element of that document is then
    rendered to a string and its markup is stripped with BeautifulSoup.

    Args:
        base64_string: base64-encoded yjs update.

    Returns:
        The text nodes of the "document-store" element joined with a single
        space, with leading and trailing whitespace removed.
    """
    # y_py receives the update as a mutable byte buffer.
    update = bytearray(base64.b64decode(base64_string))

    ydoc = Y.YDoc()  # pylint: disable=E1101
    Y.apply_update(ydoc, update)  # pylint: disable=E1101

    # The string form of the XML element is markup; parse it to keep only text.
    markup = str(ydoc.get_xml_element("document-store"))
    text = BeautifulSoup(markup, "html.parser").get_text(separator=" ")
    return text.strip()
2 changes: 2 additions & 0 deletions src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"beautifulsoup4==4.12.3",
"boto3==1.35.10",
"Brotli==1.1.0",
"celery[redis]==5.4.0",
Expand Down Expand Up @@ -57,6 +58,7 @@ dependencies = [
"WeasyPrint>=60.2",
"whitenoise==6.7.0",
"mozilla-django-oidc==4.0.1",
"y-py==0.5.5",
]

[project.urls]
Expand Down
Loading