diff --git a/.github/workflows/ci-container-build.yaml b/.github/workflows/ci-container-build.yaml index 6597620c0..a9a5ad2c4 100644 --- a/.github/workflows/ci-container-build.yaml +++ b/.github/workflows/ci-container-build.yaml @@ -12,7 +12,6 @@ on: - 'backend/**' - 'frontend/**' - 'unstract/**' - - 'document-service/**' - 'platform-service/**' - 'x2text-service/**' - 'worker/**' @@ -26,7 +25,6 @@ on: - 'backend/**' - 'frontend/**' - 'unstract/**' - - 'document-service/**' - 'platform-service/**' - 'x2text-service/**' - 'worker/**' @@ -52,7 +50,6 @@ jobs: working-directory: ./docker run: | cp ../backend/sample.env ../backend/.env - cp ../document-service/sample.env ../document-service/.env cp ../platform-service/sample.env ../platform-service/.env cp ../prompt-service/sample.env ../prompt-service/.env cp ../worker/sample.env ../worker/.env diff --git a/.github/workflows/production-build.yaml b/.github/workflows/production-build.yaml index eaf1bd57f..650834033 100644 --- a/.github/workflows/production-build.yaml +++ b/.github/workflows/production-build.yaml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - service_name: [backend, frontend, document-service, platform-service, prompt-service, worker, x2text-service] + service_name: [backend, frontend, platform-service, prompt-service, worker, x2text-service] steps: - name: Checkout code for release diff --git a/backend/sample.env b/backend/sample.env index 06c22e10f..92bccb953 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -74,10 +74,6 @@ PLATFORM_SERVICE_PORT=3001 UNSTRACT_RUNNER_HOST=http://unstract-runner UNSTRACT_RUNNER_PORT=5002 -# Document Service -DOCUMENT_PROCESSOR_URL=http://unstract-document-service:3002 -DOCUMENT_PROCESSOR_API_KEY="0vpepz5CyzRIAKuy+9ZJXLOCXO+TYOAt+55M+ewxGMk=" - # Workflow execution WORKFLOW_DATA_DIR = "/data/execution" diff --git a/docker/docker-compose.build.yaml b/docker/docker-compose.build.yaml index 48aa376ff..14673e0de 100644 --- a/docker/docker-compose.build.yaml +++ b/docker/docker-compose.build.yaml @@ -19,13 +19,6 @@ services: build: dockerfile: docker/dockerfiles/platform.Dockerfile context: .. - document-service: - profiles: - - optional - image: unstract/document-service:${VERSION} - build: - dockerfile: docker/dockerfiles/document.Dockerfile - context: .. prompt-service: image: unstract/prompt-service:${VERSION} build: diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 67a19c6b9..cb3766d7f 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -203,23 +203,6 @@ services: labels: - traefik.enable=false - document-service: - profiles: - - optional - image: unstract/document-service:${VERSION} - container_name: unstract-document-service - restart: unless-stopped - ports: - - 3002:3002 - depends_on: - - redis - env_file: - - ../document-service/.env - environment: - - ENVIRONMENT=development - labels: - - traefik.enable=false - volumes: prompt_studio_data: unstract_data: diff --git a/docker/dockerfiles/backend.Dockerfile.dockerignore b/docker/dockerfiles/backend.Dockerfile.dockerignore index aa3a5142a..739cbd3bc 100644 --- a/docker/dockerfiles/backend.Dockerfile.dockerignore +++ b/docker/dockerfiles/backend.Dockerfile.dockerignore @@ -54,7 +54,6 @@ test*.py !backend -document-service frontend platform-service prompt-service diff --git a/docker/dockerfiles/document.Dockerfile b/docker/dockerfiles/document.Dockerfile deleted file mode 100644 index a7aac7e6b..000000000 --- a/docker/dockerfiles/document.Dockerfile +++ /dev/null @@ -1,58 +0,0 @@ -FROM openjdk:8-jre-slim - -LABEL maintainer="Zipstack Inc." - -# Keeps Python from generating .pyc files in the container -ENV PYTHONDONTWRITEBYTECODE 1 -# Turns off buffering for easier container logging -ENV PYTHONUNBUFFERED=1 - -ENV BUILD_CONTEXT_PATH document-service -ENV PYTHON_VERSION 3.9 -ENV PDM_VERSION 2.16.1 - -RUN DEBIAN_FRONTEND=noninteractive apt-get update; \ - apt-get --no-install-recommends install -y \ - fonts-dejavu fonts-dejavu-core fonts-dejavu-extra fonts-droid-fallback fonts-dustin \ - fonts-f500 fonts-fanwood fonts-freefont-ttf \ - fonts-liberation fonts-lmodern fonts-lyx \ - fonts-opensymbol fonts-sil-gentium fonts-texgyre fonts-tlwg-purisa \ - hyphen-af hyphen-en-us \ - libreoffice-common \ - python${PYTHON_VERSION} python3-pip \ - software-properties-common \ - unoconv; \ - apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*; \ - \ - pip install --no-cache-dir -U pip pdm~=${PDM_VERSION}; \ - \ - # Creates a non-root user with an explicit UID and adds permission to access the /app folder - # For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers - adduser -u 5678 --disabled-password --gecos "" unstract; - -USER unstract - -WORKDIR /app - -COPY --chown=unstract ${BUILD_CONTEXT_PATH}/ . - -RUN set -e; \ - \ - rm -rf .venv .pdm* .python* requirements.txt 2>/dev/null; \ - \ - pdm venv create -w virtualenv --with-pip; \ - # source command may not be availble in sh - . .venv/bin/activate; \ - \ - pdm sync --prod --no-editable; \ - \ - # REF: https://docs.gunicorn.org/en/stable/deploy.html#using-virtualenv - pip install --no-cache-dir gunicorn; \ - \ - # Storage for document uploads and processing - mkdir /app/uploads /app/processed; - -EXPOSE 3002 - -# Wrapper to run both python server and libreoffice. -CMD [ "/app/wrapper.sh" ] diff --git a/docker/dockerfiles/document.Dockerfile.dockerignore b/docker/dockerfiles/document.Dockerfile.dockerignore deleted file mode 100644 index c63f53024..000000000 --- a/docker/dockerfiles/document.Dockerfile.dockerignore +++ /dev/null @@ -1,64 +0,0 @@ -**/__pycache__ -**/.pytest_cache -**/.python-version -**/.pyc -**/.pyo -**/.venv -**/.classpath -**/.dockerignore -**/.env -**/.git -**/.gitignore -**/.gitkeep -**/.project -**/.settings -**/.toolstarget -**/.vs -**/.vscode -**/*.*proj.user -**/*.dbmdl -**/*.jfm -**/bin -**/charts -**/docker-compose* -**/compose* -**/Dockerfile* -**/build -**/dist -**/node_modules -**/npm-debug.log -**/obj -**/secrets.dev.yaml -**/values.dev.yaml -**/.db -**/.sqlite3 -**/.log -**/*-log.txt -**/*.drawio -**/.tmp -**/.swp -**/.swo -**/.bak -*.idea -*.vscode -*.git -**/.pdm.toml -**/.pdm-build -**/.pdm-python -!LICENSE -*.md -!README.md -.jshintrc -.pre-commit-config.yaml -**/tests -test*.py - -backend -!document-service -frontend -platform-service -prompt-service -tools -unstract -worker -x2text-service diff --git a/docker/dockerfiles/frontend.Dockerfile.dockerignore b/docker/dockerfiles/frontend.Dockerfile.dockerignore index 4ea2254b5..38ca16755 100644 --- a/docker/dockerfiles/frontend.Dockerfile.dockerignore +++ b/docker/dockerfiles/frontend.Dockerfile.dockerignore @@ -55,7 +55,6 @@ test*.py backend !frontend -document-service platform-service prompt-service tools diff --git a/docker/dockerfiles/platform.Dockerfile.dockerignore b/docker/dockerfiles/platform.Dockerfile.dockerignore index 00b841bd4..06f2017a7 100644 --- a/docker/dockerfiles/platform.Dockerfile.dockerignore +++ b/docker/dockerfiles/platform.Dockerfile.dockerignore @@ -54,7 +54,6 @@ test*.py backend -document-service frontend !platform-service prompt-service diff --git a/docker/dockerfiles/prompt.Dockerfile.dockerignore b/docker/dockerfiles/prompt.Dockerfile.dockerignore index 00c305965..d3c688586 100644 --- a/docker/dockerfiles/prompt.Dockerfile.dockerignore +++ b/docker/dockerfiles/prompt.Dockerfile.dockerignore @@ -54,7 +54,6 @@ test*.py backend -document-service frontend platform-service !prompt-service diff --git a/docker/dockerfiles/worker.Dockerfile.dockerignore b/docker/dockerfiles/worker.Dockerfile.dockerignore index 077282992..02c44352b 100644 --- a/docker/dockerfiles/worker.Dockerfile.dockerignore +++ b/docker/dockerfiles/worker.Dockerfile.dockerignore @@ -54,7 +54,6 @@ test*.py backend -document-service frontend platform-service prompt-service diff --git a/docker/dockerfiles/x2text.Dockerfile.dockerignore b/docker/dockerfiles/x2text.Dockerfile.dockerignore index d86d203e1..dec89acc7 100644 --- a/docker/dockerfiles/x2text.Dockerfile.dockerignore +++ b/docker/dockerfiles/x2text.Dockerfile.dockerignore @@ -54,7 +54,6 @@ test*.py backend -document-service frontend platform-service prompt-service diff --git a/document-service/.gitignore b/document-service/.gitignore deleted file mode 100644 index 4b3ab3003..000000000 --- a/document-service/.gitignore +++ /dev/null @@ -1,162 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ diff --git a/document-service/README.md b/document-service/README.md deleted file mode 100644 index 89de1833e..000000000 --- a/document-service/README.md +++ /dev/null @@ -1,72 +0,0 @@ -## Document Processor Server - -A flask server with the following functionalities: - -* Upload a document to the server -* Perform find and replace on the document - -### Pre-requisites - -#### LibreOffice - -Application Should be installed and running in headless mode - -```bash -/Applications/LibreOffice.app/Contents/MacOS/soffice --backtrace --headless --nocrashreport --nodefault --nologo --nofirststartwizard --norestore --accept="socket,host=127.0.0.1,port=2002,tcpNoDelay=1;urp;StarOffice.ComponentContext" -``` - -_Change the application path to suit your requirement. The example is for MacOS_ - -#### Unoserver - -Should be installed on the server. Note that Unoserver installation is not straighforward. Unoserver requires the Python -distribution which is bundled with LibreOffice. The following command can be used to install Unoserver - -```bash -/Applications/LibreOffice.app/Contents/Resources/python -m pip install unoserver -``` - -_Change the application path to suit your requirement. The example is for MacOS. Refer -to - -### Environment Variables - -```bash -REDIS_HOST=redis-15866.c99.us-east-1-4.ec2.cloud.redislabs.com -REDIS_PORT=15866 -REDIS_PASSWORD=XXXXXXXXXXXXXXXXXXX -UPLOAD_FOLDER=/tmp/document_service/uploads -PROCESS_FOLDER=/tmp/document_service/processed -LIBREOFFICE_PYTHON=/Applications/LibreOffice.app/Contents/Resources/python -PORT=3000 -MAX_FILE_SIZE=31457280 -``` - -### Run in development - -``` -flask --app "src.unstract.document_service.main:app" run --debug -``` - -### Nginx Configuration - -If we are using Nginx to frontend the server for SSL, make sure `Autherization` and `Content-Length` headers are passed - -```nginx -location / { - proxy_pass http://localhost:3000/; - proxy_buffering off; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-Host $host; - proxy_set_header X-Forwarded-Port $server_port; - proxy_pass_request_headers on; - client_max_body_size 100M; - proxy_request_buffering off; - include proxy_params; -} -``` - -Thunder collection `thunder-collection_document-service.json` for API testing. `upload` a pdf document, then hit `find_and_replace` api. -it will generate a pdf document. download it and verify replacement text. - -VS Marketplace Link: diff --git a/document-service/pdm.lock b/document-service/pdm.lock deleted file mode 100644 index 5e43579fa..000000000 --- a/document-service/pdm.lock +++ /dev/null @@ -1,366 +0,0 @@ -# This file is @generated by PDM. -# It is not intended for manual editing. - -[metadata] -groups = ["default", "deploy", "test"] -strategy = ["cross_platform", "inherit_metadata"] -lock_version = "4.4.2" -content_hash = "sha256:ecc29cf1d0d5572d74fb1a967cf78473f13efb09e26f9941bae5e948c00f34c0" - -[[package]] -name = "async-timeout" -version = "4.0.3" -requires_python = ">=3.7" -summary = "Timeout context manager for asyncio programs" -groups = ["default"] -marker = "python_full_version <= \"3.11.2\"" -files = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] - -[[package]] -name = "blinker" -version = "1.8.2" -requires_python = ">=3.8" -summary = "Fast, simple object-to-object and broadcast signaling" -groups = ["default"] -files = [ - {file = "blinker-1.8.2-py3-none-any.whl", hash = "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01"}, - {file = "blinker-1.8.2.tar.gz", hash = "sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83"}, -] - -[[package]] -name = "click" -version = "8.1.7" -requires_python = ">=3.7" -summary = "Composable command line interface toolkit" -groups = ["default"] -dependencies = [ - "colorama; platform_system == \"Windows\"", -] -files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] - -[[package]] -name = "colorama" -version = "0.4.6" -requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -summary = "Cross-platform colored terminal text." -groups = ["default", "test"] -marker = "sys_platform == \"win32\" or platform_system == \"Windows\"" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "defusedxml" -version = "0.7.1" -requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -summary = "XML bomb protection for Python stdlib modules" -groups = ["default"] -files = [ - {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, - {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.2.2" -requires_python = ">=3.7" -summary = "Backport of PEP 654 (exception groups)" -groups = ["test"] -marker = "python_version < \"3.11\"" -files = [ - {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, - {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, -] - -[[package]] -name = "flask" -version = "2.3.2" -requires_python = ">=3.8" -summary = "A simple framework for building complex web applications." -groups = ["default"] -dependencies = [ - "Jinja2>=3.1.2", - "Werkzeug>=2.3.3", - "blinker>=1.6.2", - "click>=8.1.3", - "importlib-metadata>=3.6.0; python_version < \"3.10\"", - "itsdangerous>=2.1.2", -] -files = [ - {file = "Flask-2.3.2-py3-none-any.whl", hash = "sha256:77fd4e1249d8c9923de34907236b747ced06e5467ecac1a7bb7115ae0e9670b0"}, - {file = "Flask-2.3.2.tar.gz", hash = "sha256:8c2f9abd47a9e8df7f0c3f091ce9497d011dc3b31effcf4c85a6e2b50f4114ef"}, -] - -[[package]] -name = "gunicorn" -version = "22.0.0" -requires_python = ">=3.7" -summary = "WSGI HTTP Server for UNIX" -groups = ["deploy"] -dependencies = [ - "packaging", -] -files = [ - {file = "gunicorn-22.0.0-py3-none-any.whl", hash = "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9"}, - {file = "gunicorn-22.0.0.tar.gz", hash = "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63"}, -] - -[[package]] -name = "importlib-metadata" -version = "8.2.0" -requires_python = ">=3.8" -summary = "Read metadata from Python packages" -groups = ["default"] -marker = "python_version < \"3.10\"" -dependencies = [ - "zipp>=0.5", -] -files = [ - {file = "importlib_metadata-8.2.0-py3-none-any.whl", hash = "sha256:11901fa0c2f97919b288679932bb64febaeacf289d18ac84dd68cb2e74213369"}, - {file = "importlib_metadata-8.2.0.tar.gz", hash = "sha256:72e8d4399996132204f9a16dcc751af254a48f8d1b20b9ff0f98d4a8f901e73d"}, -] - -[[package]] -name = "iniconfig" -version = "2.0.0" -requires_python = ">=3.7" -summary = "brain-dead simple config-ini parsing" -groups = ["test"] -files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] - -[[package]] -name = "itsdangerous" -version = "2.2.0" -requires_python = ">=3.8" -summary = "Safely pass data to untrusted environments and back." -groups = ["default"] -files = [ - {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, - {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, -] - -[[package]] -name = "jinja2" -version = "3.1.4" -requires_python = ">=3.7" -summary = "A very fast and expressive template engine." -groups = ["default"] -dependencies = [ - "MarkupSafe>=2.0", -] -files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, -] - -[[package]] -name = "markupsafe" -version = "2.1.5" -requires_python = ">=3.7" -summary = "Safely add untrusted strings to HTML/XML markup." -groups = ["default"] -files = [ - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, -] - -[[package]] -name = "odfpy" -version = "1.4.1" -summary = "Python API and tools to manipulate OpenDocument files" -groups = ["default"] -dependencies = [ - "defusedxml", -] -files = [ - {file = "odfpy-1.4.1.tar.gz", hash = "sha256:db766a6e59c5103212f3cc92ec8dd50a0f3a02790233ed0b52148b70d3c438ec"}, -] - -[[package]] -name = "packaging" -version = "24.1" -requires_python = ">=3.8" -summary = "Core utilities for Python packages" -groups = ["deploy", "test"] -files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, -] - -[[package]] -name = "pluggy" -version = "1.5.0" -requires_python = ">=3.8" -summary = "plugin and hook calling mechanisms for python" -groups = ["test"] -files = [ - {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, - {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, -] - -[[package]] -name = "pytest" -version = "8.3.2" -requires_python = ">=3.8" -summary = "pytest: simple powerful testing with Python" -groups = ["test"] -dependencies = [ - "colorama; sys_platform == \"win32\"", - "exceptiongroup>=1.0.0rc8; python_version < \"3.11\"", - "iniconfig", - "packaging", - "pluggy<2,>=1.5", - "tomli>=1; python_version < \"3.11\"", -] -files = [ - {file = "pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"}, - {file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"}, -] - -[[package]] -name = "python-dotenv" -version = "1.0.1" -requires_python = ">=3.8" -summary = "Read key-value pairs from a .env file and set them as environment variables" -groups = ["default"] -files = [ - {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, - {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, -] - -[[package]] -name = "redis" -version = "4.5.5" -requires_python = ">=3.7" -summary = "Python client for Redis database and key-value store" -groups = ["default"] -dependencies = [ - "async-timeout>=4.0.2; python_full_version <= \"3.11.2\"", -] -files = [ - {file = "redis-4.5.5-py3-none-any.whl", hash = "sha256:77929bc7f5dab9adf3acba2d3bb7d7658f1e0c2f1cafe7eb36434e751c471119"}, - {file = "redis-4.5.5.tar.gz", hash = "sha256:dc87a0bdef6c8bfe1ef1e1c40be7034390c2ae02d92dcd0c7ca1729443899880"}, -] - -[[package]] -name = "setuptools" -version = "71.1.0" -requires_python = ">=3.8" -summary = "Easily download, build, install, upgrade, and uninstall Python packages" -groups = ["default"] -files = [ - {file = "setuptools-71.1.0-py3-none-any.whl", hash = "sha256:33874fdc59b3188304b2e7c80d9029097ea31627180896fb549c578ceb8a0855"}, - {file = "setuptools-71.1.0.tar.gz", hash = "sha256:032d42ee9fb536e33087fb66cac5f840eb9391ed05637b3f2a76a7c8fb477936"}, -] - -[[package]] -name = "tomli" -version = "2.0.1" -requires_python = ">=3.7" -summary = "A lil' TOML parser" -groups = ["test"] -marker = "python_version < \"3.11\"" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] - -[[package]] -name = "unoserver" -version = "1.5" -requires_python = ">=3.7" -summary = "A server for file conversions with Libre Office" -groups = ["default"] -dependencies = [ - "setuptools", -] -files = [ - {file = "unoserver-1.5-py3-none-any.whl", hash = "sha256:2c45f96ba8da8a56b61edb66df679f49fd234cdd58202ec1ec984f7fc74ead1f"}, - {file = "unoserver-1.5.tar.gz", hash = "sha256:af3dedc5fd12bbf1d54d715b5e3bcd0bf0b6adcc1bd0317f3163a02303c5716b"}, -] - -[[package]] -name = "waitress" -version = "2.1.2" -requires_python = ">=3.7.0" -summary = "Waitress WSGI server" -groups = ["default"] -files = [ - {file = "waitress-2.1.2-py3-none-any.whl", hash = "sha256:7500c9625927c8ec60f54377d590f67b30c8e70ef4b8894214ac6e4cad233d2a"}, - {file = "waitress-2.1.2.tar.gz", hash = "sha256:780a4082c5fbc0fde6a2fcfe5e26e6efc1e8f425730863c04085769781f51eba"}, -] - -[[package]] -name = "werkzeug" -version = "3.0.3" -requires_python = ">=3.8" -summary = "The comprehensive WSGI web application library." -groups = ["default"] -dependencies = [ - "MarkupSafe>=2.1.1", -] -files = [ - {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, - {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, -] - -[[package]] -name = "zipp" -version = "3.19.2" -requires_python = ">=3.8" -summary = "Backport of pathlib-compatible object wrapper for zip files" -groups = ["default"] -marker = "python_version < \"3.10\"" -files = [ - {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, - {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, -] diff --git a/document-service/pyproject.toml b/document-service/pyproject.toml deleted file mode 100644 index cd900f0bd..000000000 --- a/document-service/pyproject.toml +++ /dev/null @@ -1,34 +0,0 @@ -[project] -name = "unstract-document-service" -version = "0.1.0" -description = "Document processing service" -authors = [ - {name = "Zipstack", email = "devsupport@zipstack.com"}, -] -dependencies = [ - "flask==2.3.2", - "waitress==2.1.2", - "odfpy>=1.4.1", - "unoserver==1.5", - "redis==4.5.5", - "python-dotenv>=1.0.0", -] -requires-python = ">=3.9" -readme = "README.md" -license = {text = "MIT"} - -[build-system] -requires = ["pdm-backend"] -build-backend = "pdm.backend" - -[tool.pdm.build] -includes = ["src"] -package-dir = "src" - -[tool.pdm.dev-dependencies] -test = [ - "pytest>=8.0.1", -] -deploy = [ - "gunicorn>=21.2.0", -] diff --git a/document-service/sample.env b/document-service/sample.env deleted file mode 100644 index 272e10beb..000000000 --- a/document-service/sample.env +++ /dev/null @@ -1,12 +0,0 @@ -SERVICE_API_TOKEN="0vpepz5CyzRIAKuy+9ZJXLOCXO+TYOAt+55M+ewxGMk=" -UPLOAD_FOLDER="/app/uploads" -PROCESS_FOLDER="/app/processed" -LIBREOFFICE_PYTHON="/usr/bin/python3" -MAX_FILE_SIZE=31457280 - - -#Redis -REDIS_HOST="unstract-redis" -REDIS_PORT=6379 -REDIS_PASSWORD="" -REDIS_USER=default diff --git a/document-service/src/unstract/document_service/__init__.py b/document-service/src/unstract/document_service/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/document-service/src/unstract/document_service/main.py b/document-service/src/unstract/document_service/main.py deleted file mode 100644 index 731a419b9..000000000 --- a/document-service/src/unstract/document_service/main.py +++ /dev/null @@ -1,219 +0,0 @@ -# type: ignore -import logging -import os -import subprocess -import time -from typing import Any - -import redis -from flask import Flask, request, send_file -from odf import teletype, text -from odf.opendocument import load - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(levelname)s %(name)s : %(message)s", -) - -UPLOAD_FOLDER = os.environ.get("UPLOAD_FOLDER", "/tmp/document_service/upload") -PROCESS_FOLDER = os.environ.get("PROCESS_FOLDER", "/tmp/document_service/process") -LIBREOFFICE_PYTHON = os.environ.get("LIBREOFFICE_PYTHON", "/usr/bin/python3") -MAX_FILE_SIZE = int(os.environ.get("MAX_FILE_SIZE", 10485760)) # 10 * 1024 * 1024 -SERVICE_API_TOKEN = os.environ.get("SERVICE_API_TOKEN", "") - -app = Flask("document_service") -app.config["WTF_CSRF_ENABLED"] = False # Sensitive - - -def authentication_middleware(func: Any) -> Any: - def wrapper(*args, **kwargs): - bearer_token = request.headers.get("Authorization") - - # Check if bearer token exists and validate it - if not bearer_token or not validate_bearer_token(bearer_token): - return "Unauthorized", 401 - - return func(*args, **kwargs) - - return wrapper - - -def allowed_file_size(file: Any) -> bool: - return file.content_length <= MAX_FILE_SIZE - - -def validate_bearer_token(token: Any) -> bool: - key_status = None - if token == SERVICE_API_TOKEN: - key_status = True - else: - app.logger.error(f"Error while validating bearer token: {token}") - key_status = False - return key_status - - -@app.route("/health", methods=["GET"], endpoint="health_check") -def health_check(): - return "OK" - - -@app.route("/upload", methods=["POST"], endpoint="upload_file") -@authentication_middleware -def upload_file(): - """ - Sample Usage: - curl -X POST -H "Authorization: 0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \ - -F "file=@/Users/arun/Devel/pandora_storage/train_ticket.pdf" \ - http://localhost:3000/upload?file_name=file1.pdf&account_id=1234 - """ - - if "file" not in request.files: - app.logger.error("No file found!") - return "No file found!", 400 - - _file = request.files["file"] - if _file.filename == "": - app.logger.error("No selected file!") - return "No selected file!", 400 - - # Check file size - content_length = request.headers.get("Content-Length", type=int) - if content_length is not None and content_length > MAX_FILE_SIZE: - app.logger.error( - f"File size exceeds the limit! {content_length} > {MAX_FILE_SIZE}" - ) - return "File size exceeds the limit!", 400 - - account_id = request.args.get("account_id") - file_name = request.args.get("file_name") - app.logger.info(f"Uploading file {file_name} for account {account_id}") - - try: - file_path = os.path.join(UPLOAD_FOLDER, f"{account_id}_{file_name}") - _file.save(file_path) - except Exception as e: - app.logger.error(f"Error while saving file: {e}") - return "Error while saving file!", 500 - - try: - # Store upload time in redis - redis_host = os.environ.get("REDIS_HOST") - redis_port = int(os.environ.get("REDIS_PORT")) - redis_password = os.environ.get("REDIS_PASSWORD") - r = redis.Redis(host=redis_host, port=redis_port, password=redis_password) - # TODO: Create a file reaper process to look at uploaded time and delete - redis_key = f"upload_time:{account_id}_{file_name}" - current_timestamp = int(time.time()) - r.set(redis_key, current_timestamp) - r.close() - except Exception as e: - app.logger.error(f"Error while storing upload time in redis: {e}") - if os.path.exists(file_path): - os.remove(file_path) - return "Error while storing upload time in redis!", 500 - - app.logger.info(f"File uploaded successfully! {file_path}") - return "File uploaded successfully!", 200 - - -@app.route("/find_and_replace", methods=["POST"], endpoint="find_and_replace") -@authentication_middleware -def find_and_replace(): - account_id = request.args.get("account_id") - file_name = request.args.get("file_name") - output_format = request.args.get("output_format").lower() - find_and_replace_text = request.json - - app.logger.info(f"Find and replace for file {file_name} for account {account_id}") - app.logger.info(f"Output format: {output_format}") - - if output_format not in ["pdf"]: - app.logger.error(f"Unsupported output format: {output_format}") - return "Unsupported output format!", 400 - - file_namex = os.path.join(UPLOAD_FOLDER, f"{account_id}_{file_name}") - - # Check if file exists - if not os.path.exists(file_namex): - app.logger.error(f"File not found! {file_namex}") - return "File not found!", 400 - - # Convert the orginal file to ODT format for processing - file_name_odt = f"{account_id}_{file_name}.odt" - file_name_odt = os.path.join(PROCESS_FOLDER, file_name_odt) - try: - command = f"{LIBREOFFICE_PYTHON} -m unoserver.converter --convert-to odt \ - --filter writer8 {file_namex} {file_name_odt}" - result = subprocess.run(command, shell=True, capture_output=True, text=True) - app.logger.info(result) - if result.returncode != 0: - app.logger.error( - f"Failed to convert file to ODT format: \ - {result.stdout} | ERR: {result.stderr}" - ) - return "Failed to convert file to ODT format!", 500 - else: - app.logger.info( - f"File converted to ODT format successfully! {file_name_odt}" - ) - app.logger.info(f"ODT convertion result: {result.stdout} | {result.stderr}") - except Exception as e: - app.logger.error(f"Error while converting file to ODT format: {e}") - return "Error while converting file to ODT format!", 500 - - # Find and replace - doc = load(file_name_odt) - for find_str in find_and_replace_text: - app.logger.info( - f"Find and replace: {find_str} -> {find_and_replace_text[find_str]}" - ) - replace_str = find_and_replace_text[find_str] - for element in doc.getElementsByType(text.Span): - if find_str in teletype.extractText(element): - app.logger.info(f"Found {find_str} in {teletype.extractText(element)}") - new_element = text.Span() - new_element.setAttribute("stylename", element.getAttribute("stylename")) - t = teletype.extractText(element) - t = t.replace(find_str, replace_str) - new_element.addText(t) - element.parentNode.insertBefore(new_element, element) - element.parentNode.removeChild(element) - doc.save(file_name_odt) - - file_name_output = f"{account_id}_{file_name}.{output_format}" - file_name_output = os.path.join(PROCESS_FOLDER, file_name_output) - - # Convert the ODT file to the requested format - try: - command = ( - f"{LIBREOFFICE_PYTHON} -m unoserver.converter --convert-to pdf " - f"--filter writer_pdf_Export {file_name_odt} {file_name_output}" - ) - result = subprocess.run(command, shell=True, capture_output=True, text=True) - if result.returncode != 0: - app.logger.error( - f"Failed to convert file to {output_format} format: " - f"{result.stdout} | ERR: {result.stderr}" - ) - return "Failed to convert file to ODT format!", 500 - else: - app.logger.info( - f"File converted to {output_format} format successfully! " - f"{file_name_output}" - ) - app.logger.info(f"ODT convertion result: {result.stdout} | {result.stderr}") - except Exception as e: - app.logger.error(f"Error while converting file to {output_format} format: {e}") - return f"Error while converting file to {output_format} format!", 500 - return send_file(file_name_output, as_attachment=True) - - -if __name__ == "__main__": - # Check if upload folder exists and create it if not - if not os.path.exists(UPLOAD_FOLDER): - os.makedirs(UPLOAD_FOLDER) - if not os.path.exists(PROCESS_FOLDER): - os.makedirs(PROCESS_FOLDER) - - # Start the server - app.run() diff --git a/document-service/tests/__init__.py b/document-service/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/document-service/thunder-collection_document-service.json b/document-service/thunder-collection_document-service.json deleted file mode 100644 index 443f60502..000000000 --- a/document-service/thunder-collection_document-service.json +++ /dev/null @@ -1,97 +0,0 @@ -{ - "client": "Thunder Client", - "collectionName": "document-service", - "dateExported": "2023-08-15T03:42:29.575Z", - "version": "1.1", - "folders": [], - "requests": [ - { - "_id": "54e80168-0fbf-4de6-8397-4069ff0bcb0d", - "colId": "a50995ac-f753-4b4c-847e-fc03d134641c", - "containerId": "", - "name": "upload", - "url": "/upload?file_name=test.pdf&account_id=1234", - "method": "POST", - "sortNum": 10000, - "created": "2023-08-14T09:58:13.352Z", - "modified": "2023-08-14T16:58:48.550Z", - "headers": [ - { - "name": "Authorization", - "value": "builtin_functions_key:123", - "isDisabled": true - } - ], - "params": [ - { - "name": "file_name", - "value": "test.pdf", - "isPath": false - }, - { - "name": "account_id", - "value": "1234", - "isPath": false - } - ], - "body": { - "type": "formdata", - "raw": "", - "form": [], - "files": [ - { - "name": "file", - "value": "/Users/path_to_file/test.pdf" - } - ] - }, - "tests": [] - }, - { - "_id": "e5d544ac-1500-4ea8-a9cd-22583be4c86f", - "colId": "a50995ac-f753-4b4c-847e-fc03d134641c", - "containerId": "", - "name": "find_and_replace", - "url": "/find_and_replace?account_id=1234&file_name=test.pdf&output_format=pdf", - "method": "POST", - "sortNum": 20000, - "created": "2023-08-14T17:00:10.866Z", - "modified": "2023-08-14T17:05:05.746Z", - "headers": [], - "params": [ - { - "name": "account_id", - "value": "1234", - "isPath": false - }, - { - "name": "file_name", - "value": "test.pdf", - "isPath": false - }, - { - "name": "output_format", - "value": "pdf", - "isPath": false - } - ], - "body": { - "type": "json", - "raw": "{\n \"Slack\": \"Jaseem\"\n}", - "form": [] - }, - "tests": [] - } - ], - "settings": { - "headers": [ - { - "name": "Authorization", - "value": "123" - } - ], - "options": { - "baseUrl": "http://localhost:3000" - } - } -} diff --git a/document-service/wrapper.sh b/document-service/wrapper.sh deleted file mode 100755 index 1bfa6e0fb..000000000 --- a/document-service/wrapper.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -# Start the first process -# Args: --nocrashreport --nodefault -/usr/bin/libreoffice --headless --nologo --nofirststartwizard --norestore --accept="socket,host=127.0.0.1,port=2002,tcpNoDelay=1;urp;StarOffice.ComponentContext" & - -# Start the second process -# 'src' layout is detected from pdm settings in pyproject.toml -.venv/bin/gunicorn --bind 0.0.0.0:3002 --timeout 300 unstract.document_service.main:app & - -# Wait for any process to exit -wait -n - -# Exit with status of process that exited first -exit $? diff --git a/pdm.lock b/pdm.lock index 0fdb01d80..ddba68d43 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "hook-check-django-migrations", "lint"] strategy = ["inherit_metadata"] lock_version = "4.4.2" -content_hash = "sha256:d7266fb7a14b58660f3a28e3d04039f440d85ddec49fd85f6e492f4188f02564" +content_hash = "sha256:ea95cfab5594692de846e2f59fddbc63689fbd9a8a5a724957f4821f1a23fb30" [[package]] name = "adlfs" @@ -92,16 +92,16 @@ files = [ [[package]] name = "aioitertools" -version = "0.11.0" -requires_python = ">=3.6" +version = "0.12.0" +requires_python = ">=3.8" summary = "itertools and builtins for AsyncIO and mixed iterables" groups = ["hook-check-django-migrations"] dependencies = [ "typing-extensions>=4.0; python_version < \"3.10\"", ] files = [ - {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, - {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, + {file = "aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796"}, + {file = "aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b"}, ] [[package]] @@ -508,18 +508,18 @@ files = [ [[package]] name = "certifi" -version = "2024.7.4" +version = "2024.8.30" requires_python = ">=3.6" summary = "Python package for providing Mozilla's CA Bundle." groups = ["hook-check-django-migrations"] files = [ - {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, - {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, + {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, + {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, ] [[package]] name = "cffi" -version = "1.17.0" +version = "1.17.1" requires_python = ">=3.8" summary = "Foreign Function Interface for Python calling C code." groups = ["hook-check-django-migrations", "lint"] @@ -527,8 +527,8 @@ dependencies = [ "pycparser", ] files = [ - {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f76a90c345796c01d85e6332e81cab6d70de83b829cf1d9762d0a3da59c7932"}, - {file = "cffi-1.17.0.tar.gz", hash = "sha256:f3157624b7558b914cb039fd1af735e5e8049a87c817cc215109ad1c8779df76"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] [[package]] @@ -1155,7 +1155,7 @@ files = [ [[package]] name = "google-cloud-aiplatform" -version = "1.64.0" +version = "1.65.0" requires_python = ">=3.8" summary = "Vertex AI API client library" groups = ["hook-check-django-migrations"] @@ -1173,8 +1173,8 @@ dependencies = [ "shapely<3.0.0dev", ] files = [ - {file = "google-cloud-aiplatform-1.64.0.tar.gz", hash = "sha256:475a612829b283eb8f783e773d37115c30db42e2e50065c8653db0c9bd18b0da"}, - {file = "google_cloud_aiplatform-1.64.0-py2.py3-none-any.whl", hash = "sha256:3a79ce2ec047868c348336624a60993464ca977fd258bcf609cc79309a8101c4"}, + {file = "google-cloud-aiplatform-1.65.0.tar.gz", hash = "sha256:2349a2b8b85868c378788233929810aaac3efbb72cfec46f01097923faf8d7b6"}, + {file = "google_cloud_aiplatform-1.65.0-py2.py3-none-any.whl", hash = "sha256:88e1eef5a7cab737606ae456cb5e866133f990ed9c8bca534077144037e18524"}, ] [[package]] @@ -1269,14 +1269,14 @@ files = [ [[package]] name = "google-crc32c" -version = "1.5.0" -requires_python = ">=3.7" +version = "1.6.0" +requires_python = ">=3.9" summary = "A python wrapper of the C library 'Google CRC32C'" groups = ["hook-check-django-migrations"] files = [ - {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, + {file = "google_crc32c-1.6.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2"}, + {file = "google_crc32c-1.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57"}, + {file = "google_crc32c-1.6.0.tar.gz", hash = "sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc"}, ] [[package]] @@ -2463,7 +2463,7 @@ files = [ [[package]] name = "openai" -version = "1.42.0" +version = "1.43.0" requires_python = ">=3.7.1" summary = "The official Python library for the openai API" groups = ["hook-check-django-migrations"] @@ -2478,8 +2478,8 @@ dependencies = [ "typing-extensions<5,>=4.11", ] files = [ - {file = "openai-1.42.0-py3-none-any.whl", hash = "sha256:dc91e0307033a4f94931e5d03cc3b29b9717014ad5e73f9f2051b6cb5eda4d80"}, - {file = "openai-1.42.0.tar.gz", hash = "sha256:c9d31853b4e0bc2dc8bd08003b462a006035655a701471695d0bfdc08529cde3"}, + {file = "openai-1.43.0-py3-none-any.whl", hash = "sha256:1a748c2728edd3a738a72a0212ba866f4fdbe39c9ae03813508b267d45104abe"}, + {file = "openai-1.43.0.tar.gz", hash = "sha256:e607aff9fc3e28eade107e5edd8ca95a910a4b12589336d3cbb6bfe2ac306b3c"}, ] [[package]] @@ -2874,13 +2874,13 @@ files = [ [[package]] name = "pymilvus" -version = "2.4.5" +version = "2.4.6" requires_python = ">=3.8" summary = "Python Sdk for Milvus" groups = ["hook-check-django-migrations"] dependencies = [ "environs<=9.5.0", - "grpcio<=1.63.0,>=1.49.1", + "grpcio>=1.49.1", "milvus-lite<2.5.0,>=2.4.0; sys_platform != \"win32\"", "pandas>=1.2.4", "protobuf>=3.20.0", @@ -2888,8 +2888,8 @@ dependencies = [ "ujson>=2.0.0", ] files = [ - {file = "pymilvus-2.4.5-py3-none-any.whl", hash = "sha256:dc4f2d1eac8db9cf3951de39566a1a244695760bb94d8310fbfc73d6d62bb267"}, - {file = "pymilvus-2.4.5.tar.gz", hash = "sha256:1a497fe9b41d6bf62b1d5e1c412960922dde1598576fcbb8818040c8af11149f"}, + {file = "pymilvus-2.4.6-py3-none-any.whl", hash = "sha256:b4c43472edc313b845d313be50610e19054e6954b2c5c3b515565c596c2d3d97"}, + {file = "pymilvus-2.4.6.tar.gz", hash = "sha256:6ac3eb91c92cc01bbe444fe83f895f02d7b2546d96ac67998630bf31ac074d66"}, ] [[package]] @@ -3230,24 +3230,24 @@ files = [ [[package]] name = "safetensors" -version = "0.4.4" +version = "0.4.5" requires_python = ">=3.7" summary = "" groups = ["hook-check-django-migrations"] files = [ - {file = "safetensors-0.4.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9850754c434e636ce3dc586f534bb23bcbd78940c304775bee9005bf610e98f1"}, - {file = "safetensors-0.4.4.tar.gz", hash = "sha256:5fe3e9b705250d0172ed4e100a811543108653fb2b66b9e702a088ad03772a07"}, + {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b75a616e02f21b6f1d5785b20cecbab5e2bd3f6358a90e8925b813d557666ec1"}, + {file = "safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310"}, ] [[package]] name = "setuptools" -version = "74.0.0" +version = "74.1.2" requires_python = ">=3.8" summary = "Easily download, build, install, upgrade, and uninstall Python packages" groups = ["hook-check-django-migrations"] files = [ - {file = "setuptools-74.0.0-py3-none-any.whl", hash = "sha256:0274581a0037b638b9fc1c6883cc71c0210865aaa76073f7882376b641b84e8f"}, - {file = "setuptools-74.0.0.tar.gz", hash = "sha256:a85e96b8be2b906f3e3e789adec6a9323abf79758ecfa3065bd740d81158b11e"}, + {file = "setuptools-74.1.2-py3-none-any.whl", hash = "sha256:5f4c08aa4d3ebcb57a50c33b1b07e94315d7fc7230f7115e47fc99776c8ce308"}, + {file = "setuptools-74.1.2.tar.gz", hash = "sha256:95b40ed940a1c67eb70fc099094bd6e99c6ee7c23aa2306f4d2697ba7916f9c6"}, ] [[package]] @@ -3400,7 +3400,7 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.32" +version = "2.0.34" requires_python = ">=3.7" summary = "Database Abstraction Library" groups = ["hook-check-django-migrations"] @@ -3409,26 +3409,26 @@ dependencies = [ "typing-extensions>=4.6.0", ] files = [ - {file = "SQLAlchemy-2.0.32-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf04784797dcdf4c0aa952c8d234fa01974c4729db55c45732520ce12dd95b4"}, - {file = "SQLAlchemy-2.0.32-py3-none-any.whl", hash = "sha256:e567a8793a692451f706b363ccf3c45e056b67d90ead58c3bc9471af5d212202"}, - {file = "SQLAlchemy-2.0.32.tar.gz", hash = "sha256:c1b88cc8b02b6a5f0efb0345a03672d4c897dc7d92585176f88c67346f565ea8"}, + {file = "SQLAlchemy-2.0.34-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ebc11c54c6ecdd07bb4efbfa1554538982f5432dfb8456958b6d46b9f834bb7"}, + {file = "SQLAlchemy-2.0.34-py3-none-any.whl", hash = "sha256:7286c353ee6475613d8beff83167374006c6b3e3f0e6491bfe8ca610eb1dec0f"}, + {file = "sqlalchemy-2.0.34.tar.gz", hash = "sha256:10d8f36990dd929690666679b0f42235c159a7051534adb135728ee52828dd22"}, ] [[package]] name = "sqlalchemy" -version = "2.0.32" +version = "2.0.34" extras = ["asyncio"] requires_python = ">=3.7" summary = "Database Abstraction Library" groups = ["hook-check-django-migrations"] dependencies = [ "greenlet!=0.4.17", - "sqlalchemy==2.0.32", + "sqlalchemy==2.0.34", ] files = [ - {file = "SQLAlchemy-2.0.32-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf04784797dcdf4c0aa952c8d234fa01974c4729db55c45732520ce12dd95b4"}, - {file = "SQLAlchemy-2.0.32-py3-none-any.whl", hash = "sha256:e567a8793a692451f706b363ccf3c45e056b67d90ead58c3bc9471af5d212202"}, - {file = "SQLAlchemy-2.0.32.tar.gz", hash = "sha256:c1b88cc8b02b6a5f0efb0345a03672d4c897dc7d92585176f88c67346f565ea8"}, + {file = "SQLAlchemy-2.0.34-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ebc11c54c6ecdd07bb4efbfa1554538982f5432dfb8456958b6d46b9f834bb7"}, + {file = "SQLAlchemy-2.0.34-py3-none-any.whl", hash = "sha256:7286c353ee6475613d8beff83167374006c6b3e3f0e6491bfe8ca610eb1dec0f"}, + {file = "sqlalchemy-2.0.34.tar.gz", hash = "sha256:10d8f36990dd929690666679b0f42235c159a7051534adb135728ee52828dd22"}, ] [[package]] @@ -3627,7 +3627,7 @@ files = [ [[package]] name = "types-redis" -version = "4.6.0.20240819" +version = "4.6.0.20240903" requires_python = ">=3.8" summary = "Typing stubs for redis" groups = ["lint"] @@ -3636,8 +3636,8 @@ dependencies = [ "types-pyOpenSSL", ] files = [ - {file = "types-redis-4.6.0.20240819.tar.gz", hash = "sha256:08f51f550ad41d0152bd98d77ac9d6d8f761369121710a213642f6036b9a7183"}, - {file = "types_redis-4.6.0.20240819-py3-none-any.whl", hash = "sha256:86db9af6f0033154e12bc22c77236cef0907b995fda8c9f0f0eacd59943ed2fc"}, + {file = "types-redis-4.6.0.20240903.tar.gz", hash = "sha256:4bab1a378dbf23c2c95c370dfdb89a8f033957c4fd1a53fee71b529c182fe008"}, + {file = "types_redis-4.6.0.20240903-py3-none-any.whl", hash = "sha256:0e7537e5c085fe96b7d468d5edae0cf667b4ba4b62c6e4a5dfc340bd3b868c23"}, ] [[package]] @@ -3656,13 +3656,13 @@ files = [ [[package]] name = "types-setuptools" -version = "73.0.0.20240822" +version = "74.0.0.20240831" requires_python = ">=3.8" summary = "Typing stubs for setuptools" groups = ["lint"] files = [ - {file = "types-setuptools-73.0.0.20240822.tar.gz", hash = "sha256:3a060681098eb3fbc2fea0a86f7f6af6aa1ca71906039d88d891ea2cecdd4dbf"}, - {file = "types_setuptools-73.0.0.20240822-py3-none-any.whl", hash = "sha256:b9eba9b68546031317a0fa506d4973641d987d74f79e7dd8369ad4f7a93dea17"}, + {file = "types-setuptools-74.0.0.20240831.tar.gz", hash = "sha256:8b4a544cc91d42a019dc1e41fd397608b4bc7e20c7d7d5bc326589ffd9e8f8a1"}, + {file = "types_setuptools-74.0.0.20240831-py3-none-any.whl", hash = "sha256:4d9d18ea9214828d695a384633130009f5dee2681a157ee873d3680b62931590"}, ] [[package]] @@ -3795,7 +3795,7 @@ dependencies = [ [[package]] name = "unstract-sdk" -version = "0.46.0" +version = "0.48.1" requires_python = "<3.11.1,>=3.9" summary = "A framework for writing Unstract Tools/Apps" groups = ["hook-check-django-migrations"] @@ -3834,8 +3834,8 @@ dependencies = [ "transformers==4.37.0", ] files = [ - {file = "unstract_sdk-0.46.0-py3-none-any.whl", hash = "sha256:09b13289c3384578b6e9598cfe2ae1013705d9e394218682aa24bbb6406422b4"}, - {file = "unstract_sdk-0.46.0.tar.gz", hash = "sha256:4fa969eb078b5f1bb0eb05142519a08d307b5ea95afaaf06fb8383285857d037"}, + {file = "unstract_sdk-0.48.1-py3-none-any.whl", hash = "sha256:90bd27142f265d0397bb5a4b3630ad6e4f232a4405a15db8806c91c6cf5be4af"}, + {file = "unstract_sdk-0.48.1.tar.gz", hash = "sha256:72b62d15f1dcd9dc9d07d3c2682a9a4edcfc7b2dcf568d94376703c909630455"}, ] [[package]] @@ -3850,7 +3850,7 @@ dependencies = [ "PyYAML~=6.0.1", "docker~=6.1.3", "jsonschema~=4.18.2", - "unstract-sdk~=0.46.0", + "unstract-sdk~=0.48.0", "unstract-tool-sandbox", ] @@ -3893,13 +3893,13 @@ files = [ [[package]] name = "urllib3" -version = "1.26.19" +version = "1.26.20" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" summary = "HTTP library with thread-safe connection pooling, file post, and more." groups = ["hook-check-django-migrations"] files = [ - {file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"}, - {file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"}, + {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, + {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, ] [[package]] @@ -4011,8 +4011,8 @@ files = [ [[package]] name = "yarl" -version = "1.9.4" -requires_python = ">=3.7" +version = "1.9.11" +requires_python = ">=3.8" summary = "Yet another URL library" groups = ["hook-check-django-migrations"] dependencies = [ @@ -4020,7 +4020,7 @@ dependencies = [ "multidict>=4.0", ] files = [ - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, - {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, - {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, + {file = "yarl-1.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afcac5bda602b74ff701e1f683feccd8cce0d5a21dbc68db81bf9bd8fd93ba56"}, + {file = "yarl-1.9.11-py3-none-any.whl", hash = "sha256:c6f6c87665a9e18a635f0545ea541d9640617832af2317d4f5ad389686b4ed3d"}, + {file = "yarl-1.9.11.tar.gz", hash = "sha256:c7548a90cb72b67652e2cd6ae80e2683ee08fde663104528ac7df12d8ef271d2"}, ] diff --git a/pyproject.toml b/pyproject.toml index 8c5cbd4e7..4d1049fd8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ hook-check-django-migrations = [ "psycopg2-binary==2.9.9", "python-dotenv==1.0.0", "python-magic==0.4.27", - "unstract-sdk~=0.46.0", + "unstract-sdk~=0.48.0", "-e unstract-connectors @ file:///${PROJECT_ROOT}/unstract/connectors", "-e unstract-core @ file:///${PROJECT_ROOT}/unstract/core", "-e unstract-flags @ file:///${PROJECT_ROOT}/unstract/flags", @@ -77,7 +77,6 @@ check_untyped_defs = true exclude = '''(?x)^( .*migrations/.*\.py| backend/prompt/.*| - document-service/.*| unstract/connectors/tests/.*| unstract/core/.*| unstract/flags/src/unstract/flags/.*|