Skip to content

Commit

Permalink
docling: init at 2.17.0 (NixOS#359783)
Browse files Browse the repository at this point in the history
  • Loading branch information
happysalada authored Feb 5, 2025
2 parents ceaea20 + b011853 commit e76dc01
Show file tree
Hide file tree
Showing 6 changed files with 211 additions and 20 deletions.
3 changes: 3 additions & 0 deletions pkgs/by-name/do/docling/package.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Top-level `docling` package: re-exports the Python module
# `python3Packages.docling` as a standalone application via
# `toPythonApplication`.
{ python3Packages }:

let
  inherit (python3Packages) toPythonApplication docling;
in
toPythonApplication docling
24 changes: 19 additions & 5 deletions pkgs/development/python-modules/docling-core/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,52 @@
buildPythonPackage,
fetchFromGitHub,
poetry-core,
# dependencies
jsonref,
jsonschema,
pandas,
pillow,
pydantic,
tabulate,
pyyaml,
typing-extensions,
transformers,
typer,
latex2mathml,
jsondiff,
requests,
pytestCheckHook,
}:

buildPythonPackage rec {
pname = "docling-core";
version = "2.3.2";
version = "2.16.1";
pyproject = true;

src = fetchFromGitHub {
owner = "DS4SD";
repo = "docling-core";
tag = "v${version}";
hash = "sha256-N8rL+5bCVF4Qi5eqgkaB2r3LTYoqTVPeK4gQ6stiW/w=";
hash = "sha256-oW/jX9IHCpztc0FDm8/3OzDmOxM92jrkFq/JeAcI9ZA=";
};

build-system = [
poetry-core
];

dependencies = [
jsonref
jsonschema
pandas
pillow
pydantic
jsonref
tabulate
pandas
pillow
pyyaml
typing-extensions
transformers
# semchunk
typer
latex2mathml
];

pythonRelaxDeps = [
Expand All @@ -47,6 +59,8 @@ buildPythonPackage rec {
"docling_core"
];

doCheck = false;

nativeCheckInputs = [
jsondiff
pytestCheckHook
Expand Down
34 changes: 22 additions & 12 deletions pkgs/development/python-modules/docling-ibm-models/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,54 @@
buildPythonPackage,
fetchFromGitHub,
poetry-core,
# dependencies
torch,
torchvision,
transformers,
huggingface-hub,
jsonlines,
mean-average-precision,
numpy,
opencv-python-headless,
pillow,
torch,
torchvision,
tqdm,
safetensors,
pytestCheckHook,
}:

buildPythonPackage rec {
pname = "docling-ibm-models";
version = "2.0.4";
version = "3.3.0";
pyproject = true;

src = fetchFromGitHub {
owner = "DS4SD";
repo = "docling-ibm-models";
tag = "v${version}";
hash = "sha256-QZvkkazxgkGuSQKIYI+YghH7pLlDSEbCGhg89gZsOpk=";
hash = "sha256-wxkHd+TCBibOTWO09JOsjX6oBtUxZ/9IOmyLdeptzeQ=";
};

build-system = [
poetry-core
];

dependencies = [
huggingface-hub
jsonlines
mean-average-precision
numpy
opencv-python-headless
pillow
torch
torchvision
transformers
numpy
jsonlines
pillow
tqdm
opencv-python-headless
huggingface-hub
safetensors
];

pythonRelaxDeps = [
"mean_average_precision"
"pillow"
"torchvision"
"transformers"
"numpy"
];

pythonImportsCheck = [
Expand All @@ -57,10 +61,16 @@ buildPythonPackage rec {
pytestCheckHook
];

preCheck = ''
export HOME="$TEMPDIR"
'';

disabledTests = [
# Requires network access
"test_layoutpredictor"
"test_tf_predictor"
"test_code_formula_predictor" # huggingface_hub.errors.LocalEntryNotFoundError
"test_figure_classifier" # huggingface_hub.errors.LocalEntryNotFoundError
];

meta = {
Expand Down
18 changes: 15 additions & 3 deletions pkgs/development/python-modules/docling-parse/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,30 @@
cxxopts,
poetry-core,
pybind11,
tabulate,
zlib,
nlohmann_json,
utf8cpp,
libjpeg,
qpdf,
loguru-cpp,
# python dependencies
tabulate,
pillow,
pydantic,
docling-core,
pytestCheckHook,
}:

buildPythonPackage rec {
pname = "docling-parse";
version = "2.0.3";
version = "3.1.2";
pyproject = true;

src = fetchFromGitHub {
owner = "DS4SD";
repo = "docling-parse";
tag = "v${version}";
hash = "sha256-pZJ7lneg4ftAoWS5AOflkkKCwZGF4TJIuqDjq4W4VBw=";
hash = "sha256-SgVLk1kruUSjtzuo/5YFY4Keha8zMzovm/UeCtfGaNY=";
};

dontUseCmakeConfigure = true;
Expand Down Expand Up @@ -61,6 +65,14 @@ buildPythonPackage rec {

dependencies = [
tabulate
pillow
pydantic
docling-core
];

pythonRelaxDeps = [
"pydantic"
"pillow"
];

pythonImportsCheck = [
Expand Down
150 changes: 150 additions & 0 deletions pkgs/development/python-modules/docling/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Python package definition for docling, built from the DS4SD/docling GitHub
# repository with poetry-core as the build backend.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # dependencies
  pydantic,
  docling-core,
  docling-ibm-models,
  deepsearch-glm,
  docling-parse,
  filetype,
  pypdfium2,
  pydantic-settings,
  huggingface-hub,
  requests,
  easyocr,
  tesserocr,
  certifi,
  rtree,
  scipy,
  typer,
  python-docx,
  python-pptx,
  beautifulsoup4,
  pandas,
  marko,
  openpyxl,
  lxml,
  # ocrmac # not yet packaged
  rapidocr-onnxruntime,
  onnxruntime,
  pillow,
  pyarrow,

  # build system
  poetry-core,

  # optional dependencies
  mkdocs-material,
  mkdocs-jupyter,
  # mkdocs-click # not yet packaged
  mkdocstrings,

  # native check inputs
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "docling";
  version = "2.17.0";
  pyproject = true;

  # Source is fetched by release tag ("v" + version).
  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "docling";
    tag = "v${version}";
    hash = "sha256-OtUFQRNqyTGT1Z41tHziwM5hqbk+tg/97bxhtPVtmN0=";
  };

  build-system = [ poetry-core ];

  # Runtime dependencies. The OCR backends that are also listed under
  # `optional-dependencies` below (tesserocr, rapidocr-onnxruntime,
  # onnxruntime) are included here as well.
  dependencies = [
    pydantic
    docling-core
    docling-ibm-models
    deepsearch-glm
    docling-parse
    filetype
    pypdfium2
    pydantic-settings
    huggingface-hub
    requests
    easyocr
    tesserocr
    certifi
    rtree
    scipy
    typer
    python-docx
    python-pptx
    beautifulsoup4
    pandas
    marko
    openpyxl
    lxml
    # ocrmac # not yet packaged
    rapidocr-onnxruntime
    onnxruntime
    pillow
    pyarrow
  ];

  # Relax the version constraint declared for pillow.
  pythonRelaxDeps = [ "pillow" ];

  optional-dependencies = {
    # Left empty: ocrmac is not yet packaged.
    ocrmac = [
      # ocrmac # not yet packaged
    ];

    rapidocr = [
      onnxruntime
      rapidocr-onnxruntime
    ];

    tesserocr = [ tesserocr ];

    docs = [
      mkdocs-material
      mkdocs-jupyter
      # mkdocs-click # not yet packaged
      mkdocstrings
      # griffe-pydantic
    ];
  };

  # Point HOME at the temporary directory for the test phase.
  preCheck = ''
    export HOME="$TEMPDIR"
  '';

  nativeCheckInputs = [ pytestCheckHook ];

  pythonImportsCheck = [ "docling" ];

  # Tests skipped during the check phase, with the failure each one hits.
  disabledTests = [
    # AssertionError: ## TableFormer: Table Structure Understanding with Transf
    "test_e2e_pdfs_conversions"

    # RuntimeError: Tesseract is not available
    "test_e2e_conversions"

    # huggingface_hub.errors.LocalEntryNotFoundError: An error happened
    "test_cli_convert"
    "test_code_and_formula_conversion"
    "test_picture_classifier"
    "test_convert_path"
    "test_convert_stream"
    "test_compare_legacy_output"
    "test_ocr_coverage_threshold"
  ];

  meta = {
    description = "Get your documents ready for gen AI";
    homepage = "https://github.com/DS4SD/docling";
    changelog = "https://github.com/DS4SD/docling/blob/${src.rev}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ happysalada ];
    mainProgram = "docling";
  };
}
2 changes: 2 additions & 0 deletions pkgs/top-level/python-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3804,6 +3804,8 @@ self: super: with self; {

dockerspawner = callPackage ../development/python-modules/dockerspawner { };

docling = callPackage ../development/python-modules/docling { };

docling-core = callPackage ../development/python-modules/docling-core { };

docling-ibm-models = callPackage ../development/python-modules/docling-ibm-models { };
Expand Down

0 comments on commit e76dc01

Please sign in to comment.