Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions pkgs/development/python-modules/docling-ibm-models/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,18 @@ buildPythonPackage (finalAttrs: {
];

disabledTests = [
# Require network access
"test_code_formula_predictor" # huggingface_hub.errors.LocalEntryNotFoundError
"test_figure_classifier" # huggingface_hub.errors.LocalEntryNotFoundError
# Requires network access
"test_figure_classifier"
"test_layoutpredictor"
"test_readingorder"
"test_tableformer_v2_model_loading"
"test_tableformer_v2_tokenizer_loading"
"test_tableformer_v2_image_encoding"
"test_tableformer_v2_forward_pass"
"test_tableformer_v2_predict"
"test_tableformer_v2_numpy_input"
"test_tableformer_v2_batch_inference"
"test_tableformer_v2_unsupported_input"
"test_tf_predictor"
];

Expand Down
19 changes: 10 additions & 9 deletions pkgs/development/python-modules/docling-jobkit/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,22 @@
msgpack,

# tests
aiohttp,
pytestCheckHook,
pytest-asyncio,
writableTmpDirAsHomeHook,
}:

buildPythonPackage rec {
pname = "docling-jobkit";
version = "1.8.1";
version = "1.15.0";
pyproject = true;

src = fetchFromGitHub {
owner = "docling-project";
repo = "docling-jobkit";
tag = "v${version}";
hash = "sha256-9DzQY/XMmx/8XP1bMYZYl+Bp7AVcYfuv3MtO6lvQ/24=";
hash = "sha256-GFta/0Bdu+lN1Yv97t9yVLoWQxkF9CZhBAL88UaaPqw=";
};

build-system = [
Expand All @@ -65,31 +66,31 @@ buildPythonPackage rec {
];
};

pythonRelaxDeps = [
"boto3"
"pandas"
"pyarrow"
];

pythonImportsCheck = [
"docling"
"docling_jobkit"
];

nativeCheckInputs = [
aiohttp
pytestCheckHook
pytest-asyncio
writableTmpDirAsHomeHook
]
++ optional-dependencies.rq;

disabledTests = [
# requires network access
# requires network access / remote model downloads
"test_chunk_file"
"test_convert_file"
"test_convert_warmup"
"test_convert_url"
"test_replicated_convert"
"test_clear_converters_clears_caches"
"test_chunker_manager_shared_across_workers"
"test_convert_with_callbacks"
"test_delete_task_cleans_up_job"
"test_clear_converters_clears_worker_cache"
]
++ lib.optionals stdenv.hostPlatform.isDarwin [
# Flaky due to comparison with magic object
Expand Down
9 changes: 6 additions & 3 deletions pkgs/development/python-modules/docling-mcp/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,19 @@
smolagents,
torch,
transformers,
typer,
}:

buildPythonPackage rec {
pname = "docling-mcp";
version = "1.3.3";
version = "1.3.4";
pyproject = true;

src = fetchFromGitHub {
owner = "docling-project";
repo = "docling-mcp";
tag = "v${version}";
hash = "sha256-OyLL8g9fh1H9N3i5ok885IzC5pFckMoqsjd8oX/HdRY=";
hash = "sha256-GaAJU28zjs23PmN/iSXJghY6qWUt3Jp+j3XUkJf91ts=";
};

pythonRemoveDeps = [
Expand All @@ -54,6 +55,7 @@ buildPythonPackage rec {
pydantic
pydantic-settings
python-dotenv
typer
];

optional-dependencies = {
Expand Down Expand Up @@ -81,12 +83,13 @@ buildPythonPackage rec {
nativeCheckInputs = [
pytest-asyncio
pytestCheckHook
typer
];

pythonImportsCheck = [ "docling_mcp" ];

disabledTestPaths = [
# Tests require network access
# Tests require network access / MCP server setup not available in nix build sandbox
"tests/test_mcp_server.py"
"tests/test_conversion_tools.py"
];
Expand Down
17 changes: 12 additions & 5 deletions pkgs/development/python-modules/docling-parse/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
lib,
buildPythonPackage,
fetchFromGitHub,
fetchpatch,
cmake,
pkg-config,
cxxopts,
Expand All @@ -12,6 +13,7 @@
utf8cpp,
libjpeg,
qpdf,
blend2d,
loguru-cpp,
# python dependencies
tabulate,
Expand All @@ -23,16 +25,23 @@

buildPythonPackage rec {
pname = "docling-parse";
version = "5.0.0";
version = "5.7.0";
pyproject = true;

src = fetchFromGitHub {
owner = "docling-project";
repo = "docling-parse";
tag = "v${version}";
hash = "sha256-qxD3ryU1jXf8Gm5/IiG2NTOnRgA6HADPfgBj6Kn+Pj4=";
hash = "sha256-HKhS6sIhUAr+VFo4jikQ1MMQpcLY6sS7RZaqcjaKvQc=";
};

patches = [
Comment thread
attilaolah marked this conversation as resolved.
(fetchpatch {
url = "https://github.com/docling-project/docling-parse/commit/e922be4ef4af2053b27fa755407aeae7d85c2b9c.patch";
hash = "sha256-UfuaEDqRWVIYG57l59/bIw1a+qssxTw27nprQ6jC2WA=";
})
];

postPatch = ''
substituteInPlace pyproject.toml \
--replace-fail \
Expand All @@ -54,6 +63,7 @@ buildPythonPackage rec {
env.NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";

buildInputs = [
blend2d
pybind11
cxxopts
libjpeg
Expand Down Expand Up @@ -102,8 +112,5 @@ buildPythonPackage rec {
homepage = "https://github.com/DS4SD/docling-parse";
license = lib.licenses.mit;
maintainers = [ ];
# error: no matching conversion for functional-style cast from 'bool' to 'nlohmann::basic_json<>'
# See https://github.com/docling-project/docling-parse/issues/172 for context
broken = true;
};
}
21 changes: 18 additions & 3 deletions pkgs/development/python-modules/docling-serve/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
pydantic-settings,
python-multipart,
scalar-fastapi,
opentelemetry-api,
opentelemetry-sdk,
opentelemetry-exporter-otlp,
opentelemetry-instrumentation-fastapi,
opentelemetry-exporter-prometheus,
prometheus-client,
uvicorn,
websockets,
tesserocr,
Expand All @@ -32,14 +38,14 @@

buildPythonPackage rec {
pname = "docling-serve";
version = "1.10.0";
version = "1.15.1";
pyproject = true;

src = fetchFromGitHub {
owner = "docling-project";
repo = "docling-serve";
tag = "v${version}";
hash = "sha256-g0ATehTRtrqgTjvMTs+yvFdFwXTZ8AWsO+Hljwlcbto=";
hash = "sha256-/UM/P/m4KdtYinYd1+Y8ESLfVURc7jQL8KpV2wR2ISs=";
};

build-system = [
Expand All @@ -48,7 +54,10 @@ buildPythonPackage rec {
];

pythonRelaxDeps = [
"websockets"
"opentelemetry-api"
"opentelemetry-sdk"
"opentelemetry-instrumentation-fastapi"
"opentelemetry-exporter-prometheus"
];

pythonRemoveDeps = [
Expand All @@ -64,6 +73,12 @@ buildPythonPackage rec {
pydantic-settings
python-multipart
scalar-fastapi
opentelemetry-api
opentelemetry-sdk
opentelemetry-exporter-otlp
opentelemetry-instrumentation-fastapi
opentelemetry-exporter-prometheus
prometheus-client
typer
uvicorn
websockets
Expand Down
69 changes: 37 additions & 32 deletions pkgs/development/python-modules/docling/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
pandas,
pillow,
pluggy,
polyfactory,
pydantic,
pydantic-settings,
pylatexenc,
Expand Down Expand Up @@ -52,14 +53,14 @@

buildPythonPackage rec {
pname = "docling";
version = "2.69.1";
version = "2.84.0";
pyproject = true;

src = fetchFromGitHub {
owner = "docling-project";
repo = "docling";
tag = "v${version}";
hash = "sha256-r7jAah/tqLylPyyzrK0NW2ok66NVdb/V/YLV95McGC4=";
hash = "sha256-rjRGBZDWqao32AGM4WTFubZ50cNqRWxKAOLojgR7uBk=";
};

build-system = [
Expand All @@ -84,6 +85,7 @@ buildPythonPackage rec {
pandas
pillow
pluggy
polyfactory
pydantic
pydantic-settings
pylatexenc
Expand All @@ -101,9 +103,8 @@ buildPythonPackage rec {
];

pythonRelaxDeps = [
"lxml"
"pypdfium2"
"pillow"
"defusedxml"
"typer"
];

optional-dependencies = {
Expand Down Expand Up @@ -137,43 +138,47 @@ buildPythonPackage rec {
];

disabledTests = [
"test_e2e_pdfs_conversions" # AssertionError: ## TableFormer: Table Structure Understanding with Transf
"test_e2e_conversions" # RuntimeError: Tesseract is not available

# AssertionError
# assert doc.export_to_markdown() == pair[1], f"Error in case {idx}"
"test_ordered_lists"

# AssertionError: export to md
"test_e2e_html_conversions"

# AssertionError: assert 'Unordered li...d code block:' == 'Unordered li...d code block:'
"test_convert_valid"

# AssertionError: Markdown file mismatch against groundtruth pftaps057006474.md
"test_patent_groundtruth"
# Missing optional ASR/XBRL dependencies or require network/model downloads
"test_asr_pipeline_conversion"
"test_asr_pipeline_with_silent_audio"
"test_has_text_and_determine_status_helpers"
"test_native_and_mlx_transcribe_language_handling"
"test_native_init_with_artifacts_path_and_device_logging"
"test_native_run_success_with_bytesio_builds_document"
"test_native_run_failure_sets_status"
"test_e2e_xbrl_conversions"

# Failing against current groundtruth snapshots
"test_e2e_valid_csv_conversions"
"test_e2e_docx_conversions"

# huggingface_hub.errors.LocalEntryNotFoundError: An error happened
# Network/model-dependent failures in sandboxed nix builds
"test_get_text_from_rect_rotated"
"test_e2e_webp_conversions"
"test_cli_convert"
"test_code_and_formula_conversion"
"test_formula_conversion_with_page_range"
"test_conversion_result_json_roundtrip_string"
"test_picture_classifier"
"test_e2e_pdfs_conversions"
"test_e2e_conversions"
"test_normal_pages_all_present"
"test_failed_pages_added_to_document_1page"
"test_failed_pages_added_to_document_2pages"
"test_failed_pages_have_size_info"
"test_errors_recorded_for_failed_pages"
"test_convert_path"
"test_convert_stream"
"test_compare_legacy_output"
"test_ocr_coverage_threshold"
"test_formula_conversion_with_page_range"

# requires network access
"test_page_range"
"test_document_timeout"
"test_ocr_coverage_threshold"
"test_parser_backends"
"test_pipeline_cache_after_initialize"
"test_confidence"
"test_e2e_webp_conversions"
"test_asr_pipeline_conversion"
"test_threaded_pipeline"
"test_get_text_from_rect"
"test_threaded_pipeline_multiple_documents"
"test_pipeline_comparison"

# AssertionError: pred_itxt==true_itxt
"test_e2e_valid_csv_conversions"
"test_pypdfium_threaded_pipeline"
];

meta = {
Expand Down
Loading
Loading