From 32055fe9d6544da53c8752fbe6c21fd944799ec3 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:09:40 +0100 Subject: [PATCH] docs: add Data Prep Kit integration Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docs/integrations/data_prep_kit.md | 13 +++++++++++++ docs/integrations/llamaindex.md | 2 +- mkdocs.yml | 5 +++-- 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 docs/integrations/data_prep_kit.md diff --git a/docs/integrations/data_prep_kit.md b/docs/integrations/data_prep_kit.md new file mode 100644 index 000000000..5885e8eda --- /dev/null +++ b/docs/integrations/data_prep_kit.md @@ -0,0 +1,13 @@ +## Get started + +Docling is used by the [Data Prep Kit \[↗\]](https://ibm.github.io/data-prep-kit/) open-source toolkit for preparing unstructured data for LLM application development ranging from laptop scale to datacenter scale. + +Below you will find the Data Prep Kit modules powered by Docling. + +## PDF ingestion to Parquet +- 💻 [GitHub \[↗\]](https://github.com/IBM/data-prep-kit/tree/dev/transforms/language/pdf2parquet) +- 📖 [API docs \[↗\]](https://ibm.github.io/data-prep-kit/transforms/language/pdf2parquet/python/) + +## Document chunking +- 💻 [GitHub \[↗\]](https://github.com/IBM/data-prep-kit/tree/dev/transforms/language/doc_chunk) +- 📖 [API docs \[↗\]](https://ibm.github.io/data-prep-kit/transforms/language/doc_chunk/python/) diff --git a/docs/integrations/llamaindex.md b/docs/integrations/llamaindex.md index a43a6e6a4..41eb6e3d9 100644 --- a/docs/integrations/llamaindex.md +++ b/docs/integrations/llamaindex.md @@ -1,6 +1,6 @@ ## Get started -Docling is available as an official LlamaIndex extension! +Docling is available as an official [LlamaIndex \[↗\]](https://docs.llamaindex.ai/) extension. To get started, check out the [step-by-step guide in LlamaIndex \[↗\]](https://docs.llamaindex.ai/en/stable/examples/data_connectors/DoclingReaderDemo/). 
diff --git a/mkdocs.yml b/mkdocs.yml index 2ce244bd7..ec23b9a5b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -80,8 +80,9 @@ nav: # - CLI: examples/cli.md - Integrations: - Integrations: integrations/index.md - - "LlamaIndex 🦙 extension": integrations/llamaindex.md - # - "LangChain 🦜🔗 extension": integrations/langchain.md + - "Data Prep Kit": integrations/data_prep_kit.md + - "LlamaIndex 🦙": integrations/llamaindex.md + # - "LangChain 🦜🔗": integrations/langchain.md # - API reference: # - API reference: api_reference/index.md