diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 63857729..5b09246b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,15 +18,15 @@ env: jobs: code-checks: - if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'DS4SD/docling-parse' && github.event.pull_request.head.repo.full_name != 'ds4sd/docling-parse') }} + if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name != 'docling-project/docling-parse' }} uses: ./.github/workflows/checks.yml build-wheels: - if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'DS4SD/docling-parse' && github.event.pull_request.head.repo.full_name != 'ds4sd/docling-parse') }} + if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name != 'docling-project/docling-parse' }} uses: ./.github/workflows/wheels.yml permissions: id-token: write # needed also if not used (see publish condition) contents: write # needed also if not used (see publish condition) rhel-build: - if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'DS4SD/docling-parse' && github.event.pull_request.head.repo.full_name != 'ds4sd/docling-parse') }} + if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name != 'docling-project/docling-parse' }} uses: ./.github/workflows/rhel.yml \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 116d90c5..64ebdc1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,262 +1,262 @@ -## [v3.4.0](https://github.com/DS4SD/docling-parse/releases/tag/v3.4.0) - 2025-02-18 +## 
[v3.4.0](https://github.com/docling-project/docling-parse/releases/tag/v3.4.0) - 2025-02-18 ### Feature -* Establish char_cells, word_cells and line_cells, other fixes ([#101](https://github.com/DS4SD/docling-parse/issues/101)) ([`c2f9741`](https://github.com/DS4SD/docling-parse/commit/c2f9741a5b2882aacb5e77f81f4ded47a78b5b38)) +* Establish char_cells, word_cells and line_cells, other fixes ([#101](https://github.com/docling-project/docling-parse/issues/101)) ([`c2f9741`](https://github.com/docling-project/docling-parse/commit/c2f9741a5b2882aacb5e77f81f4ded47a78b5b38)) -## [v3.3.1](https://github.com/DS4SD/docling-parse/releases/tag/v3.3.1) - 2025-02-13 +## [v3.3.1](https://github.com/docling-project/docling-parse/releases/tag/v3.3.1) - 2025-02-13 ### Fix -* Update Pillow constraints ([#102](https://github.com/DS4SD/docling-parse/issues/102)) ([`d9b6961`](https://github.com/DS4SD/docling-parse/commit/d9b69612fff101d22e38d5f3b8bbd18db47bf253)) +* Update Pillow constraints ([#102](https://github.com/docling-project/docling-parse/issues/102)) ([`d9b6961`](https://github.com/docling-project/docling-parse/commit/d9b69612fff101d22e38d5f3b8bbd18db47bf253)) ### Documentation -* Updated import for `pdf_parser_v2` in README ([#100](https://github.com/DS4SD/docling-parse/issues/100)) ([`01238dd`](https://github.com/DS4SD/docling-parse/commit/01238ddc32a6388583ff95a7403d51870d10b599)) -* Fixed broken link in README.md ([#97](https://github.com/DS4SD/docling-parse/issues/97)) ([`8ec116e`](https://github.com/DS4SD/docling-parse/commit/8ec116ef853e427254c166aa85371caea0db4ceb)) +* Updated import for `pdf_parser_v2` in README ([#100](https://github.com/docling-project/docling-parse/issues/100)) ([`01238dd`](https://github.com/docling-project/docling-parse/commit/01238ddc32a6388583ff95a7403d51870d10b599)) +* Fixed broken link in README.md ([#97](https://github.com/docling-project/docling-parse/issues/97)) 
([`8ec116e`](https://github.com/docling-project/docling-parse/commit/8ec116ef853e427254c166aa85371caea0db4ceb)) -## [v3.3.0](https://github.com/DS4SD/docling-parse/releases/tag/v3.3.0) - 2025-02-06 +## [v3.3.0](https://github.com/docling-project/docling-parse/releases/tag/v3.3.0) - 2025-02-06 ### Feature -* Add support for RtL ([#94](https://github.com/DS4SD/docling-parse/issues/94)) ([`25b1e64`](https://github.com/DS4SD/docling-parse/commit/25b1e64846390bf2af7afc4d95bf3a634742aeb1)) +* Add support for RtL ([#94](https://github.com/docling-project/docling-parse/issues/94)) ([`25b1e64`](https://github.com/docling-project/docling-parse/commit/25b1e64846390bf2af7afc4d95bf3a634742aeb1)) ### Fix -* Update vizualisation script ([#95](https://github.com/DS4SD/docling-parse/issues/95)) ([`b634c11`](https://github.com/DS4SD/docling-parse/commit/b634c11571a06a843aefde8cd1b8772ae74c8e6f)) +* Update vizualisation script ([#95](https://github.com/docling-project/docling-parse/issues/95)) ([`b634c11`](https://github.com/docling-project/docling-parse/commit/b634c11571a06a843aefde8cd1b8772ae74c8e6f)) -## [v3.2.0](https://github.com/DS4SD/docling-parse/releases/tag/v3.2.0) - 2025-02-02 +## [v3.2.0](https://github.com/docling-project/docling-parse/releases/tag/v3.2.0) - 2025-02-02 ### Feature -* Added the pure chars and fixed the duplicate text ([#91](https://github.com/DS4SD/docling-parse/issues/91)) ([`9718762`](https://github.com/DS4SD/docling-parse/commit/97187622095793eb8a780f1e74680c6867b39a6e)) +* Added the pure chars and fixed the duplicate text ([#91](https://github.com/docling-project/docling-parse/issues/91)) ([`9718762`](https://github.com/docling-project/docling-parse/commit/97187622095793eb8a780f1e74680c6867b39a6e)) ### Fix -* Added the fix for rotated pages ([#90](https://github.com/DS4SD/docling-parse/issues/90)) ([`d663eec`](https://github.com/DS4SD/docling-parse/commit/d663eec5fdc06ab7159b97f7d7b45f3a3ba72975)) +* Added the fix for rotated pages 
([#90](https://github.com/docling-project/docling-parse/issues/90)) ([`d663eec`](https://github.com/docling-project/docling-parse/commit/d663eec5fdc06ab7159b97f7d7b45f3a3ba72975)) ### Documentation -* Fix unit of measure of processing speed ([#89](https://github.com/DS4SD/docling-parse/issues/89)) ([`760b932`](https://github.com/DS4SD/docling-parse/commit/760b932b6770d928380ee83cdd9d14b901f695f8)) +* Fix unit of measure of processing speed ([#89](https://github.com/docling-project/docling-parse/issues/89)) ([`760b932`](https://github.com/docling-project/docling-parse/commit/760b932b6770d928380ee83cdd9d14b901f695f8)) -## [v3.1.2](https://github.com/DS4SD/docling-parse/releases/tag/v3.1.2) - 2025-01-27 +## [v3.1.2](https://github.com/docling-project/docling-parse/releases/tag/v3.1.2) - 2025-01-27 ### Fix -* Added more updates to better font-parsing ([#87](https://github.com/DS4SD/docling-parse/issues/87)) ([`de18986`](https://github.com/DS4SD/docling-parse/commit/de18986f03f1e56ebb750ccdc2b955eeeebbde3b)) +* Added more updates to better font-parsing ([#87](https://github.com/docling-project/docling-parse/issues/87)) ([`de18986`](https://github.com/docling-project/docling-parse/commit/de18986f03f1e56ebb750ccdc2b955eeeebbde3b)) -## [v3.1.1](https://github.com/DS4SD/docling-parse/releases/tag/v3.1.1) - 2025-01-21 +## [v3.1.1](https://github.com/docling-project/docling-parse/releases/tag/v3.1.1) - 2025-01-21 ### Fix -* Move autoflake to dev dependencies ([#86](https://github.com/DS4SD/docling-parse/issues/86)) ([`eed5080`](https://github.com/DS4SD/docling-parse/commit/eed50805ebb00a9fdf48bb99caf5f38d4d9959f7)) +* Move autoflake to dev dependencies ([#86](https://github.com/docling-project/docling-parse/issues/86)) ([`eed5080`](https://github.com/docling-project/docling-parse/commit/eed50805ebb00a9fdf48bb99caf5f38d4d9959f7)) -## [v3.1.0](https://github.com/DS4SD/docling-parse/releases/tag/v3.1.0) - 2025-01-17 +## 
[v3.1.0](https://github.com/docling-project/docling-parse/releases/tag/v3.1.0) - 2025-01-17 ### Feature -* Update for complex fonts, rendering, and experimental high-level API ([#82](https://github.com/DS4SD/docling-parse/issues/82)) ([`525ed8e`](https://github.com/DS4SD/docling-parse/commit/525ed8e38003c846f5ad9c9089bfa845db0d8117)) +* Update for complex fonts, rendering, and experimental high-level API ([#82](https://github.com/docling-project/docling-parse/issues/82)) ([`525ed8e`](https://github.com/docling-project/docling-parse/commit/525ed8e38003c846f5ad9c9089bfa845db0d8117)) -## [v3.0.0](https://github.com/DS4SD/docling-parse/releases/tag/v3.0.0) - 2024-12-09 +## [v3.0.0](https://github.com/docling-project/docling-parse/releases/tag/v3.0.0) - 2024-12-09 ### Feature -* Massive quality improvements to v2 parser and new sanitize_cells API ([#73](https://github.com/DS4SD/docling-parse/issues/73)) ([`1fccb29`](https://github.com/DS4SD/docling-parse/commit/1fccb29d3f827450c2d259b3b9e433321a3f8751)) +* Massive quality improvements to v2 parser and new sanitize_cells API ([#73](https://github.com/docling-project/docling-parse/issues/73)) ([`1fccb29`](https://github.com/docling-project/docling-parse/commit/1fccb29d3f827450c2d259b3b9e433321a3f8751)) ### Breaking -* Massive quality improvements to v2 parser and new sanitize_cells API ([#73](https://github.com/DS4SD/docling-parse/issues/73)) ([`1fccb29`](https://github.com/DS4SD/docling-parse/commit/1fccb29d3f827450c2d259b3b9e433321a3f8751)) +* Massive quality improvements to v2 parser and new sanitize_cells API ([#73](https://github.com/docling-project/docling-parse/issues/73)) ([`1fccb29`](https://github.com/docling-project/docling-parse/commit/1fccb29d3f827450c2d259b3b9e433321a3f8751)) -## [v2.1.2](https://github.com/DS4SD/docling-parse/releases/tag/v2.1.2) - 2024-11-22 +## [v2.1.2](https://github.com/docling-project/docling-parse/releases/tag/v2.1.2) - 2024-11-22 ### Fix -* Added the PDF documentation 
([#64](https://github.com/DS4SD/docling-parse/issues/64)) ([`2033f95`](https://github.com/DS4SD/docling-parse/commit/2033f95f3d8ad0df4a506d7543af0808c439124d)) +* Added the PDF documentation ([#64](https://github.com/docling-project/docling-parse/issues/64)) ([`2033f95`](https://github.com/docling-project/docling-parse/commit/2033f95f3d8ad0df4a506d7543af0808c439124d)) -## [v2.1.1](https://github.com/DS4SD/docling-parse/releases/tag/v2.1.1) - 2024-11-21 +## [v2.1.1](https://github.com/docling-project/docling-parse/releases/tag/v2.1.1) - 2024-11-21 ### Fix -* Compatibility with qpdf v10 ([#62](https://github.com/DS4SD/docling-parse/issues/62)) ([`7f87b26`](https://github.com/DS4SD/docling-parse/commit/7f87b2630e6957eb1339e3b222d51969573e4bdc)) +* Compatibility with qpdf v10 ([#62](https://github.com/docling-project/docling-parse/issues/62)) ([`7f87b26`](https://github.com/docling-project/docling-parse/commit/7f87b2630e6957eb1339e3b222d51969573e4bdc)) -## [v2.1.0](https://github.com/DS4SD/docling-parse/releases/tag/v2.1.0) - 2024-11-20 +## [v2.1.0](https://github.com/docling-project/docling-parse/releases/tag/v2.1.0) - 2024-11-20 ### Feature -* Add the export of annotations and ToC ([#58](https://github.com/DS4SD/docling-parse/issues/58)) ([`22cf280`](https://github.com/DS4SD/docling-parse/commit/22cf280b1f2d7651b9684aba6a575edce9b35c00)) +* Add the export of annotations and ToC ([#58](https://github.com/docling-project/docling-parse/issues/58)) ([`22cf280`](https://github.com/docling-project/docling-parse/commit/22cf280b1f2d7651b9684aba6a575edce9b35c00)) -## [v2.0.5](https://github.com/DS4SD/docling-parse/releases/tag/v2.0.5) - 2024-11-20 +## [v2.0.5](https://github.com/docling-project/docling-parse/releases/tag/v2.0.5) - 2024-11-20 ### Fix -* Enable python3.9 wheels ([#60](https://github.com/DS4SD/docling-parse/issues/60)) ([`8e36f66`](https://github.com/DS4SD/docling-parse/commit/8e36f66b069e264875877ee3655601f8f1ff1b77)) +* Enable python3.9 wheels 
([#60](https://github.com/docling-project/docling-parse/issues/60)) ([`8e36f66`](https://github.com/docling-project/docling-parse/commit/8e36f66b069e264875877ee3655601f8f1ff1b77)) -## [v2.0.4](https://github.com/DS4SD/docling-parse/releases/tag/v2.0.4) - 2024-11-13 +## [v2.0.4](https://github.com/docling-project/docling-parse/releases/tag/v2.0.4) - 2024-11-13 ### Fix -* Removing asserts that break parse-v2 ([#55](https://github.com/DS4SD/docling-parse/issues/55)) ([`bb978c2`](https://github.com/DS4SD/docling-parse/commit/bb978c2918f3711aa838006a4b45f5701a561ef5)) +* Removing asserts that break parse-v2 ([#55](https://github.com/docling-project/docling-parse/issues/55)) ([`bb978c2`](https://github.com/docling-project/docling-parse/commit/bb978c2918f3711aa838006a4b45f5701a561ef5)) -## [v2.0.3](https://github.com/DS4SD/docling-parse/releases/tag/v2.0.3) - 2024-11-05 +## [v2.0.3](https://github.com/docling-project/docling-parse/releases/tag/v2.0.3) - 2024-11-05 ### Fix -* Replace all the FATAL with ERROR messages in the v2 parser ([#53](https://github.com/DS4SD/docling-parse/issues/53)) ([`cd15d00`](https://github.com/DS4SD/docling-parse/commit/cd15d00ddb6c67fada0056ec068caffc003d5edc)) +* Replace all the FATAL with ERROR messages in the v2 parser ([#53](https://github.com/docling-project/docling-parse/issues/53)) ([`cd15d00`](https://github.com/docling-project/docling-parse/commit/cd15d00ddb6c67fada0056ec068caffc003d5edc)) -## [v2.0.2](https://github.com/DS4SD/docling-parse/releases/tag/v2.0.2) - 2024-10-30 +## [v2.0.2](https://github.com/docling-project/docling-parse/releases/tag/v2.0.2) - 2024-10-30 ### Fix -* Improve qpdf optimization options ([#52](https://github.com/DS4SD/docling-parse/issues/52)) ([`82284d4`](https://github.com/DS4SD/docling-parse/commit/82284d42c5136490a4285cd19d4d5ff90044fbe5)) +* Improve qpdf optimization options ([#52](https://github.com/docling-project/docling-parse/issues/52)) 
([`82284d4`](https://github.com/docling-project/docling-parse/commit/82284d42c5136490a4285cd19d4d5ff90044fbe5)) -## [v2.0.1](https://github.com/DS4SD/docling-parse/releases/tag/v2.0.1) - 2024-10-25 +## [v2.0.1](https://github.com/docling-project/docling-parse/releases/tag/v2.0.1) - 2024-10-25 ### Fix -* Robustify parser v2 ([#49](https://github.com/DS4SD/docling-parse/issues/49)) ([`1815e7d`](https://github.com/DS4SD/docling-parse/commit/1815e7d9400bd2551e99efb475fd59a7bf81069a)) +* Robustify parser v2 ([#49](https://github.com/docling-project/docling-parse/issues/49)) ([`1815e7d`](https://github.com/docling-project/docling-parse/commit/1815e7d9400bd2551e99efb475fd59a7bf81069a)) -## [v2.0.0](https://github.com/DS4SD/docling-parse/releases/tag/v2.0.0) - 2024-10-23 +## [v2.0.0](https://github.com/docling-project/docling-parse/releases/tag/v2.0.0) - 2024-10-23 ### Feature -* Upgrade to v2.0.0 ([#48](https://github.com/DS4SD/docling-parse/issues/48)) ([`6fdd748`](https://github.com/DS4SD/docling-parse/commit/6fdd74870dceff64e52279dc6fe1ff338346def3)) -* Fixed the v2 parser to only return the pages that are requested ([#47](https://github.com/DS4SD/docling-parse/issues/47)) ([`48451ad`](https://github.com/DS4SD/docling-parse/commit/48451ad0957ed5a4333642870cf9ca406bc95c2f)) +* Upgrade to v2.0.0 ([#48](https://github.com/docling-project/docling-parse/issues/48)) ([`6fdd748`](https://github.com/docling-project/docling-parse/commit/6fdd74870dceff64e52279dc6fe1ff338346def3)) +* Fixed the v2 parser to only return the pages that are requested ([#47](https://github.com/docling-project/docling-parse/issues/47)) ([`48451ad`](https://github.com/docling-project/docling-parse/commit/48451ad0957ed5a4333642870cf9ca406bc95c2f)) ### Breaking -* Upgrade to v2.0.0 ([#48](https://github.com/DS4SD/docling-parse/issues/48)) ([`6fdd748`](https://github.com/DS4SD/docling-parse/commit/6fdd74870dceff64e52279dc6fe1ff338346def3)) +* Upgrade to v2.0.0 
([#48](https://github.com/docling-project/docling-parse/issues/48)) ([`6fdd748`](https://github.com/docling-project/docling-parse/commit/6fdd74870dceff64e52279dc6fe1ff338346def3)) -## [v1.6.2](https://github.com/DS4SD/docling-parse/releases/tag/v1.6.2) - 2024-10-18 +## [v1.6.2](https://github.com/docling-project/docling-parse/releases/tag/v1.6.2) - 2024-10-18 ### Fix -* Cmake-cxxopts by using similar approach as glm ([#44](https://github.com/DS4SD/docling-parse/issues/44)) ([`6427726`](https://github.com/DS4SD/docling-parse/commit/64277266860407baac018fbe4397abfa2108a41b)) +* Cmake-cxxopts by using similar approach as glm ([#44](https://github.com/docling-project/docling-parse/issues/44)) ([`6427726`](https://github.com/docling-project/docling-parse/commit/64277266860407baac018fbe4397abfa2108a41b)) -## [v1.6.1](https://github.com/DS4SD/docling-parse/releases/tag/v1.6.1) - 2024-10-18 +## [v1.6.1](https://github.com/docling-project/docling-parse/releases/tag/v1.6.1) - 2024-10-18 ### Fix -* Fatal errors on pdfs ([#41](https://github.com/DS4SD/docling-parse/issues/41)) ([`54252e6`](https://github.com/DS4SD/docling-parse/commit/54252e6c2ef6a60dba6683fd32dd78d53fce5f76)) +* Fatal errors on pdfs ([#41](https://github.com/docling-project/docling-parse/issues/41)) ([`54252e6`](https://github.com/docling-project/docling-parse/commit/54252e6c2ef6a60dba6683fd32dd78d53fce5f76)) -## [v1.6.0](https://github.com/DS4SD/docling-parse/releases/tag/v1.6.0) - 2024-10-11 +## [v1.6.0](https://github.com/docling-project/docling-parse/releases/tag/v1.6.0) - 2024-10-11 ### Feature -* Add an experimental v2 parser to improve performance ([#29](https://github.com/DS4SD/docling-parse/issues/29)) ([`e5856f0`](https://github.com/DS4SD/docling-parse/commit/e5856f009a141e08a2e2f45e60aab5a69bfc28d9)) +* Add an experimental v2 parser to improve performance ([#29](https://github.com/docling-project/docling-parse/issues/29)) 
([`e5856f0`](https://github.com/docling-project/docling-parse/commit/e5856f009a141e08a2e2f45e60aab5a69bfc28d9)) -## [v1.5.1](https://github.com/DS4SD/docling-parse/releases/tag/v1.5.1) - 2024-10-10 +## [v1.5.1](https://github.com/docling-project/docling-parse/releases/tag/v1.5.1) - 2024-10-10 ### Fix -* Allow more compatible pywin32 versions ([#40](https://github.com/DS4SD/docling-parse/issues/40)) ([`68b848c`](https://github.com/DS4SD/docling-parse/commit/68b848ccd60776f350b507a13c563a5cc33070a8)) +* Allow more compatible pywin32 versions ([#40](https://github.com/docling-project/docling-parse/issues/40)) ([`68b848c`](https://github.com/docling-project/docling-parse/commit/68b848ccd60776f350b507a13c563a5cc33070a8)) -## [v1.5.0](https://github.com/DS4SD/docling-parse/releases/tag/v1.5.0) - 2024-10-10 +## [v1.5.0](https://github.com/docling-project/docling-parse/releases/tag/v1.5.0) - 2024-10-10 ### Feature -* Python 3.13 support ([#39](https://github.com/DS4SD/docling-parse/issues/39)) ([`71a043e`](https://github.com/DS4SD/docling-parse/commit/71a043eb97e437c7e99970fab122bbd59fdee4b0)) +* Python 3.13 support ([#39](https://github.com/docling-project/docling-parse/issues/39)) ([`71a043e`](https://github.com/docling-project/docling-parse/commit/71a043eb97e437c7e99970fab122bbd59fdee4b0)) -## [v1.4.1](https://github.com/DS4SD/docling-parse/releases/tag/v1.4.1) - 2024-10-02 +## [v1.4.1](https://github.com/docling-project/docling-parse/releases/tag/v1.4.1) - 2024-10-02 ### Fix -* Windows build properly linking to system libraries ([#36](https://github.com/DS4SD/docling-parse/issues/36)) ([`e26ed05`](https://github.com/DS4SD/docling-parse/commit/e26ed056c22400552918c3a97dfb13614c9a03f5)) +* Windows build properly linking to system libraries ([#36](https://github.com/docling-project/docling-parse/issues/36)) ([`e26ed05`](https://github.com/docling-project/docling-parse/commit/e26ed056c22400552918c3a97dfb13614c9a03f5)) -## 
[v1.4.0](https://github.com/DS4SD/docling-parse/releases/tag/v1.4.0) - 2024-10-02 +## [v1.4.0](https://github.com/docling-project/docling-parse/releases/tag/v1.4.0) - 2024-10-02 ### Feature -* Build using system deps ([#33](https://github.com/DS4SD/docling-parse/issues/33)) ([`e1c8e49`](https://github.com/DS4SD/docling-parse/commit/e1c8e4980faab35bfdf6d1a78d8749745c560889)) +* Build using system deps ([#33](https://github.com/docling-project/docling-parse/issues/33)) ([`e1c8e49`](https://github.com/docling-project/docling-parse/commit/e1c8e4980faab35bfdf6d1a78d8749745c560889)) ### Fix -* Python version in wheels ([#31](https://github.com/DS4SD/docling-parse/issues/31)) ([`8d903ba`](https://github.com/DS4SD/docling-parse/commit/8d903baf61a7706066374c23265e115a9513c3ba)) +* Python version in wheels ([#31](https://github.com/docling-project/docling-parse/issues/31)) ([`8d903ba`](https://github.com/docling-project/docling-parse/commit/8d903baf61a7706066374c23265e115a9513c3ba)) -## [v1.3.1](https://github.com/DS4SD/docling-parse/releases/tag/v1.3.1) - 2024-09-30 +## [v1.3.1](https://github.com/docling-project/docling-parse/releases/tag/v1.3.1) - 2024-09-30 ### Fix -* Sdist and wheels content ([#28](https://github.com/DS4SD/docling-parse/issues/28)) ([`f3febc5`](https://github.com/DS4SD/docling-parse/commit/f3febc53a2a6565b16847113633f92d1a2dab48a)) +* Sdist and wheels content ([#28](https://github.com/docling-project/docling-parse/issues/28)) ([`f3febc5`](https://github.com/docling-project/docling-parse/commit/f3febc53a2a6565b16847113633f92d1a2dab48a)) -## [v1.3.0](https://github.com/DS4SD/docling-parse/releases/tag/v1.3.0) - 2024-09-20 +## [v1.3.0](https://github.com/docling-project/docling-parse/releases/tag/v1.3.0) - 2024-09-20 ### Feature -* Add windows support ([#22](https://github.com/DS4SD/docling-parse/issues/22)) ([`05e6aa3`](https://github.com/DS4SD/docling-parse/commit/05e6aa30d6de76694cf7f04be2633b2f5e129ef2)) +* Add windows support 
([#22](https://github.com/docling-project/docling-parse/issues/22)) ([`05e6aa3`](https://github.com/docling-project/docling-parse/commit/05e6aa30d6de76694cf7f04be2633b2f5e129ef2)) -## [v1.2.1](https://github.com/DS4SD/docling-parse/releases/tag/v1.2.1) - 2024-09-18 +## [v1.2.1](https://github.com/docling-project/docling-parse/releases/tag/v1.2.1) - 2024-09-18 ### Fix -* Clean code ([#20](https://github.com/DS4SD/docling-parse/issues/20)) ([`992df42`](https://github.com/DS4SD/docling-parse/commit/992df4235ca624b47ce63be71592fa895c732e07)) +* Clean code ([#20](https://github.com/docling-project/docling-parse/issues/20)) ([`992df42`](https://github.com/docling-project/docling-parse/commit/992df4235ca624b47ce63be71592fa895c732e07)) -## [v1.2.0](https://github.com/DS4SD/docling-parse/releases/tag/v1.2.0) - 2024-09-09 +## [v1.2.0](https://github.com/docling-project/docling-parse/releases/tag/v1.2.0) - 2024-09-09 ### Feature -* Build linux arm64 architecture ([#17](https://github.com/DS4SD/docling-parse/issues/17)) ([`3f51a2c`](https://github.com/DS4SD/docling-parse/commit/3f51a2c571259491a79899db02cfe2de26a5c17f)) +* Build linux arm64 architecture ([#17](https://github.com/docling-project/docling-parse/issues/17)) ([`3f51a2c`](https://github.com/docling-project/docling-parse/commit/3f51a2c571259491a79899db02cfe2de26a5c17f)) -## [v1.1.3](https://github.com/DS4SD/docling-parse/releases/tag/v1.1.3) - 2024-08-30 +## [v1.1.3](https://github.com/docling-project/docling-parse/releases/tag/v1.1.3) - 2024-08-30 ### Fix -* Resolve more assert errors ([#16](https://github.com/DS4SD/docling-parse/issues/16)) ([`c3a6b03`](https://github.com/DS4SD/docling-parse/commit/c3a6b038571909a41b3abd237215b756c3eacc62)) +* Resolve more assert errors ([#16](https://github.com/docling-project/docling-parse/issues/16)) ([`c3a6b03`](https://github.com/docling-project/docling-parse/commit/c3a6b038571909a41b3abd237215b756c3eacc62)) -## 
[v1.1.2](https://github.com/DS4SD/docling-parse/releases/tag/v1.1.2) - 2024-08-29 +## [v1.1.2](https://github.com/docling-project/docling-parse/releases/tag/v1.1.2) - 2024-08-29 ### Fix -* Out-of-range vector error ([#15](https://github.com/DS4SD/docling-parse/issues/15)) ([`4ed034c`](https://github.com/DS4SD/docling-parse/commit/4ed034cc0fb3988a9216e3574b9f34c155dae452)) +* Out-of-range vector error ([#15](https://github.com/docling-project/docling-parse/issues/15)) ([`4ed034c`](https://github.com/docling-project/docling-parse/commit/4ed034cc0fb3988a9216e3574b9f34c155dae452)) -## [v1.1.1](https://github.com/DS4SD/docling-parse/releases/tag/v1.1.1) - 2024-08-23 +## [v1.1.1](https://github.com/docling-project/docling-parse/releases/tag/v1.1.1) - 2024-08-23 ### Fix -* Replace assert with exceptions ([#12](https://github.com/DS4SD/docling-parse/issues/12)) ([`6565f32`](https://github.com/DS4SD/docling-parse/commit/6565f32bdeb17d9796a94ccf3c8f8c4e0e73bf49)) +* Replace assert with exceptions ([#12](https://github.com/docling-project/docling-parse/issues/12)) ([`6565f32`](https://github.com/docling-project/docling-parse/commit/6565f32bdeb17d9796a94ccf3c8f8c4e0e73bf49)) -## [v1.1.0](https://github.com/DS4SD/docling-parse/releases/tag/v1.1.0) - 2024-08-22 +## [v1.1.0](https://github.com/docling-project/docling-parse/releases/tag/v1.1.0) - 2024-08-22 ### Feature -* Deal with qpdf errors on a page by page basis ([#11](https://github.com/DS4SD/docling-parse/issues/11)) ([`400fcb3`](https://github.com/DS4SD/docling-parse/commit/400fcb30b1813206bb98a17d85537af1471837a2)) +* Deal with qpdf errors on a page by page basis ([#11](https://github.com/docling-project/docling-parse/issues/11)) ([`400fcb3`](https://github.com/docling-project/docling-parse/commit/400fcb30b1813206bb98a17d85537af1471837a2)) -## [v1.0.0](https://github.com/DS4SD/docling-parse/releases/tag/v1.0.0) - 2024-08-22 +## [v1.0.0](https://github.com/docling-project/docling-parse/releases/tag/v1.0.0) - 2024-08-22 ### 
Feature -* Adding load/unload from key ([#9](https://github.com/DS4SD/docling-parse/issues/9)) ([`dd122d0`](https://github.com/DS4SD/docling-parse/commit/dd122d0c938e0054d22540949c9ee5b839c34c54)) +* Adding load/unload from key ([#9](https://github.com/docling-project/docling-parse/issues/9)) ([`dd122d0`](https://github.com/docling-project/docling-parse/commit/dd122d0c938e0054d22540949c9ee5b839c34c54)) ### Breaking -* adding load/unload from key ([#9](https://github.com/DS4SD/docling-parse/issues/9)) ([`dd122d0`](https://github.com/DS4SD/docling-parse/commit/dd122d0c938e0054d22540949c9ee5b839c34c54)) +* adding load/unload from key ([#9](https://github.com/docling-project/docling-parse/issues/9)) ([`dd122d0`](https://github.com/docling-project/docling-parse/commit/dd122d0c938e0054d22540949c9ee5b839c34c54)) -## [v0.3.1](https://github.com/DS4SD/docling-parse/releases/tag/v0.3.1) - 2024-08-22 +## [v0.3.1](https://github.com/docling-project/docling-parse/releases/tag/v0.3.1) - 2024-08-22 ### Fix -* Resolve segfaults ([#8](https://github.com/DS4SD/docling-parse/issues/8)) ([`8ab088d`](https://github.com/DS4SD/docling-parse/commit/8ab088daf07c2c1d959aab79d0845e2181667b0e)) +* Resolve segfaults ([#8](https://github.com/docling-project/docling-parse/issues/8)) ([`8ab088d`](https://github.com/docling-project/docling-parse/commit/8ab088daf07c2c1d959aab79d0845e2181667b0e)) -## [v0.3.0](https://github.com/DS4SD/docling-parse/releases/tag/v0.3.0) - 2024-08-21 +## [v0.3.0](https://github.com/docling-project/docling-parse/releases/tag/v0.3.0) - 2024-08-21 ### Feature -* Read page by page ([#7](https://github.com/DS4SD/docling-parse/issues/7)) ([`92e02ec`](https://github.com/DS4SD/docling-parse/commit/92e02ec4c1bdfc3e5cb899de8ea0e3384848560d)) +* Read page by page ([#7](https://github.com/docling-project/docling-parse/issues/7)) ([`92e02ec`](https://github.com/docling-project/docling-parse/commit/92e02ec4c1bdfc3e5cb899de8ea0e3384848560d)) -## 
[v0.2.0](https://github.com/DS4SD/docling-parse/releases/tag/v0.2.0) - 2024-08-13 +## [v0.2.0](https://github.com/docling-project/docling-parse/releases/tag/v0.2.0) - 2024-08-13 ### Feature -* Add reading from BytesIO ([#6](https://github.com/DS4SD/docling-parse/issues/6)) ([`195777b`](https://github.com/DS4SD/docling-parse/commit/195777b656969d5021b7d8d55d2d208b61dfcb0f)) +* Add reading from BytesIO ([#6](https://github.com/docling-project/docling-parse/issues/6)) ([`195777b`](https://github.com/docling-project/docling-parse/commit/195777b656969d5021b7d8d55d2d208b61dfcb0f)) -## [v0.1.0](https://github.com/DS4SD/docling-parse/releases/tag/v0.1.0) - 2024-08-07 +## [v0.1.0](https://github.com/docling-project/docling-parse/releases/tag/v0.1.0) - 2024-08-07 ### Feature -* First release to pypi ([#4](https://github.com/DS4SD/docling-parse/issues/4)) ([`f762774`](https://github.com/DS4SD/docling-parse/commit/f762774a8db2bd198b9c017a36a25fdd98ac1b41)) +* First release to pypi ([#4](https://github.com/docling-project/docling-parse/issues/4)) ([`f762774`](https://github.com/docling-project/docling-parse/commit/f762774a8db2bd198b9c017a36a25fdd98ac1b41)) -## [v0.0.1](https://github.com/DS4SD/docling-parse/releases/tag/v0.0.1) - 2024-08-07 +## [v0.0.1](https://github.com/docling-project/docling-parse/releases/tag/v0.0.1) - 2024-08-07 ### Fix -* Unit-tests ([#3](https://github.com/DS4SD/docling-parse/issues/3)) ([`fa7bef7`](https://github.com/DS4SD/docling-parse/commit/fa7bef7f35209d7f3d3d4a3eef37f704f94c9cac)) -* Add and fix cli ([#1](https://github.com/DS4SD/docling-parse/issues/1)) ([`ccb7675`](https://github.com/DS4SD/docling-parse/commit/ccb7675e248f9aba088a4b0c846caf7363be14bc)) +* Unit-tests ([#3](https://github.com/docling-project/docling-parse/issues/3)) ([`fa7bef7`](https://github.com/docling-project/docling-parse/commit/fa7bef7f35209d7f3d3d4a3eef37f704f94c9cac)) +* Add and fix cli ([#1](https://github.com/docling-project/docling-parse/issues/1)) 
([`ccb7675`](https://github.com/docling-project/docling-parse/commit/ccb7675e248f9aba088a4b0c846caf7363be14bc)) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4bc10e3c..2c59c2ff 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,13 +2,13 @@ Our project welcomes external contributions. If you have an itch, please feel free to scratch it. -To contribute code or documentation, please submit a [pull request](https://github.com/DS4SD/docling-parse/pulls). +To contribute code or documentation, please submit a [pull request](https://github.com/docling-project/docling-parse/pulls). A good way to familiarize yourself with the codebase and contribution process is -to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/DS4SD/docling-parse/issues). +to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/docling-project/docling-parse/issues). Before embarking on a more ambitious contribution, please quickly [get in touch](#communication) with us. -For general questions or support requests, please refer to the [discussion section](https://github.com/DS4SD/docling-parse/discussions). +For general questions or support requests, please refer to the [discussion section](https://github.com/docling-project/docling-parse/discussions). **Note: We appreciate your effort, and want to avoid a situation where a contribution requires extensive rework (by you or by us), sits in backlog for a long time, or @@ -16,14 +16,14 @@ cannot be accepted at all!** ### Proposing new features -If you would like to implement a new feature, please [raise an issue](https://github.com/DS4SD/docling-parse/issues) +If you would like to implement a new feature, please [raise an issue](https://github.com/docling-project/docling-parse/issues) before sending a pull request so the feature can be discussed. 
This is to avoid you wasting your valuable time working on a feature that the project developers are not interested in accepting into the code base. ### Fixing bugs -If you would like to fix a bug, please [raise an issue](https://github.com/DS4SD/docling-parse/issues) before sending a +If you would like to fix a bug, please [raise an issue](https://github.com/docling-project/docling-parse/issues) before sending a pull request so it can be tracked. ### Merge approval @@ -74,7 +74,7 @@ git commit -s ## Communication -Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling-parse/discussions). +Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling-parse/discussions). diff --git a/README.md b/README.md index b93f363b..71e9ae26 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-parse)](https://pypi.org/project/docling-parse/) [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/) [![Pybind11](https://img.shields.io/badge/build-pybind11-blue)](https://github.com/pybind/pybind11/) -[![Platforms](https://img.shields.io/badge/platform-macos%20|%20linux%20|%20windows-blue)](https://github.com/DS4SD/docling-parse/) -[![License MIT](https://img.shields.io/github/license/DS4SD/docling-parse)](https://opensource.org/licenses/MIT) +[![Platforms](https://img.shields.io/badge/platform-macos%20|%20linux%20|%20windows-blue)](https://github.com/docling-project/docling-parse/) +[![License MIT](https://img.shields.io/github/license/docling-project/docling-parse)](https://opensource.org/licenses/MIT) -Simple package to extract text, paths and bitmap images with coordinates from programmatic PDFs. This package is used in the [Docling](https://github.com/DS4SD/docling) PDF conversion. 
Below, we show a few output of the latest parser with char, word and line level output for text, in addition to the extracted paths and bitmap resources. +Simple package to extract text, paths and bitmap images with coordinates from programmatic PDFs. This package is used in the [Docling](https://github.com/docling-project/docling) PDF conversion. Below, we show a few outputs of the latest parser with char, word and line level output for text, in addition to the extracted paths and bitmap resources. To do the visualizations yourself, simply run (change `word` into `char` or `line`), @@ -130,7 +130,7 @@ options: ### Timings of different parser versions -We ran the v1 and v2 parser on [DocLayNet](https://huggingface.co/datasets/ds4sd/DocLayNet-v1.1). We found the following overall behavior +We ran the v1 and v2 parser on [DocLayNet](https://huggingface.co/datasets/docling-project/DocLayNet-v1.1). We found the following overall behavior ![parser-performance](./docs/dln-v1.png) @@ -198,7 +198,7 @@ poetry run pytest ./tests -v -s ## Contributing -Please read [Contributing to Docling Parse](https://github.com/DS4SD/docling-parse/blob/main/CONTRIBUTING.md) for details. +Please read [Contributing to Docling Parse](https://github.com/docling-project/docling-parse/blob/main/CONTRIBUTING.md) for details. ## References diff --git a/pyproject.toml b/pyproject.toml index 803af6db..88f1be48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,8 +10,8 @@ maintainers = [ "Panos Vagenas ", "Maxim Lysak ", ] -repository = "https://github.com/DS4SD/docling-parse" -homepage = "https://github.com/DS4SD/docling-parse" +repository = "https://github.com/docling-project/docling-parse" +homepage = "https://github.com/docling-project/docling-parse" keywords= ["docling", "pdf", "parser"] classifiers = [ "License :: OSI Approved :: MIT License",