From 180f70c6e89f63fe892f15a158ffa7f598f50b98 Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Mon, 15 Jul 2024 12:43:05 +0200 Subject: [PATCH 01/10] docs: Update links, add GH repository to metadata (#1) * Add repo, absolute URLs Signed-off-by: Christoph Auer * Bump version Signed-off-by: Christoph Auer --------- Signed-off-by: Christoph Auer Co-authored-by: Christoph Auer --- README.md | 4 ++-- pyproject.toml | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c9e0e20a8..9c4217a21 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- Docling + Docling

# Docling @@ -75,7 +75,7 @@ You can limit the CPU threads used by `docling` by setting the environment varia ## Contributing -Please read [Contributing to Docling](./CONTRIBUTING.md) for details. +Please read [Contributing to Docling](https://github.com/DS4SD/docling/blob/main/CONTRIBUTING.md) for details. ## References diff --git a/pyproject.toml b/pyproject.toml index a603ba14d..133bb74cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,12 @@ [tool.poetry] name = "docling" -version = "0.1.0" +version = "0.1.1" description = "Docling PDF conversion package" authors = ["Christoph Auer ", "Michele Dolfi ", "Maxim Lysak ", "Nikos Livathinos ", "Ahmed Nassar ", "Peter Staar "] license = "MIT" readme = "README.md" +repository = "https://github.com/DS4SD/docling" +homepage = "https://github.com/DS4SD/docling" keywords= ["docling", "convert", "document", "pdf", "layout model", "segmentation", "table structure", "table former"] classifiers = [ "License :: OSI Approved :: MIT License", From 05ab89f9580a4e3778daba8e15b51c978cdbc28a Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:59:53 +0200 Subject: [PATCH 02/10] doc: More documentation updates (#2) * Update README.md Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> * Update Dockerfile Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> * Bump version Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> --------- Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> --- Dockerfile | 3 +-- README.md | 4 ++-- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index bef7be52e..b2138a634 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,8 +6,7 @@ RUN apt-get update \ && apt-get install -y libgl1 libglib2.0-0 curl wget git \ && apt-get clean -RUN --mount=type=ssh \ - pip install --no-cache-dir https://github.com/DS4SD/docling.git +RUN pip install --no-cache-dir docling ENV HF_HOME=/tmp/ ENV TORCH_HOME=/tmp/ diff --git a/README.md b/README.md index 9c4217a21..1367cfd31 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # Docling -Dockling bundles PDF document conversion to JSON and Markdown in an easy, self-contained package. +Docling bundles PDF document conversion to JSON and Markdown in an easy, self-contained package. ## Features * ⚡ Converts any PDF document to JSON or Markdown format, stable and lightning fast @@ -30,7 +30,7 @@ poetry install ## Usage -For basic usage, see the [convert.py](examples/convert.py) example module. Run with: +For basic usage, see the [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py) example module. Run with: ``` python examples/convert.py diff --git a/pyproject.toml b/pyproject.toml index 133bb74cc..1883ae9ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docling" -version = "0.1.1" +version = "0.1.2" description = "Docling PDF conversion package" authors = ["Christoph Auer ", "Michele Dolfi ", "Maxim Lysak ", "Nikos Livathinos ", "Ahmed Nassar ", "Peter Staar "] license = "MIT" From b9dc89238574c1f7a1f14b2b2e41a55c94961b1d Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:02:42 +0200 Subject: [PATCH 03/10] Update convert.py (#3) Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> --- examples/convert.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/convert.py b/examples/convert.py index 89b372649..26a38c518 100644 --- a/examples/convert.py +++ b/examples/convert.py @@ -46,8 +46,6 @@ def main(): logging.basicConfig(level=logging.INFO) input_doc_paths = [ - # Path("/Users/cau/Downloads/Issue-36122.pdf"), - # Path("/Users/cau/Downloads/IBM_Storage_Insights_Fact_Sheet.pdf"), Path("./test/data/2206.01062.pdf"), Path("./test/data/2203.01017v2.pdf"), Path("./test/data/2305.03393v1.pdf"), From e45dc5d1a5772571d222fcf94b1e254989b87aed Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:05:04 +0200 Subject: [PATCH 04/10] ci: Add Github Actions (#4) * add Github Actions Signed-off-by: Michele Dolfi * apply styling Signed-off-by: Michele Dolfi * Update .github/actions/setup-poetry/action.yml Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> * add semantic-release config Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --------- Signed-off-by: Michele Dolfi Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- .github/actions/setup-poetry/action.yml | 19 ++++++++ .github/scripts/release.sh | 39 ++++++++++++++++ .github/workflows/cd.yml | 59 +++++++++++++++++++++++++ .github/workflows/checks.yml | 16 +++++++ .github/workflows/ci.yml | 28 ++++++++++++ .github/workflows/pypi.yml | 21 +++++++++ .gitignore | 1 - docling/datamodel/base_models.py | 3 +- pyproject.toml | 15 ++++++- 9 files changed, 197 insertions(+), 4 deletions(-) create mode 100644 .github/actions/setup-poetry/action.yml create mode 100755 .github/scripts/release.sh create mode 100644 .github/workflows/cd.yml create mode 100644 .github/workflows/checks.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/pypi.yml diff --git a/.github/actions/setup-poetry/action.yml b/.github/actions/setup-poetry/action.yml new file mode 100644 index 000000000..e9ce6977c --- /dev/null +++ b/.github/actions/setup-poetry/action.yml @@ -0,0 +1,19 @@ +name: 'Set up Poetry and install' +description: 'Set up a specific version of Poetry and install dependencies using caching.' +inputs: + python-version: + description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax." + default: '3.11' +runs: + using: 'composite' + steps: + - name: Install poetry + run: pipx install poetry==1.8.3 + shell: bash + - uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python-version }} + cache: 'poetry' + - name: Install dependencies + run: poetry install --all-extras + shell: bash diff --git a/.github/scripts/release.sh b/.github/scripts/release.sh new file mode 100755 index 000000000..6cac40064 --- /dev/null +++ b/.github/scripts/release.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set -e # trigger failure on error - do not remove! +set -x # display command on output + +if [ -z "${TARGET_VERSION}" ]; then + >&2 echo "No TARGET_VERSION specified" + exit 1 +fi +CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}" + +# update package version +poetry version "${TARGET_VERSION}" + +# collect release notes +REL_NOTES=$(mktemp) +poetry run semantic-release changelog --unreleased >> "${REL_NOTES}" + +# update changelog +TMP_CHGLOG=$(mktemp) +TARGET_TAG_NAME="v${TARGET_VERSION}" +RELEASE_URL="$(gh repo view --json url -q ".url")/releases/tag/${TARGET_TAG_NAME}" +printf "## [${TARGET_TAG_NAME}](${RELEASE_URL}) - $(date -Idate)\n\n" >> "${TMP_CHGLOG}" +cat "${REL_NOTES}" >> "${TMP_CHGLOG}" +if [ -f "${CHGLOG_FILE}" ]; then + printf "\n" | cat - "${CHGLOG_FILE}" >> "${TMP_CHGLOG}" +fi +mv "${TMP_CHGLOG}" "${CHGLOG_FILE}" + +# push changes +git config --global user.name 'github-actions[bot]' +git config --global user.email 'github-actions[bot]@users.noreply.github.com' +git add pyproject.toml "${CHGLOG_FILE}" +COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]" +git commit -m "${COMMIT_MSG}" +git push origin main + +# create GitHub release (incl. Git tag) +gh release create "${TARGET_TAG_NAME}" -F "${REL_NOTES}" diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 000000000..b65c72cb4 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,59 @@ +name: "Run CD" + +on: + push: + branches: + - main + +env: + # disable keyring (https://github.com/actions/runner-images/issues/6185): + PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring + +jobs: + docs: + permissions: + contents: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/setup-poetry + - name: Build and push docs + run: poetry run mkdocs gh-deploy --force + code-checks: + uses: ./.github/workflows/checks.yml + pre-release-check: + runs-on: ubuntu-latest + outputs: + TARGET_TAG_V: ${{ steps.version_check.outputs.TRGT_VERSION }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # for fetching tags, required for semantic-release + - uses: ./.github/actions/setup-poetry + - name: Check version of potential release + id: version_check + run: | + TRGT_VERSION=$(poetry run semantic-release print-version) + echo "TRGT_VERSION=${TRGT_VERSION}" >> $GITHUB_OUTPUT + echo "${TRGT_VERSION}" + - name: Check notes of potential release + run: poetry run semantic-release changelog --unreleased + release: + needs: [code-checks, pre-release-check] + if: needs.pre-release-check.outputs.TARGET_TAG_V != '' + environment: auto-release + runs-on: ubuntu-latest + concurrency: release + steps: + - uses: actions/checkout@v3 + with: + token: ${{ secrets.GH_PAT }} + fetch-depth: 0 # for fetching tags, required for semantic-release + - uses: ./.github/actions/setup-poetry + - name: Run release script + env: + GH_TOKEN: ${{ secrets.GH_PAT }} + TARGET_VERSION: ${{ needs.pre-release-check.outputs.TARGET_TAG_V }} + CHGLOG_FILE: CHANGELOG.md + run: ./.github/scripts/release.sh + shell: bash diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml new file mode 100644 index 000000000..6e2ff362c --- /dev/null +++ b/.github/workflows/checks.yml @@ -0,0 +1,16 @@ +on: + workflow_call: + +jobs: + run-checks: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11'] + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/setup-poetry + with: + python-version: ${{ matrix.python-version }} + - name: Run styling check + run: poetry run pre-commit run --all-files diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..33fb37ac9 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: "Run CI" + +on: + pull_request: + types: [opened, reopened, synchronize, ready_for_review] + push: + branches: + - "**" + - "!main" + - "!gh-pages" + +env: + # disable keyring (https://github.com/actions/runner-images/issues/6185): + PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring + +jobs: + code-checks: + uses: ./.github/workflows/checks.yml + + # To enable when we add the ./docs + # build-docs: + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # - uses: ./.github/actions/setup-poetry + # - name: Build docs + # run: poetry run mkdocs build --verbose --clean + \ No newline at end of file diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 000000000..0d206b210 --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,21 @@ +name: "Build and publish package" + +on: + release: + types: [published] + +permissions: + contents: read + +env: + # disable keyring (https://github.com/actions/runner-images/issues/6185): + PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring + +jobs: + build-and-publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/setup-poetry + - name: Build and publish + run: poetry publish --build --no-interaction --username=__token__ --password=${{ secrets.PYPI_TOKEN }} diff --git a/.gitignore b/.gitignore index bb3e25468..800f56862 100644 --- a/.gitignore +++ b/.gitignore @@ -413,7 +413,6 @@ tags [Ll]ib [Ll]ib64 [Ll]ocal -[Ss]cripts pyvenv.cfg pip-selfcheck.json diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index dd9795aba..1e446eddc 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -180,8 +180,7 @@ class TableStructurePrediction(BaseModel): table_map: Dict[int, TableElement] = {} -class TextElement(BasePageElement): - ... +class TextElement(BasePageElement): ... class FigureData(BaseModel): diff --git a/pyproject.toml b/pyproject.toml index 1883ae9ae..35dc3613b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docling" -version = "0.1.2" +version = "0.1.2" # DO NOT EDIT, updated automatically description = "Docling PDF conversion package" authors = ["Christoph Auer ", "Michele Dolfi ", "Maxim Lysak ", "Nikos Livathinos ", "Ahmed Nassar ", "Peter Staar "] license = "MIT" @@ -72,3 +72,16 @@ python_version = "3.11" [tool.flake8] max-line-length = 88 extend-ignore = ["E203", "E501"] + +[tool.semantic_release] +# for default values check: +# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg + +version_source = "tag_only" +branch = "main" + +# configure types which should trigger minor and patch version bumps respectively +# (note that they must be a subset of the configured allowed types): +parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test" +parser_angular_minor_types = "feat" +parser_angular_patch_types = "fix,perf" From b4f45ce96b21bfbfb8c64debc9203bce527c3dc0 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:14:44 +0200 Subject: [PATCH 05/10] disable docs build (#5) --- .github/workflows/cd.yml | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index b65c72cb4..05248981d 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -10,15 +10,17 @@ env: PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring jobs: - docs: - permissions: - contents: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: ./.github/actions/setup-poetry - - name: Build and push docs - run: poetry run mkdocs gh-deploy --force + # To be enabled when we add docs + # docs: + # permissions: + # contents: write + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # - uses: ./.github/actions/setup-poetry + # - name: Build and push docs + # run: poetry run mkdocs gh-deploy --force + code-checks: uses: ./.github/workflows/checks.yml pre-release-check: From b1479cf4ecf8a586703b31c7cf6917b3293c6a85 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:34:42 +0200 Subject: [PATCH 06/10] feat: build with ci (#6) Signed-off-by: Michele Dolfi From 5c88574d03d721c7a78a789f9f752deabdb75405 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 16 Jul 2024 11:37:14 +0000 Subject: [PATCH 07/10] chore: bump version to 0.2.0 [skip ci] --- CHANGELOG.md | 5 +++++ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..deb456807 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +## [v0.2.0](https://github.com/DS4SD/docling/releases/tag/v0.2.0) - 2024-07-16 + +### Feature + +* Build with ci ([#6](https://github.com/DS4SD/docling/issues/6)) ([`b1479cf`](https://github.com/DS4SD/docling/commit/b1479cf4ecf8a586703b31c7cf6917b3293c6a85)) diff --git a/pyproject.toml b/pyproject.toml index 35dc3613b..a648a7029 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docling" -version = "0.1.2" # DO NOT EDIT, updated automatically +version = "0.2.0" # DO NOT EDIT, updated automatically description = "Docling PDF conversion package" authors = ["Christoph Auer ", "Michele Dolfi ", "Maxim Lysak ", "Nikos Livathinos ", "Ahmed Nassar ", "Peter Staar "] license = "MIT" From 2803222ee1708481c779d435dbf1c031929d3cf6 Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:15:09 +0200 Subject: [PATCH 08/10] docs: Add setup with pypi to Readme (#7) Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> --- README.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1367cfd31..f70c01538 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,19 @@ Docling bundles PDF document conversion to JSON and Markdown in an easy, self-co ## Setup -You need Python 3.11 and poetry. Install poetry from [here](https://python-poetry.org/docs/#installing-with-the-official-installer). +For general usage, you can simply install `docling` through `pip` from the pypi package index. +``` +pip install docling +``` + +**Notes**: +* Works on macOS and Linux environments. Windows platforms are currently not tested. + +### Development setup -Once you have `poetry` installed, create an environment and install the package: +To develop for `docling`, you need Python 3.11 and `poetry`. Install poetry from [here](https://python-poetry.org/docs/#installing-with-the-official-installer). + +Once you have `poetry` installed and cloned this repo, create an environment and install `docling` from the repo root: ```bash poetry env use $(which python3.11) @@ -24,10 +34,6 @@ poetry shell poetry install ``` -**Notes**: -* Works on macOS and Linux environments. Windows platforms are currently not tested. - - ## Usage For basic usage, see the [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py) example module. Run with: From fb72688ff7413083c864fe62d2dbfc420c1e5268 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Wed, 17 Jul 2024 14:03:26 +0200 Subject: [PATCH 09/10] feat: enable python 3.12 support by updating glm (#8) * update deepsearch-glm for python 3.12 support Signed-off-by: Michele Dolfi * enable python 3.12 in ci tests Signed-off-by: Michele Dolfi --------- Signed-off-by: Michele Dolfi --- .github/workflows/checks.yml | 2 +- poetry.lock | 91 +++++++++++------------------------- pyproject.toml | 2 +- 3 files changed, 30 insertions(+), 65 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 6e2ff362c..69a417827 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.11'] + python-version: ['3.11', '3.12'] steps: - uses: actions/checkout@v3 - uses: ./.github/actions/setup-poetry diff --git a/poetry.lock b/poetry.lock index 25ce19300..b59f02713 100644 --- a/poetry.lock +++ b/poetry.lock @@ -74,17 +74,6 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] -[[package]] -name = "bashlex" -version = "0.18" -description = "Python parser for bash" -optional = false -python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4" -files = [ - {file = "bashlex-0.18-py2.py3-none-any.whl", hash = "sha256:91d73a23a3e51711919c1c899083890cdecffc91d8c088942725ac13e9dcfffa"}, - {file = "bashlex-0.18.tar.gz", hash = "sha256:5bb03a01c6d5676338c36fd1028009c8ad07e7d61d8a1ce3f513b7fff52796ee"}, -] - [[package]] name = "black" version = "24.4.2" @@ -131,17 +120,6 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] -[[package]] -name = "bracex" -version = "2.4" -description = "Bash style brace expander." -optional = false -python-versions = ">=3.8" -files = [ - {file = "bracex-2.4-py3-none-any.whl", hash = "sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418"}, - {file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"}, -] - [[package]] name = "build" version = "1.2.1" @@ -371,32 +349,6 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] -[[package]] -name = "cibuildwheel" -version = "2.19.2" -description = "Build Python wheels on CI with minimal configuration." -optional = false -python-versions = ">=3.8" -files = [ - {file = "cibuildwheel-2.19.2-py3-none-any.whl", hash = "sha256:02ead5d7e3e81fe2ee0afb78746b1494af6b37afc1e32fae12f9c9a28c14e369"}, - {file = "cibuildwheel-2.19.2.tar.gz", hash = "sha256:d331c81c505106ee585333b871718cf0516ac10d55c4dda2c00c8a7405743cab"}, -] - -[package.dependencies] -bashlex = "!=0.13" -bracex = "*" -certifi = "*" -filelock = "*" -packaging = ">=20.9" -platformdirs = "*" - -[package.extras] -bin = ["click", "packaging (>=21.0)", "pip-tools", "pygithub", "pyyaml", "requests", "rich (>=9.6)"] -dev = ["build", "click", "jinja2", "packaging (>=21.0)", "pip-tools", "pygithub", "pytest (>=6)", "pytest-timeout", "pytest-xdist", "pyyaml", "requests", "rich (>=9.6)", "tomli-w", "validate-pyproject"] -docs = ["jinja2 (>=3.1.2)", "mkdocs (==1.3.1)", "mkdocs-include-markdown-plugin (==2.8.0)", "mkdocs-macros-plugin", "pymdown-extensions"] -test = ["build", "jinja2", "pytest (>=6)", "pytest-timeout", "pytest-xdist", "tomli-w", "validate-pyproject"] -uv = ["uv"] - [[package]] name = "cleo" version = "2.1.0" @@ -643,32 +595,45 @@ files = [ [[package]] name = "deepsearch-glm" -version = "0.18.4" +version = "0.19.0" description = "Graph Language Models" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "deepsearch_glm-0.18.4-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ad88c5bf3c203174ef81e0699405aec0f5386130cbc6a975b165f81887bc1a52"}, - {file = "deepsearch_glm-0.18.4-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:21d51a0671f0713d23be57030287a0f907f4a5f0627a45ea07e2caf54129a71a"}, - {file = "deepsearch_glm-0.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fc853941ea751a15f65e83f9bee9f988d0ecac4b28fac067b2aab49e15edb74"}, - {file = "deepsearch_glm-0.18.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cab5e577cf724343f2a5987ff4488c69e86a2dbca8cb0359c9243a07c6cd7d69"}, - {file = "deepsearch_glm-0.18.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:dda02391306d657a884b12f21cc3d1228663f940ec6001c833893dd2844bcc25"}, - {file = "deepsearch_glm-0.18.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dccd4286a93ee1a216acba27e1fc76f5d14e280d968998cfeae11a00ad1b6cb"}, - {file = "deepsearch_glm-0.18.4-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:cf38368bc72eab673459ea0fc96c02b1f3ae120df2d9443e1a63e010764ac1e9"}, - {file = "deepsearch_glm-0.18.4-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:d3fd83ea3b2bce11bac1d710f12547728f4dd48bfaa8bd472366ef144469d52c"}, - {file = "deepsearch_glm-0.18.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fb4bfd43ac3b996cdd151c35e94fa399953ee3952d7e86390a825880ece95f3"}, - {file = "deepsearch_glm-0.18.4-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:57cb67e435cacb6c4a6b6a9109d943267c493ebbba252a88ca40909976f60225"}, - {file = "deepsearch_glm-0.18.4-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:edc399939b6464f96600d2f23796ae2641d668fb794b77199e87abdef77f8853"}, - {file = "deepsearch_glm-0.18.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00ad8d932e7f0d1be4fd99fc0d4c8d50cb1ff10764f146b6ecb310a1379123d4"}, + {file = "deepsearch_glm-0.19.0-cp310-cp310-macosx_13_6_arm64.whl", hash = "sha256:d420c7eb4e27b64cdc33c0beba159147fc4be14e141133f0f6ef080465b2529c"}, + {file = "deepsearch_glm-0.19.0-cp310-cp310-macosx_13_6_x86_64.whl", hash = "sha256:8af4583ea6d914e87d6db96cae1d73272af6fe85193e67406f0c700064e794c2"}, + {file = "deepsearch_glm-0.19.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:772e6e245b4d77d9df84af07693f9c19bc2f3dc6de4cb44deaf5fdd4a6c8e68d"}, + {file = "deepsearch_glm-0.19.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:6a0c29f8cf8a1ee392c68985f8952a01b43dd8f2c5a1476b890f2c90d7ecbc96"}, + {file = "deepsearch_glm-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb4f34bb5e45df2790eb6bbaf5caa625393d903da502b086de65df9ce4e3fff2"}, + {file = "deepsearch_glm-0.19.0-cp311-cp311-macosx_13_6_arm64.whl", hash = "sha256:320195914e96b8197e53665594c4480b86f3fc4cacd5e6782befb2bb94494a40"}, + {file = "deepsearch_glm-0.19.0-cp311-cp311-macosx_13_6_x86_64.whl", hash = "sha256:7221851c304ef364a13eeffa940a7c15592e9d5b0050b97904221a65be33f3ab"}, + {file = "deepsearch_glm-0.19.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:b9b9e7102cf4355be8458569c7a44133b54446ae623923772db6942ce0fb2e87"}, + {file = "deepsearch_glm-0.19.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:fa8d8d718149cfecd724a0eca246a3bd57588dffb757f204b629a35623d8f946"}, + {file = "deepsearch_glm-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae251cf69b43d945fbf2cd41a89ba12312cd319a1c28d41c99d35cc476376b5"}, + {file = "deepsearch_glm-0.19.0-cp312-cp312-macosx_13_6_arm64.whl", hash = "sha256:ad9a1fbf76e2561bc37e238ee9dd320b4b9cd49e61c55613e3977eedadee52bc"}, + {file = "deepsearch_glm-0.19.0-cp312-cp312-macosx_13_6_x86_64.whl", hash = "sha256:2030aec8ce751927fe20ca1788e125e9b0c37f994c30062e59c4d7b7a87cbb64"}, + {file = "deepsearch_glm-0.19.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:e57611b5d6cc2da91901e4b39fab6c9131dffe8766f43c20093bff75a0039100"}, + {file = "deepsearch_glm-0.19.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:efbcf157cd6bd2dd6138312cef5df378598fd67e6c3f6f0b63ed3342c1de7f49"}, + {file = "deepsearch_glm-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9febce49f365fcc5ada1ded720d387c51328ee470d4fcc89044b0684e074e699"}, + {file = "deepsearch_glm-0.19.0-cp38-cp38-macosx_13_6_arm64.whl", hash = "sha256:04a29ba5e942f32659ae1a65cfe5e90e93d50e058d53b4763fe13df93f30492f"}, + {file = "deepsearch_glm-0.19.0-cp38-cp38-macosx_13_6_x86_64.whl", hash = "sha256:a0024f42d6711f574dcab52ef2914a55f31b4fd804d3ad20ca7f211498e8a19b"}, + {file = "deepsearch_glm-0.19.0-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:513e5f1de14f0b12c916a52118083094a9ced439e4800d3442b2dd04f3cdbead"}, + {file = "deepsearch_glm-0.19.0-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:6d3dd07a549b8cd4408308b0b6b8ca65397ce7e8c819d050d8b2deb03cd1977e"}, + {file = "deepsearch_glm-0.19.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7be341a85ce1ff164963a6d58b50955599dc33b34040975c972a798ae0f6f12c"}, + {file = "deepsearch_glm-0.19.0-cp39-cp39-macosx_13_6_arm64.whl", hash = "sha256:b6c3c0d1295666a8a68f76262c020ffdc6de64cdd95671bf24c0592fa1317533"}, + {file = "deepsearch_glm-0.19.0-cp39-cp39-macosx_13_6_x86_64.whl", hash = "sha256:cf290fe3824bd0de01b7c1d681aa14c89c5e60c6735fa471e04a985e55aead44"}, + {file = "deepsearch_glm-0.19.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:92943c495646660aef99ba64a7e3b77ffeca4866e96044f8be5e14dfa7ee660e"}, + {file = "deepsearch_glm-0.19.0-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:a7c89d6fae4ed9dc960f9ee9734e91d321222080bf439e1d89e8c67270afc282"}, + {file = "deepsearch_glm-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb6398a34f5afac6282c4a5b7ea5a89f27fcf4c0adac43af27ecbac9e2731ce3"}, ] [package.dependencies] -cibuildwheel = ">=2.17.0,<3.0.0" deepsearch-toolkit = ">=0.31.0" matplotlib = ">=3.7.1,<4.0.0" networkx = ">=3.1,<4.0" netwulf = ">=0.1.5,<0.2.0" numerize = ">=0.12,<0.13" +numpy = {version = ">=1.26.4,<2.0.0", markers = "python_version >= \"3.9\""} pandas = ">=1.5.1" pybind11 = ">=2.10.4,<3.0.0" python-dotenv = ">=1.0.0,<2.0.0" @@ -4862,4 +4827,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "1f0c0eb64cdce7e5c21670841d5dea047b3f918a3041bb22bb9c615b9085da20" +content-hash = "dc19329559f190dfe687b4ee272eb6dac66b3d9fe0398c95c2572e8c63fa23ac" diff --git a/pyproject.toml b/pyproject.toml index a648a7029..03e2b1bf2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ python = "^3.11" pydantic = "^2.0.0" docling-core = "^0.2.0" docling-ibm-models = "^0.2.0" -deepsearch-glm = ">=0.18.4,<1" +deepsearch-glm = ">=0.19.0,<1" deepsearch-toolkit = ">=0.47.0,<1" filetype = "^1.2.0" pypdfium2 = "^4.30.0" From 0dfa4548d35dd139cfc857d666b514547006f181 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 17 Jul 2024 12:11:15 +0000 Subject: [PATCH 10/10] chore: bump version to 0.3.0 [skip ci] --- CHANGELOG.md | 10 ++++++++++ pyproject.toml | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index deb456807..75b26cd50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## [v0.3.0](https://github.com/DS4SD/docling/releases/tag/v0.3.0) - 2024-07-17 + +### Feature + +* Enable python 3.12 support by updating glm ([#8](https://github.com/DS4SD/docling/issues/8)) ([`fb72688`](https://github.com/DS4SD/docling/commit/fb72688ff7413083c864fe62d2dbfc420c1e5268)) + +### Documentation + +* Add setup with pypi to Readme ([#7](https://github.com/DS4SD/docling/issues/7)) ([`2803222`](https://github.com/DS4SD/docling/commit/2803222ee1708481c779d435dbf1c031929d3cf6)) + ## [v0.2.0](https://github.com/DS4SD/docling/releases/tag/v0.2.0) - 2024-07-16 ### Feature diff --git a/pyproject.toml b/pyproject.toml index 03e2b1bf2..7a39ac3fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docling" -version = "0.2.0" # DO NOT EDIT, updated automatically +version = "0.3.0" # DO NOT EDIT, updated automatically description = "Docling PDF conversion package" authors = ["Christoph Auer ", "Michele Dolfi ", "Maxim Lysak ", "Nikos Livathinos ", "Ahmed Nassar ", "Peter Staar "] license = "MIT"