Skip to content

Commit

Permalink
move chunker into new docling_core.transforms package
Browse files (browse the repository at this point in the history)
Signed-off-by: Panos Vagenas <[email protected]>
  • Loading branch information
vagenas committed Sep 11, 2024
1 parent 1ccaeae commit 6b4a1b4
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 16 deletions.
8 changes: 0 additions & 8 deletions docling_core/chunker/__init__.py

This file was deleted.

6 changes: 6 additions & 0 deletions docling_core/transforms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#
# Copyright IBM Corp. 2024 - 2024
# SPDX-License-Identifier: MIT
#

"""Data transformations package."""
15 changes: 15 additions & 0 deletions docling_core/transforms/chunker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#
# Copyright IBM Corp. 2024 - 2024
# SPDX-License-Identifier: MIT
#

"""Define the chunker types."""

from docling_core.transforms.chunker.base import ( # noqa
BaseChunker,
Chunk,
ChunkWithMetadata,
)
from docling_core.transforms.chunker.hierarchical_chunker import ( # noqa
HierarchicalChunker,
)
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pandas as pd
from pydantic import BaseModel, PositiveInt

from docling_core.chunker.base import BaseChunker, Chunk, ChunkWithMetadata
from docling_core.transforms.chunker import BaseChunker, Chunk, ChunkWithMetadata
from docling_core.types import BaseText
from docling_core.types import Document as DLDocument
from docling_core.types import Ref, Table
Expand Down Expand Up @@ -260,12 +260,7 @@ def _build_chunk_impl(
)
else:
# if root, augment with title (if available and different)
return (
text_entries
# ([doc_map.glob.title.text] + texts)
# if doc_map.glob.title and [doc_map.glob.title.text] != texts
# else texts
)
return text_entries
else:
return []

Expand Down
2 changes: 1 addition & 1 deletion test/test_hierarchical_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import json

from docling_core.chunker import HierarchicalChunker
from docling_core.transforms.chunker import HierarchicalChunker
from docling_core.types import Document as DLDocument


Expand Down

0 comments on commit 6b4a1b4

Please sign in to comment.