Skip to content

Commit

Permalink
added figure classification in doc tags
Browse files Browse the repository at this point in the history
Signed-off-by: Matteo-Omenetti <[email protected]>
  • Loading branch information
Matteo-Omenetti authored and Matteo-Omenetti committed Jan 29, 2025
1 parent 6841f9c commit 85cfe86
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
11 changes: 11 additions & 0 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,17 @@ def export_to_document_tokens(
add_page_index=add_page_index,
)

classifications = [
ann
for ann in self.annotations
if isinstance(ann, PictureClassificationData)
]
if len(classifications) > 0:
# ! TODO: this code currently assumes class_name is of type 'str';
# ! TODO: adapt it once class_name changes to an Enum
predicted_class = classifications[0].predicted_classes[0].class_name
body += DocumentToken.get_picture_classification_token(predicted_class)

if add_caption and len(self.captions):
text = self.caption_text(doc)

Expand Down
11 changes: 11 additions & 0 deletions docling_core/types/doc/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from enum import Enum
from typing import Tuple

from docling_core.types.doc.labels import PictureClassificationLabel


class TableToken(Enum):
"""Class to represent an LLM friendly representation of a Table."""
Expand Down Expand Up @@ -120,6 +122,10 @@ def get_special_tokens(
# for i in range(0, max_pages + 1):
# special_tokens.append(f"<page_{i}>")

# Dynamically add picture classification tokens
for _, member in PictureClassificationLabel.__members__.items():
special_tokens.append(f"<{member}>")

# Adding dynamically generated location-tokens
for i in range(0, max(page_dimension[0] + 1, page_dimension[1] + 1)):
special_tokens.append(f"<loc_{i}>")
Expand Down Expand Up @@ -147,6 +153,11 @@ def get_col_token(col: int, beg=bool) -> str:
else:
return f"</col_{col}>"

@staticmethod
def get_picture_classification_token(classification: str) -> str:
    """Wrap a picture classification name in angle brackets.

    The returned token has the form ``<classification>``; the given
    name is inserted unchanged between ``<`` and ``>``.
    """
    token_template = "<{}>"
    return token_template.format(classification)

# @staticmethod
# def get_page_token(page: int):
# """Function to get page tokens."""
Expand Down
2 changes: 1 addition & 1 deletion test/data/doc/dummy_doc.yaml.dt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<title><loc_8><loc_91><loc_81><loc_95>DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis</title>
<picture>
<loc_59><loc_0><loc_91><loc_75>
<caption>Figure 1: Four examples of complex page layouts across different document categories</caption>
<illustration><caption>Figure 1: Four examples of complex page layouts across different document categories</caption>
</picture>
<table>
<loc_42><loc_57><loc_49><loc_61>
Expand Down

0 comments on commit 85cfe86

Please sign in to comment.