Skip to content

Commit

Permalink
Update line-length formatting
Browse files: browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed Dec 13, 2024
1 parent ff2c5f3 commit 0eed42e
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 39 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
[tool.black]
line-length = 150
31 changes: 7 additions & 24 deletions src/python/txtmarker/pdf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -44,29 +44,18 @@ def highlight(self, infile, outfile, highlights):
break

# Create annotation for each column
annotations.append(
(name, base.COLORS[index], page)
+ self.layout(elements[start:eindex])
)
annotations.append(
(name, base.COLORS[index], page)
+ self.layout(elements[eindex : end + 1])
)
annotations.append((name, base.COLORS[index], page) + self.layout(elements[start:eindex]))
annotations.append((name, base.COLORS[index], page) + self.layout(elements[eindex : end + 1]))
else:
# Single column annotation
annotations.append(
(name, base.COLORS[index], page)
+ self.layout(elements[start : end + 1])
)
annotations.append((name, base.COLORS[index], page) + self.layout(elements[start : end + 1]))

self.annotate(annotations, infile, outfile)

return annotations

def pages(self, infile):
for page, layout in enumerate(
extract_pages(infile, laparams=LAParams(line_margin=1.0, char_margin=4.0))
):
for page, layout in enumerate(extract_pages(infile, laparams=LAParams(line_margin=1.0, char_margin=4.0))):
elements = []

# Extract elements
Expand Down Expand Up @@ -235,9 +224,7 @@ def annotate(self, annotations, infile, outfile):
annotator.add_annotation(
"square",
Location(x1=x1, y1=y1, x2=x2, y2=y2, page=page),
Appearance(
fill=rgb + (0.3,), stroke_color=rgb + (0.3,), stroke_width=0
),
Appearance(fill=rgb + (0.3,), stroke_color=rgb + (0.3,), stroke_width=0),
)

if title:
Expand Down Expand Up @@ -326,9 +313,7 @@ def yposition(self, ranges, page, column, center, offset):
y1, y2 = y1 - offset, y2 - offset
else:
# Try with positive offset
conflicts = self.conflicts(
ranges, page, column, y1 + offset, y2 + offset
)
conflicts = self.conflicts(ranges, page, column, y1 + offset, y2 + offset)
if not conflicts:
y1, y2 = y1 + offset, y2 + offset
else:
Expand Down Expand Up @@ -372,6 +357,4 @@ def overlaps(self, start1, end1, start2, end2):
number of overlapping coordinates
"""

return len(
set(range(int(start1), int(end1))) & set(range(int(start2), int(end2)))
)
return len(set(range(int(start1), int(end1))) & set(range(int(start2), int(end2))))
20 changes: 5 additions & 15 deletions test/python/testpdf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -47,9 +47,7 @@ def testHighlights(self):
(None, "Python provides the built-in .hash()"),
]

annotations = highlighter.highlight(
self.path("hash.pdf"), self.path("out.pdf"), highlights
)
annotations = highlighter.highlight(self.path("hash.pdf"), self.path("out.pdf"), highlights)

# Check annotations created
self.assertEqual(len(annotations), 5)
Expand All @@ -67,9 +65,7 @@ def testOverlaps(self):
# Create duplicate highlights to test overlapping range
highlights = [("Overlaps", "This article will explore various methods")] * 4

annotations = highlighter.highlight(
self.path("embeddings.pdf"), self.path("out.pdf"), highlights
)
annotations = highlighter.highlight(self.path("embeddings.pdf"), self.path("out.pdf"), highlights)

# Check annotations created
self.assertEqual(len(annotations), 4)
Expand All @@ -90,9 +86,7 @@ def testFormatter(self):
),
]

annotations = highlighter.highlight(
self.path("neuml.pdf"), self.path("out.pdf"), highlights
)
annotations = highlighter.highlight(self.path("neuml.pdf"), self.path("out.pdf"), highlights)

# Check annotations created
self.assertEqual(len(annotations), 2)
Expand All @@ -104,13 +98,9 @@ def testColumns(self):

highlighter = Factory.create("pdf")

highlights = [
("Multi-column", "enable machine-learning(.|\n)+specific domains")
]
highlights = [("Multi-column", "enable machine-learning(.|\n)+specific domains")]

annotations = highlighter.highlight(
self.path("neuml.pdf"), self.path("out2.pdf"), highlights
)
annotations = highlighter.highlight(self.path("neuml.pdf"), self.path("out2.pdf"), highlights)

# Check annotations created
self.assertEqual(len(annotations), 2)
Expand Down

0 comments on commit 0eed42e

Please sign in to comment.