From 0eed42e716fd6ca39d6c2341a19ecec3fe36fa2d Mon Sep 17 00:00:00 2001 From: davidmezzetti <561939+davidmezzetti@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:13:53 -0500 Subject: [PATCH] Update line-length formatting --- pyproject.toml | 2 ++ src/python/txtmarker/pdf.py | 31 +++++++------------------------ test/python/testpdf.py | 20 +++++--------------- 3 files changed, 14 insertions(+), 39 deletions(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6b313bc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 150 diff --git a/src/python/txtmarker/pdf.py b/src/python/txtmarker/pdf.py index fa49975..cd6a4ec 100644 --- a/src/python/txtmarker/pdf.py +++ b/src/python/txtmarker/pdf.py @@ -44,29 +44,18 @@ def highlight(self, infile, outfile, highlights): break # Create annotation for each column - annotations.append( - (name, base.COLORS[index], page) - + self.layout(elements[start:eindex]) - ) - annotations.append( - (name, base.COLORS[index], page) - + self.layout(elements[eindex : end + 1]) - ) + annotations.append((name, base.COLORS[index], page) + self.layout(elements[start:eindex])) + annotations.append((name, base.COLORS[index], page) + self.layout(elements[eindex : end + 1])) else: # Single column annotation - annotations.append( - (name, base.COLORS[index], page) - + self.layout(elements[start : end + 1]) - ) + annotations.append((name, base.COLORS[index], page) + self.layout(elements[start : end + 1])) self.annotate(annotations, infile, outfile) return annotations def pages(self, infile): - for page, layout in enumerate( - extract_pages(infile, laparams=LAParams(line_margin=1.0, char_margin=4.0)) - ): + for page, layout in enumerate(extract_pages(infile, laparams=LAParams(line_margin=1.0, char_margin=4.0))): elements = [] # Extract elements @@ -235,9 +224,7 @@ def annotate(self, annotations, infile, outfile): annotator.add_annotation( "square", Location(x1=x1, y1=y1, x2=x2, y2=y2, page=page), - Appearance( - fill=rgb + (0.3,), stroke_color=rgb + (0.3,), stroke_width=0 - ), + Appearance(fill=rgb + (0.3,), stroke_color=rgb + (0.3,), stroke_width=0), ) if title: @@ -326,9 +313,7 @@ def yposition(self, ranges, page, column, center, offset): y1, y2 = y1 - offset, y2 - offset else: # Try with positive offset - conflicts = self.conflicts( - ranges, page, column, y1 + offset, y2 + offset - ) + conflicts = self.conflicts(ranges, page, column, y1 + offset, y2 + offset) if not conflicts: y1, y2 = y1 + offset, y2 + offset else: @@ -372,6 +357,4 @@ def overlaps(self, start1, end1, start2, end2): number of overlapping coordinates """ - return len( - set(range(int(start1), int(end1))) & set(range(int(start2), int(end2))) - ) + return len(set(range(int(start1), int(end1))) & set(range(int(start2), int(end2)))) diff --git a/test/python/testpdf.py b/test/python/testpdf.py index 9d8e02f..334c1af 100644 --- a/test/python/testpdf.py +++ b/test/python/testpdf.py @@ -47,9 +47,7 @@ def testHighlights(self): (None, "Python provides the built-in .hash()"), ] - annotations = highlighter.highlight( - self.path("hash.pdf"), self.path("out.pdf"), highlights - ) + annotations = highlighter.highlight(self.path("hash.pdf"), self.path("out.pdf"), highlights) # Check annotations created self.assertEqual(len(annotations), 5) @@ -67,9 +65,7 @@ def testOverlaps(self): # Create duplicate highlights to test overlapping range highlights = [("Overlaps", "This article will explore various methods")] * 4 - annotations = highlighter.highlight( - self.path("embeddings.pdf"), self.path("out.pdf"), highlights - ) + annotations = highlighter.highlight(self.path("embeddings.pdf"), self.path("out.pdf"), highlights) # Check annotations created self.assertEqual(len(annotations), 4) @@ -90,9 +86,7 @@ def testFormatter(self): ), ] - annotations = highlighter.highlight( - self.path("neuml.pdf"), self.path("out.pdf"), highlights - ) + annotations = highlighter.highlight(self.path("neuml.pdf"), self.path("out.pdf"), highlights) # Check annotations created self.assertEqual(len(annotations), 2) @@ -104,13 +98,9 @@ def testColumns(self): highlighter = Factory.create("pdf") - highlights = [ - ("Multi-column", "enable machine-learning(.|\n)+specific domains") - ] + highlights = [("Multi-column", "enable machine-learning(.|\n)+specific domains")] - annotations = highlighter.highlight( - self.path("neuml.pdf"), self.path("out2.pdf"), highlights - ) + annotations = highlighter.highlight(self.path("neuml.pdf"), self.path("out2.pdf"), highlights) # Check annotations created self.assertEqual(len(annotations), 2)