Skip to content

Commit 65445ed

Browse files
David Huggins-Daines (dhdaines)
David Huggins-Daines
authored and committed
fix: implement polygon intersection properly
Signed-off-by: David Huggins-Daines <[email protected]> Signed-off-by: David Huggins-Daines <[email protected]>
1 parent 22a34df commit 65445ed

File tree

2 files changed

+43
-16
lines changed

2 files changed

+43
-16
lines changed

src/v2/pdf_resources/page_cell.h

+42-15
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ namespace pdflib
2727

2828
bool is_adjacent_to(pdf_resource<PAGE_CELL>& other, double delta);
2929

30+
bool intersects(pdf_resource<PAGE_CELL>& other);
31+
32+
bool contains(double x, double y);
33+
3034
bool has_same_reading_orientation(pdf_resource<PAGE_CELL>& other);
3135

3236
bool merge_with(pdf_resource<PAGE_CELL>& other, double delta);
@@ -259,23 +263,46 @@ namespace pdflib
259263

260264
return (num_chars>0? len/num_chars : 0.0);
261265
}
262-
266+
267+
// Twice the signed area of the triangle (a, b, c). The sign tells on
// which side of the directed line a -> b the point c lies; the value
// is zero when the three points are collinear.
inline double page_cell_orientation(double ax, double ay,
                                    double bx, double by,
                                    double cx, double cy)
{
  return (bx - ax) * (cy - ay) - (by - ay) * (cx - ax);
}

// True when the segments a-b and c-d properly cross, i.e. each segment
// has its end points strictly on opposite sides of the other one.
// Segments that merely touch or are collinear are not reported.
inline bool page_cell_segments_cross(double ax, double ay, double bx, double by,
                                     double cx, double cy, double dx, double dy)
{
  const double c_side = page_cell_orientation(ax, ay, bx, by, cx, cy);
  const double d_side = page_cell_orientation(ax, ay, bx, by, dx, dy);
  const double a_side = page_cell_orientation(cx, cy, dx, dy, ax, ay);
  const double b_side = page_cell_orientation(cx, cy, dx, dy, bx, by);

  // Compare signs explicitly rather than multiplying, so that tiny
  // values cannot underflow to zero and hide a crossing.
  const bool cd_straddles_ab = (c_side > 0.0 and d_side < 0.0) or (c_side < 0.0 and d_side > 0.0);
  const bool ab_straddles_cd = (a_side > 0.0 and b_side < 0.0) or (a_side < 0.0 and b_side > 0.0);

  return cd_straddles_ab and ab_straddles_cd;
}

// Determine whether the bounding quadrilateral of this cell, given by
// the corners (r_x0, r_y0) .. (r_x3, r_y3), intersects the one of
// `other`.
//
// Two checks are made:
//  1. corner containment: a corner of one quadrilateral lies inside
//     the other (as decided by contains());
//  2. edge crossing: an edge of one quadrilateral properly crosses an
//     edge of the other. This catches overlaps such as a "plus" shape,
//     where neither quadrilateral contains a corner of the other.
//
// Returns true as soon as either check succeeds.
bool pdf_resource<PAGE_CELL>::intersects(pdf_resource<PAGE_CELL>& other)
{
  // 1. corner containment, in both directions
  if (contains(other.r_x0, other.r_y0)
      or contains(other.r_x1, other.r_y1)
      or contains(other.r_x2, other.r_y2)
      or contains(other.r_x3, other.r_y3)
      or other.contains(r_x0, r_y0)
      or other.contains(r_x1, r_y1)
      or other.contains(r_x2, r_y2)
      or other.contains(r_x3, r_y3))
    {
      return true;
    }

  // 2. edge crossing: every edge of this against every edge of other
  const double ax[4] = {r_x0, r_x1, r_x2, r_x3};
  const double ay[4] = {r_y0, r_y1, r_y2, r_y3};
  const double bx[4] = {other.r_x0, other.r_x1, other.r_x2, other.r_x3};
  const double by[4] = {other.r_y0, other.r_y1, other.r_y2, other.r_y3};

  for (int i = 0; i < 4; i++)
    {
      const int ni = (i + 1) % 4; // wraps around to close the polygon
      for (int j = 0; j < 4; j++)
        {
          const int nj = (j + 1) % 4;
          if (page_cell_segments_cross(ax[i], ay[i], ax[ni], ay[ni],
                                       bx[j], by[j], bx[nj], by[nj]))
            {
              return true;
            }
        }
    }

  return false;
}
280+
281+
// Edge test used by the even-odd (ray casting) point-in-polygon rule.
//
// Casts a horizontal ray from (x, y) towards +x and reports whether it
// crosses the polygon edge running from (xi, yi) to (xj, yj).
//
// @param x, y    the query point
// @param xi, yi  first end point of the edge
// @param xj, yj  second end point of the edge
// @return true when the ray crosses the edge, false otherwise
inline bool inside_plane(double x, double y, double xi, double yi, double xj, double yj)
{
  // The ray can only cross the edge when the end points lie on
  // opposite sides of the horizontal line through y. Applying the
  // *same* comparison to both end points is essential: it also
  // guarantees yi != yj below, so a horizontal edge never divides by
  // zero.
  if ((yi > y) == (yj > y))
    return false;

  // x coordinate at which the edge meets the horizontal line through y
  const double x_cross = (xj - xi) * (y - yi) / (yj - yi) + xi;

  // The crossing counts only if it is to the right of the query point
  return x < x_cross;
}
285+
286+
// Point-in-polygon test for the quadrilateral with corners
// (r_x0, r_y0), (r_x1, r_y1), (r_x2, r_y2), (r_x3, r_y3), using the
// even-odd rule: inside_plane() is evaluated for each of the four
// edges and the point (x, y) is reported as contained when an odd
// number of those edge tests succeed.
bool pdf_resource<PAGE_CELL>::contains(double x, double y)
{
  // One edge test per side; the last one closes the polygon by going
  // from corner 3 back to corner 0.
  const bool hit_01 = inside_plane(x, y, r_x0, r_y0, r_x1, r_y1);
  const bool hit_12 = inside_plane(x, y, r_x1, r_y1, r_x2, r_y2);
  const bool hit_23 = inside_plane(x, y, r_x2, r_y2, r_x3, r_y3);
  const bool hit_30 = inside_plane(x, y, r_x3, r_y3, r_x0, r_y0);

  // XOR of the four results == parity of the number of hits
  return (hit_01 != hit_12) != (hit_23 != hit_30);
}
300+
301+
263302
bool pdf_resource<PAGE_CELL>::is_adjacent_to(pdf_resource<PAGE_CELL>& other, double eps)
264303
{
265-
// This assumes (even for right-to-left text) that other is to the
266-
// right of this. If two cells overlap then they are obviously
267-
// adjacent, otherwise the right side corners of this must be
268-
// within eps of the left side corners of other.
269-
270-
// Intersection of bounding rectangles (FIXME: Does not actually
271-
// imply overlap for rotated cells, not immediately sure the
272-
// correct and efficient way to compute that)
273-
double max_x0 = std::max(x0, other.x0);
274-
double min_x1 = std::min(x1, other.x1);
275-
double max_y0 = std::max(y0, other.y0);
276-
double min_y1 = std::min(y1, other.y1);
277-
if (max_x0 < min_x1 and max_y0 < min_y1)
278-
return true;
304+
// NOTE: This assumes (even for right-to-left text) that other is
305+
// to the right of this, as the calling code seems to do that.
279306

280307
// lower_right(this) : lower_left(other)
281308
double dx0 = other.r_x0 - r_x1;

src/v2/pdf_sanitators/cells.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ namespace pdflib
319319
double delta_0 = cells[i].average_char_width()*space_width_factor_for_merge;
320320
double delta_1 = cells[i].average_char_width()*space_width_factor_for_merge_with_space;
321321

322-
if(cells[i].is_adjacent_to(cells[j], delta_0))
322+
if(cells[i].is_adjacent_to(cells[j], delta_0) or cells[i].intersects(cells[j]))
323323
{
324324
cells[i].merge_with(cells[j], delta_1);
325325

0 commit comments

Comments
 (0)