diff --git a/src/v2/pdf_resources/page_cell.h b/src/v2/pdf_resources/page_cell.h index b5f3b5a6..37438d2b 100644 --- a/src/v2/pdf_resources/page_cell.h +++ b/src/v2/pdf_resources/page_cell.h @@ -27,6 +27,10 @@ namespace pdflib bool is_adjacent_to(pdf_resource& other, double delta); + bool intersects(pdf_resource& other); + + bool contains(double x, double y); + bool has_same_reading_orientation(pdf_resource& other); bool merge_with(pdf_resource& other, double delta); @@ -259,13 +263,64 @@ namespace pdflib return (num_chars>0? len/num_chars : 0.0); } - + + bool pdf_resource::intersects(pdf_resource& other) + { + // Use point-in-polygon (via even-odd rule) to determine if + // bounding quadrilaterals intersect. + return (contains(other.r_x0, other.r_y0) + or contains(other.r_x1, other.r_y1) + or contains(other.r_x2, other.r_y2) + or contains(other.r_x3, other.r_y3) + or other.contains(r_x0, r_y0) + or other.contains(r_x1, r_y1) + or other.contains(r_x2, r_y2) + or other.contains(r_x3, r_y3)); + } + + inline bool inside_plane(double x, double y, double xi, double yi, double xj, double yj) + { + return ((yi > y) != (yj > y) and (x < (xj - xi) * (y - yi) / (yj - yi) + xi)); + } + + bool pdf_resource::contains(double x, double y) + { + // point-in-polygon via even-odd rule + bool inside = false; + if (inside_plane(x, y, r_x3, r_y3, r_x0, r_y0)) + inside = not inside; + if (inside_plane(x, y, r_x0, r_y0, r_x1, r_y1)) + inside = not inside; + if (inside_plane(x, y, r_x1, r_y1, r_x2, r_y2)) + inside = not inside; + if (inside_plane(x, y, r_x2, r_y2, r_x3, r_y3)) + inside = not inside; + return inside; + } + + bool pdf_resource::is_adjacent_to(pdf_resource& other, double eps) { - double d0 = std::sqrt((r_x1-other.r_x0)*(r_x1-other.r_x0) + (r_y1-other.r_y0)*(r_y1-other.r_y0)); - double d1 = std::sqrt((r_x2-other.r_x3)*(r_x2-other.r_x3) + (r_y2-other.r_y3)*(r_y2-other.r_y3)); + // NOTE: This assumes (even for right-to-left text) that other is + // to the right of this, as the 
calling code seems to do that. + + // lower_right(this) : lower_left(other) + double dx0 = other.r_x0 - r_x1; + double dy0 = other.r_y0 - r_y1; + double d0 = std::sqrt(dx0 * dx0 + dy0 * dy0); - return ((d0<eps) and (d1<eps)); + if (d0 >= eps) + return false; + + // upper_right(this) : upper_left(other) + double dx1 = other.r_x3 - r_x2; + double dy1 = other.r_y3 - r_y2; + double d1 = std::sqrt(dx1 * dx1 + dy1 * dy1); + + if (d1 >= eps) + return false; + + return true; } bool pdf_resource::has_same_reading_orientation(pdf_resource& other) @@ -285,11 +340,16 @@ namespace pdflib LOG_S(ERROR) << "inconsistent merging of cells!"; } - double d0 = std::sqrt((r_x1-other.r_x0)*(r_x1-other.r_x0) + (r_y1-other.r_y0)*(r_y1-other.r_y0)); + // FIXME: Redundant calculation with is_adjacent_to + double dx0 = other.r_x0 - r_x1; + double dy0 = other.r_y0 - r_y1; + double d0 = std::sqrt(dx0 * dx0 + dy0 * dy0); + if((not left_to_right) or (not other.left_to_right)) { - if(delta<d0) + if(d0 >= delta and not intersects(other)) { text = " " + text; } @@ -299,7 +359,7 @@ namespace pdflib } else { - if(delta<d0) + if(d0 >= delta and not intersects(other)) { text += " "; } diff --git a/src/v2/pdf_sanitators/cells.h b/src/v2/pdf_sanitators/cells.h index c19c8e3a..0f458e49 100644 --- a/src/v2/pdf_sanitators/cells.h +++ b/src/v2/pdf_sanitators/cells.h @@ -283,7 +283,7 @@ namespace pdflib */ void pdf_sanitator::contract_cells_into_lines_v2(pdf_resource& cells, - double horizontal_cell_tolerance, + double horizontal_cell_tolerance, // FIXME: UNUSED bool enforce_same_font, double space_width_factor_for_merge, double space_width_factor_for_merge_with_space) @@ -319,7 +319,7 @@ namespace pdflib double delta_0 = cells[i].average_char_width()*space_width_factor_for_merge; double delta_1 = cells[i].average_char_width()*space_width_factor_for_merge_with_space; - if(cells[i].is_adjacent_to(cells[j], delta_0)) + if(cells[i].is_adjacent_to(cells[j], delta_0) or cells[i].intersects(cells[j])) { cells[i].merge_with(cells[j], delta_1);