From 7cf3af9c3ff2061b24793017ec441ff9a43e863f Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 18 Jul 2025 11:01:08 -0400 Subject: [PATCH 1/5] feat: start cell merging data class --- great_tables/_gt_data.py | 115 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/great_tables/_gt_data.py b/great_tables/_gt_data.py index d6874b89f..03de519ab 100644 --- a/great_tables/_gt_data.py +++ b/great_tables/_gt_data.py @@ -24,6 +24,7 @@ create_empty_frame, get_column_names, n_rows, + n_cols, to_list, validate_frame, ) @@ -1007,6 +1008,120 @@ def __init__(self, func: FormatFns, cols: list[str], rows: list[int]): # pass Formats = list +# Merge matrices ---- + +_MergeMatrix: TypeAlias = "list[list[int]]" + + +class MergeError(Exception): + """Represent an error from an invalid merge operation.""" + + +@dataclass(frozen=True) +class CellMerges: + """Matrix of merge values for a table body. + + This matrix is indexable using `body[row][col]`. Cells may have the value 0 (hidden), 1 (normal), + or greater than 1 (merging other cells). The top-left cell will be the one whose content and + styles are used. 
+ """ + + rowspans: _MergeMatrix + colspans: _MergeMatrix + + def _assign_merge_value( + self, matrix: _MergeMatrix, row: int, col: int, value: int + ) -> _MergeMatrix: + """Assign a merge value to a cell in the matrix.""" + self.validate_merge_cell(row, col, value) + + new_body = matrix.copy() + new_row = new_body[row].copy() + + new_row[col] = value + new_body[row] = new_row + + return new_body + + def assign_rowspan(self, row: int, col: int, value: int) -> Self: + """Assign a rowspan value to a cell in the rowspans matrix.""" + old_val = self.rowspans[row][col] + new_rowspans = self._assign_merge_value(self.rowspans, row, col, value) + + # if cell is already a merging cell, reset the merge (without mutating) + if old_val > 1: + for ii in range(row + 1, row + value): + new_rowspans[ii][col] = 1 + + # set new merge + for ii in range(row + 1, row + value): + new_rowspans[ii][col] = 0 + + return self.__class__(rowspans=new_rowspans, colspans=self.colspans) + + def assign_colspan(self, row: int, col: int, value: int) -> Self: + """Assign a colspan value to a cell in the colspans matrix.""" + old_val = self.rowspans[row][col] + new_colspans = self._assign_merge_value(self.colspans, row, col, value) + + # if cell is already a merging cell, reset the merge (without mutating) + if old_val > 1: + for ii in range(col + 1, col + value): + new_colspans[row][ii] = 1 + + # set new merge + for ii in range(col + 1, col + value): + new_colspans[row][ii] = 0 + + return self.__class__(rowspans=self.rowspans, colspans=new_colspans) + + def validate_merge_cell(self, row: int, col: int, value: int) -> None: + # TODO: handle value 0 or 1 + + # Case: merging on cells + # rowspan checks ---- + cell_rowspans = [crnt_row[col] for crnt_row in self.rowspans[row : row + value]] + cell_colspans = [crnt_row[col] for crnt_row in self.colspans[row : row + value]] + + # merging cell is not being merged on + if cell_rowspans[0] != 0: + raise MergeError( + "Merging cell is already being merged on 
from the left." + f"\n\n* row: {row}\n* col: {col}" + ) + if cell_colspans[0] != 0: + raise MergeError( + "Merging cell is already being merged on from above." + f"\n\n* row: {row}\n* col: {col}" + ) + + # merged cells are not part of another merge (or a merging cell) + n_other = len(cell_rowspans) - 1 + if cell_rowspans[1:] != [1] * n_other: + raise MergeError( + "Attempting to merge on cells that are part of another merge from left." + f"\n\n* row: {row}\n* col: {col}" + ) + if cell_colspans[1:] != [1] * n_other: + raise MergeError( + "Attempting to merge on cells that are part of another merge from above." + f"\n\n* row: {row}\n* col: {col}" + ) + + def from_data_frame(self, data: TblData) -> Self: + """Create a merge matrix from a DataFrame. + + The merge matrix is a 2D list of integers, where each int represents the number of + cells that should be merged into the cell. Great Tables creates two merge matrices, + one for merging cells to the right, one for merging cells downward. + """ + + rows = n_rows(data) + cols = n_cols(data) + + default = [[1] * rows for _ in range(cols)] + return self.__class__(rowspans=default, colspans=default) + # Options ---- From da0d2dcf6a4274911bcb822a2f4a4a39875c7d91 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Fri, 18 Jul 2025 11:01:44 -0400 Subject: [PATCH 2/5] dev: some basic comments to html rendering --- great_tables/_utils_render_html.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/great_tables/_utils_render_html.py b/great_tables/_utils_render_html.py index 11bd586ee..6af4af559 100644 --- a/great_tables/_utils_render_html.py +++ b/great_tables/_utils_render_html.py @@ -466,6 +466,8 @@ def create_body_component_h(data: GTData) -> str: ordered_index: list[tuple[int, GroupRowInfo]] = data._stub.group_indices_map() + # Loop over rows (j) + # TODO: are j and i the same? If so, remove one (e.g. 
# great_tables/_tbl_data.py
# n_cols ----
@singledispatch
def n_cols(data: DataFrameLike) -> int:
    """Get the number of columns from the input data table.

    This is the fallback used when no implementation is registered for `type(data)`; it
    delegates to `_raise_not_implemented()` (presumably raising for unsupported types --
    confirm against that helper).
    """
    raise _raise_not_implemented(data)


@n_cols.register(PdDataFrame)
@n_cols.register(PlDataFrame)
def _(data: Any) -> int:
    # pandas and polars frames both expose their column labels through `.columns`
    return len(data.columns)


@n_cols.register(PyArrowTable)
def _(data: PyArrowTable) -> int:
    # PyArrow tables report their width through `num_columns`, mirroring the `num_rows`
    # attribute used by the row-count generic in this module.
    return data.num_columns
# tests/test_gt_data.py
import pandas as pd
import pytest
from great_tables import GT
from great_tables._gt_data import Boxhead, ColInfo, RowInfo, Stub, CellMerges, MergeError


def _grid(*first_col: int) -> list[list[int]]:
    """Return a fresh 3-column matrix of ones whose first column is `first_col`."""
    return [[lead, 1, 1] for lead in first_col]


def _merges(rowspan_col=(0, 1, 1), colspan_col=(0, 1, 1)) -> CellMerges:
    """Build a `CellMerges` from the first-column values of each matrix.

    New lists are created on every call so no test can observe another test's writes.
    """
    return CellMerges(rowspans=_grid(*rowspan_col), colspans=_grid(*colspan_col))


def test_cell_merges_from_data_frame():
    frame = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    # Start from the default state where all cells are normal (value `1`)
    seed = _merges(rowspan_col=(1, 1, 1), colspan_col=(1, 1, 1))

    built = seed.from_data_frame(frame)

    assert built.rowspans == [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    assert built.colspans == [[1, 1, 1], [1, 1, 1], [1, 1, 1]]


def test_cell_merges_assign_rowspan():
    # 2-cell rowspan: cell (0, 0) absorbs the cell below it, (1, 0)
    result = _merges().assign_rowspan(0, 0, 2)

    assert result.rowspans[0][0] == 2
    assert result.rowspans[1][0] == 0


def test_cell_merges_assign_colspan():
    # 2-cell colspan: cell (0, 0) absorbs the cell to its right, (0, 1)
    result = _merges().assign_colspan(0, 0, 2)

    assert result.colspans[0][0] == 2
    assert result.colspans[0][1] == 0


def test_cell_merges_validate_merge_cell_error_rowspan_not_zero():
    # The rowspans value of the merging cell is not 0, which validation rejects
    merges = _merges(rowspan_col=(1, 1, 1))

    with pytest.raises(MergeError, match="Merging cell is already being merged on from the left"):
        merges.validate_merge_cell(0, 0, 2)


def test_cell_merges_validate_merge_cell_error_colspan_not_zero():
    # The colspans value of the merging cell is not 0, which validation rejects
    merges = _merges(colspan_col=(1, 1, 1))

    with pytest.raises(MergeError, match="Merging cell is already being merged on from above"):
        merges.validate_merge_cell(0, 0, 2)


def test_cell_merges_validate_merge_cell_error_rowspan_merge_conflict():
    # The cell that would be absorbed, (1, 0), has a rowspans value other than 1
    merges = _merges(rowspan_col=(0, 0, 1))

    with pytest.raises(
        MergeError, match="Attempting to merge on cells that are part of another merge from left"
    ):
        merges.validate_merge_cell(0, 0, 2)


def test_cell_merges_validate_merge_cell_error_colspan_merge_conflict():
    # The cell that would be absorbed, (1, 0), has a colspans value other than 1
    merges = _merges(colspan_col=(0, 0, 1))

    with pytest.raises(
        MergeError, match="Attempting to merge on cells that are part of another merge from above"
    ):
        merges.validate_merge_cell(0, 0, 2)


def test_cell_merges_validate_merge_cell_success():
    # A valid merge scenario: no exception is expected
    _merges().validate_merge_cell(0, 0, 2)


def test_cell_merges_assign_merge_value():
    # Cell (0, 0) holds 0 in both matrices, so validation lets the assignment through
    updated = _merges()._assign_merge_value(_grid(0, 1, 1), 0, 0, 3)

    assert updated[0][0] == 3
    assert updated[0][1] == 1


def test_cell_merges_with_merge_value_2():
    # A 2-cell rowspan already present at construction: (0, 0) covers (1, 0) below it
    merges = _merges(rowspan_col=(2, 0, 1), colspan_col=(1, 1, 1))

    assert merges.rowspans[0][0] == 2
    assert merges.rowspans[1][0] == 0


def test_cell_merges_assign_rowspan_value_2():
    # 2-cell rowspan: cell (0, 0) absorbs the cell below it, (1, 0)
    result = _merges().assign_rowspan(0, 0, 2)

    assert result.rowspans[0][0] == 2
    assert result.rowspans[1][0] == 0


def test_cell_merges_assign_colspan_value_2():
    # 2-cell colspan: cell (0, 0) absorbs the cell to its right, (0, 1)
    result = _merges().assign_colspan(0, 0, 2)

    assert result.colspans[0][0] == 2
    assert result.colspans[0][1] == 0


def test_cell_merges_assign_rowspan_value_3():
    # 3-cell rowspan: cell (0, 0) absorbs the two cells below it, (1, 0) and (2, 0)
    result = _merges().assign_rowspan(0, 0, 3)

    assert result.rowspans[0][0] == 3
    assert result.rowspans[1][0] == 0
    assert result.rowspans[2][0] == 0


def test_cell_merges_assign_colspan_value_3():
    # 3-cell colspan: cell (0, 0) absorbs the two cells to its right, (0, 1) and (0, 2)
    result = _merges().assign_colspan(0, 0, 3)

    assert result.colspans[0][0] == 3
    assert result.colspans[0][1] == 0
    assert result.colspans[0][2] == 0