Skip to content

Commit ee1de81

Browse files
committed
feat: Add max_table_rows_in_repr to control row display in DataFrame
- Updated DataFrame class to include max_table_rows_in_repr parameter for display configuration.
- Enhanced configure_display method to accept max_table_rows_in_repr.
- Modified DisplayConfig struct to include max_table_rows_in_repr with a default value of 10.
- Added tests to verify the functionality of max_table_rows_in_repr in both configuration and display output.
1 parent da116bf commit ee1de81

File tree

3 files changed

+78
-8
lines changed

3 files changed

+78
-8
lines changed

python/datafusion/dataframe.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,7 @@ def configure_display(
818818
max_table_bytes: Optional[int] = None,
819819
min_table_rows: Optional[int] = None,
820820
max_cell_length: Optional[int] = None,
821+
max_table_rows_in_repr: Optional[int] = None,
821822
) -> None:
822823
"""Configure display options for DataFrame representation.
823824
@@ -828,17 +829,26 @@ def configure_display(
828829
This is used for initial display and in notebooks.
829830
max_cell_length: Maximum length of a cell before it gets minimized (default: 25).
830831
Longer cells will be truncated with an expand button.
832+
max_table_rows_in_repr: Maximum number of rows to display in string representation
833+
(default: 10).
831834
832835
Raises:
833836
ValueError: If any of the provided values are less than or equal to 0.
834837
"""
835838
if any(
836839
value is not None and value <= 0
837-
for value in (max_table_bytes, min_table_rows, max_cell_length)
840+
for value in (
841+
max_table_bytes,
842+
min_table_rows,
843+
max_cell_length,
844+
max_table_rows_in_repr,
845+
)
838846
):
839847
raise ValueError("All values must be greater than 0.")
840848

841-
self.df.configure_display(max_table_bytes, min_table_rows, max_cell_length)
849+
self.df.configure_display(
850+
max_table_bytes, min_table_rows, max_cell_length, max_table_rows_in_repr
851+
)
842852

843853
def reset_display_config(self) -> None:
844854
"""Reset display configuration to default values."""

python/tests/test_dataframe.py

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,27 +1271,33 @@ def test_display_config(df):
12711271
assert config.max_table_bytes == 2 * 1024 * 1024 # 2 MB
12721272
assert config.min_table_rows == 20
12731273
assert config.max_cell_length == 25
1274+
assert config.max_table_rows_in_repr == 10 # Verify the new property
12741275

12751276

12761277
def test_configure_display(df):
12771278
"""Test setting display configuration properties."""
12781279
# Modify the display configuration
12791280
df.configure_display(
1280-
max_table_bytes=1024 * 1024, min_table_rows=10, max_cell_length=50 # 1 MB
1281+
max_table_bytes=1024 * 1024,
1282+
min_table_rows=10,
1283+
max_cell_length=50,
1284+
max_table_rows_in_repr=15, # Add test for the new property
12811285
)
12821286

12831287
# Verify the changes took effect
12841288
config = df.display_config
12851289
assert config.max_table_bytes == 1024 * 1024 # 1 MB
12861290
assert config.min_table_rows == 10
12871291
assert config.max_cell_length == 50
1292+
assert config.max_table_rows_in_repr == 15
12881293

12891294
# Test partial update (only changing one property)
1290-
df.configure_display(min_table_rows=5)
1295+
df.configure_display(max_table_rows_in_repr=5)
12911296
config = df.display_config
12921297
assert config.max_table_bytes == 1024 * 1024 # previous value retained
1293-
assert config.min_table_rows == 5 # only this value changed
1298+
assert config.min_table_rows == 10 # previous value retained
12941299
assert config.max_cell_length == 50 # previous value retained
1300+
assert config.max_table_rows_in_repr == 5 # only this value changed
12951301

12961302
# Test with extreme values (still valid, but potentially problematic)
12971303
# Zero values
@@ -1490,3 +1496,40 @@ def _create_numeric_test_df(ctx, rows) -> DataFrame:
14901496
data = list(range(rows))
14911497
batch = pa.RecordBatch.from_arrays([pa.array(data)], names=["values"])
14921498
return ctx.create_dataframe([[batch]])
1499+
1500+
1501+
def test_max_table_rows_in_repr(ctx):
1502+
"""Test that max_table_rows_in_repr controls the number of rows in string representation."""
1503+
# Create a dataframe with more rows than the default max_table_rows_in_repr (10)
1504+
rows = 20
1505+
df = _create_numeric_test_df(ctx, rows)
1506+
1507+
# First test with default setting (should limit to 10 rows)
1508+
repr_str = df.__repr__()
1509+
lines_default = _count_lines_in_str(repr_str)
1510+
1511+
# Default should be 10 rows max
1512+
assert lines_default <= 10
1513+
assert "Data truncated" in repr_str
1514+
1515+
# Now set a custom max_table_rows_in_repr value
1516+
custom_max_rows = 15
1517+
df.configure_display(max_table_rows_in_repr=custom_max_rows)
1518+
1519+
# Get the string representation with new configuration
1520+
repr_str_more = df.__repr__()
1521+
lines_custom = _count_lines_in_str(repr_str_more)
1522+
1523+
# Should show more rows than default but not more than configured max
1524+
assert lines_custom > lines_default
1525+
assert lines_custom <= custom_max_rows
1526+
assert "Data truncated" in repr_str_more
1527+
1528+
# Now set max_rows higher than total rows - should show all rows
1529+
df.configure_display(max_table_rows_in_repr=25)
1530+
repr_str_all = df.__repr__()
1531+
lines_all = _count_lines_in_str(repr_str_all)
1532+
1533+
# Should show all rows (20)
1534+
assert lines_all == rows
1535+
assert "Data truncated" not in repr_str_all

src/dataframe.rs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,22 +86,28 @@ pub struct DisplayConfig {
8686
/// Maximum length of a cell before it gets minimized (default: 25)
8787
#[pyo3(get, set)]
8888
pub max_cell_length: usize,
89+
/// Maximum number of rows to display in repr string output (default: 10)
90+
#[pyo3(get, set)]
91+
pub max_table_rows_in_repr: usize,
8992
}
9093

9194
#[pymethods]
9295
impl DisplayConfig {
9396
#[new]
94-
#[pyo3(signature = (max_table_bytes=None, min_table_rows=None, max_cell_length=None))]
97+
#[pyo3(signature = (max_table_bytes=None, min_table_rows=None, max_cell_length=None, max_table_rows_in_repr=None))]
9598
fn new(
9699
max_table_bytes: Option<usize>,
97100
min_table_rows: Option<usize>,
98101
max_cell_length: Option<usize>,
102+
max_table_rows_in_repr: Option<usize>,
99103
) -> Self {
100104
let default = DisplayConfig::default();
101105
Self {
102106
max_table_bytes: max_table_bytes.unwrap_or(default.max_table_bytes),
103107
min_table_rows: min_table_rows.unwrap_or(default.min_table_rows),
104108
max_cell_length: max_cell_length.unwrap_or(default.max_cell_length),
109+
max_table_rows_in_repr: max_table_rows_in_repr
110+
.unwrap_or(default.max_table_rows_in_repr),
105111
}
106112
}
107113
}
@@ -112,6 +118,7 @@ impl Default for DisplayConfig {
112118
max_table_bytes: 2 * 1024 * 1024, // 2 MB
113119
min_table_rows: 20,
114120
max_cell_length: 25,
121+
max_table_rows_in_repr: 10,
115122
}
116123
}
117124
}
@@ -165,7 +172,7 @@ impl PyDataFrame {
165172
collect_record_batches_to_display(
166173
self.df.as_ref().clone(),
167174
self.config.min_table_rows,
168-
10,
175+
self.config.max_table_rows_in_repr,
169176
&self.config,
170177
),
171178
)?;
@@ -858,12 +865,18 @@ impl PyDataFrame {
858865
}
859866

860867
/// Update display configuration
861-
#[pyo3(signature = (max_table_bytes=None, min_table_rows=None, max_cell_length=None))]
868+
#[pyo3(signature = (
869+
max_table_bytes=None,
870+
min_table_rows=None,
871+
max_cell_length=None,
872+
max_table_rows_in_repr=None
873+
))]
862874
fn configure_display(
863875
&mut self,
864876
max_table_bytes: Option<usize>,
865877
min_table_rows: Option<usize>,
866878
max_cell_length: Option<usize>,
879+
max_table_rows_in_repr: Option<usize>,
867880
) {
868881
let mut new_config = (*self.config).clone();
869882

@@ -879,6 +892,10 @@ impl PyDataFrame {
879892
new_config.max_cell_length = length;
880893
}
881894

895+
if let Some(rows) = max_table_rows_in_repr {
896+
new_config.max_table_rows_in_repr = rows;
897+
}
898+
882899
self.config = Arc::new(new_config);
883900
}
884901

0 commit comments

Comments
 (0)