diff --git a/src/dataframe.rs b/src/dataframe.rs index be10b8c28..1542967ca 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -152,111 +152,23 @@ impl PyDataFrame { let table_uuid = uuid::Uuid::new_v4().to_string(); - let mut html_str = " - - -
- - \n".to_string(); - - let schema = batches[0].schema(); + let mut html_str = String::new(); + html_str.push_str(&get_html_style_definitions()); + html_str.push_str(&get_html_table_opening()); - let mut header = Vec::new(); - for field in schema.fields() { - header.push(format!("", field.name())); - } - let header_str = header.join(""); - html_str.push_str(&format!("{}\n", header_str)); - - let batch_formatters = batches - .iter() - .map(|batch| { - batch - .columns() - .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) - .map(|c| { - c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) - }) - .collect::, _>>() - }) - .collect::, _>>()?; + html_str.push_str(&get_html_table_header(&batches[0].schema())); + let batch_formatters = create_batch_formatters(&batches)?; let rows_per_batch = batches.iter().map(|batch| batch.num_rows()); - // We need to build up row by row for html - let mut table_row = 0; - for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) { - for batch_row in 0..num_rows_in_batch { - table_row += 1; - let mut cells = Vec::new(); - for (col, formatter) in batch_formatter.iter().enumerate() { - let cell_data = formatter.value(batch_row).to_string(); - // From testing, primitive data types do not typically get larger than 21 characters - if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE { - let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE]; - cells.push(format!(" - ")); - } else { - cells.push(format!("", formatter.value(batch_row))); - } - } - let row_str = cells.join(""); - html_str.push_str(&format!("{}\n", row_str)); - } - } - html_str.push_str("
{}
-
- {short_cell_data} - {cell_data} - -
-
{}
\n"); + html_str.push_str(&get_html_table_rows( + &batch_formatters, + rows_per_batch, + &table_uuid, + )?); - html_str.push_str(" - - "); + html_str.push_str("\n"); + html_str.push_str(&get_html_js_functions()); if has_more { html_str.push_str("Data truncated due to size."); @@ -951,3 +863,139 @@ async fn collect_record_batches_to_display( Ok((record_batches, has_more)) } + +/// Returns the HTML style definitions for the table +fn get_html_style_definitions() -> String { + " + + +
+ + ".to_string() +} + +/// Returns the opening HTML table tags +fn get_html_table_opening() -> String { + "\n".to_string() +} + +/// Returns the HTML table headers based on the schema +fn get_html_table_header(schema: &Schema) -> String { + let mut header = Vec::new(); + for field in schema.fields() { + header.push(format!("", field.name())); + } + let header_str = header.join(""); + format!("{}\n", header_str) +} + +/// Creates array formatters for each batch +fn create_batch_formatters( + batches: &[RecordBatch], +) -> PyDataFusionResult>>> { + batches + .iter() + .map(|batch| { + batch + .columns() + .iter() + .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) + .map(|c| { + c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) + }) + .collect::, _>>() + }) + .collect::, _>>() + .map_err(PyDataFusionError::from) +} + +/// Returns the HTML table rows based on the batch formatters +fn get_html_table_rows( + batch_formatters: &[Vec>], + rows_per_batch: impl Iterator, + table_uuid: &str, +) -> PyDataFusionResult { + let mut html_str = String::new(); + let mut table_row = 0; + + for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) { + for batch_row in 0..num_rows_in_batch { + table_row += 1; + let mut cells = Vec::new(); + for (col, formatter) in batch_formatter.iter().enumerate() { + let cell_data = formatter.value(batch_row).to_string(); + cells.push(format_table_cell(cell_data, table_uuid, table_row, col)); + } + let row_str = cells.join(""); + html_str.push_str(&format!("{}\n", row_str)); + } + } + + Ok(html_str) +} + +/// Formats a single table cell, handling large content with expansion buttons +fn format_table_cell(cell_data: String, table_uuid: &str, table_row: usize, col: usize) -> String { + if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE { + let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE]; + format!(" + ") + } else { + format!("", cell_data) + } +} + +/// Returns the JavaScript functions for handling cell text expansion +fn get_html_js_functions() -> String { + " + + " + .to_string() +}
{}
+
+ {short_cell_data} + {cell_data} + +
+
{}