diff --git a/packages/data-designer/src/data_designer/cli/commands/create.py b/packages/data-designer/src/data_designer/cli/commands/create.py
index 3bf3265f0..ea98222ea 100644
--- a/packages/data-designer/src/data_designer/cli/commands/create.py
+++ b/packages/data-designer/src/data_designer/cli/commands/create.py
@@ -3,10 +3,12 @@
 from __future__ import annotations

+import click
 import typer

 from data_designer.cli.controllers.generation_controller import GenerationController
 from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS
+from data_designer.interface.results import SUPPORTED_EXPORT_FORMATS


 def create_command(
@@ -35,6 +37,17 @@ def create_command(
         "-o",
         help="Path where generated artifacts will be stored. Defaults to ./artifacts.",
     ),
+    output_format: str | None = typer.Option(
+        None,
+        "--output-format",
+        "-f",
+        click_type=click.Choice(list(SUPPORTED_EXPORT_FORMATS)),
+        help=(
+            "Export the dataset to a single file after generation. "
+            "Supported formats: jsonl, csv, parquet. "
+            "The file is written to <artifact-path>/<dataset-name>/<dataset-name>.<output-format>."
+        ),
+    ),
 ) -> None:
     """Create a full dataset and save results to disk.
@@ -60,4 +73,5 @@ def create_command(
         num_records=num_records,
         dataset_name=dataset_name,
         artifact_path=artifact_path,
+        output_format=output_format,
     )
diff --git a/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py b/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py
index 74a44c3cd..39c45f5f5 100644
--- a/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py
+++ b/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py
@@ -116,6 +116,7 @@ def run_create(
         num_records: int,
         dataset_name: str,
         artifact_path: str | None,
+        output_format: str | None = None,
     ) -> None:
         """Load config, create a full dataset, and save results to disk.

@@ -124,6 +125,8 @@
             num_records: Number of records to generate.
             dataset_name: Name for the generated dataset folder.
             artifact_path: Path where generated artifacts will be stored, or None for default.
+            output_format: If set, export the dataset to a single file in this format after
+                generation. One of 'jsonl', 'csv', 'parquet'.
""" config_builder = self._load_config(config_source) @@ -147,7 +150,7 @@ def run_create( print_error(f"Dataset creation failed: {e}") raise typer.Exit(code=1) - dataset = results.load_dataset() + actual_record_count = results.count_records() analysis = results.load_analysis() if analysis is not None: @@ -155,8 +158,20 @@ def run_create( analysis.to_report() console.print() - print_success(f"Dataset created — {len(dataset)} record(s) generated") console.print(f" Artifacts saved to: [bold]{results.artifact_storage.base_dataset_path}[/bold]") + + if output_format is not None: + export_path = Path(results.artifact_storage.base_dataset_path) / f"{dataset_name}.{output_format}" + try: + results.export(export_path) + except Exception as e: + export_path.unlink(missing_ok=True) + print_error(f"Export failed: {e}") + raise typer.Exit(code=1) + console.print(f" Exported to: [bold]{export_path}[/bold]") + + console.print() + print_success(f"Dataset created — {actual_record_count} record(s) generated") console.print() def _load_config(self, config_source: str) -> DataDesignerConfigBuilder: diff --git a/packages/data-designer/src/data_designer/interface/results.py b/packages/data-designer/src/data_designer/interface/results.py index 599ad1af0..5e1ed35dc 100644 --- a/packages/data-designer/src/data_designer/interface/results.py +++ b/packages/data-designer/src/data_designer/interface/results.py @@ -4,11 +4,13 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, get_args +import data_designer.lazy_heavy_imports as lazy from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.dataset_metadata import DatasetMetadata +from data_designer.config.errors import InvalidFileFormatError from data_designer.config.utils.visualization import WithRecordSamplerMixin from data_designer.engine.dataset_builders.errors import ArtifactStorageError from data_designer.engine.storage.artifact_storage import ArtifactStorage @@ -19,6 +21,9 @@ from data_designer.engine.dataset_builders.utils.task_model import TaskTrace +ExportFormat = Literal["jsonl", "csv", "parquet"] +SUPPORTED_EXPORT_FORMATS: tuple[str, ...] = get_args(ExportFormat) + class DatasetCreationResults(WithRecordSamplerMixin): """Results container for a Data Designer dataset creation run. @@ -69,6 +74,18 @@ def load_dataset(self) -> pd.DataFrame: """ return self.artifact_storage.load_dataset() + def count_records(self) -> int: + """Return the total number of records in the generated dataset. + + Counts rows by reading Parquet file metadata only — no data pages are + loaded, so memory usage is constant regardless of dataset size. + + Returns: + Total row count across all batch parquet files. + """ + batch_files = sorted(self.artifact_storage.final_dataset_path.glob("batch_*.parquet")) + return sum(lazy.pq.read_metadata(f).num_rows for f in batch_files) + def load_processor_dataset(self, processor_name: str) -> pd.DataFrame: """Load the dataset generated by a processor. 
@@ -95,6 +112,57 @@ def get_path_to_processor_artifacts(self, processor_name: str) -> Path:
             raise ArtifactStorageError(f"Processor {processor_name} has no artifacts.")
         return self.artifact_storage.processors_outputs_path / processor_name

+    def export(self, path: Path | str, *, format: ExportFormat | None = None) -> Path:
+        """Export the generated dataset to a single file by streaming batch files.
+
+        The output format is inferred from the file extension when *format* is
+        omitted. Pass *format* explicitly to override the extension (e.g. write a
+        ``.txt`` file as JSONL).
+
+        Unlike :meth:`load_dataset`, this method never materialises the full dataset
+        in memory — it reads batch parquet files one at a time and appends each to
+        the output file, keeping peak memory proportional to a single batch.
+
+        Args:
+            path: Output file path. The exact path is used as-is; the extension is
+                not rewritten.
+            format: Output format. One of ``'jsonl'``, ``'csv'``, or ``'parquet'``.
+                When omitted, the format is inferred from the file extension.
+
+        Returns:
+            Path to the written file.
+
+        Raises:
+            InvalidFileFormatError: If the format cannot be determined or is not
+                one of the supported values.
+            ArtifactStorageError: If no batch parquet files are found.
+
+        Example:
+            >>> results = data_designer.create(config, num_records=1000)
+            >>> results.export("output.jsonl")
+            PosixPath('output.jsonl')
+            >>> results.export("output.csv")
+            PosixPath('output.csv')
+            >>> results.export("output.txt", format="jsonl")
+            PosixPath('output.txt')
+        """
+        path = Path(path)
+        resolved_format: str = format if format is not None else path.suffix.lstrip(".").lower()
+        if resolved_format not in SUPPORTED_EXPORT_FORMATS:
+            raise InvalidFileFormatError(
+                f"Unsupported export format: {resolved_format!r}. Choose one of: {', '.join(SUPPORTED_EXPORT_FORMATS)}."
+            )
+        batch_files = sorted(self.artifact_storage.final_dataset_path.glob("batch_*.parquet"))
+        if not batch_files:
+            raise ArtifactStorageError("No batch parquet files found to export.")
+        if resolved_format == "jsonl":
+            _export_jsonl(batch_files, path)
+        elif resolved_format == "csv":
+            _export_csv(batch_files, path)
+        elif resolved_format == "parquet":
+            _export_parquet(batch_files, path)
+        return path
+
     def push_to_hub(
         self,
         repo_id: str,
@@ -140,3 +208,49 @@
             description=description,
             tags=tags,
         )
+
+
+def _export_jsonl(batch_files: list[Path], output: Path) -> None:
+    """Write *batch_files* to *output* as JSONL, one record per line.
+
+    Each batch is appended in turn so peak memory stays proportional to one batch.
+    """
+    with output.open("w", encoding="utf-8") as f:
+        for batch_file in batch_files:
+            chunk = lazy.pd.read_parquet(batch_file)
+            content = chunk.to_json(orient="records", lines=True, force_ascii=False, date_format="iso")
+            if content:
+                f.write(content)
+
+
+def _export_csv(batch_files: list[Path], output: Path) -> None:
+    """Write *batch_files* to *output* as CSV with a single header row."""
+    for i, batch_file in enumerate(batch_files):
+        chunk = lazy.pd.read_parquet(batch_file)
+        chunk.to_csv(output, mode="a" if i > 0 else "w", header=(i == 0), index=False)
+
+
+def _export_parquet(batch_files: list[Path], output: Path) -> None:
+    """Write *batch_files* to *output* as a single Parquet file.
+
+    Schemas are unified across batches before writing so that columns with minor
+    type drift (e.g. ``int64`` vs ``float64`` across batches) are cast to a
+    consistent schema rather than causing a write error.
+
+    Raises:
+        InvalidFileFormatError: If batch schemas have incompatible column names or
+            types that cannot be unified or cast.
+    """
+    schemas = [lazy.pq.read_schema(f) for f in batch_files]
+    try:
+        # promote_options="permissive" allows minor numeric type drift (e.g. int64 → double)
+        unified_schema = lazy.pa.unify_schemas(schemas, promote_options="permissive")
+    except (lazy.pa.ArrowInvalid, lazy.pa.ArrowTypeError) as e:
+        raise InvalidFileFormatError(f"Cannot unify batch schemas for parquet export: {e}") from e
+    with lazy.pq.ParquetWriter(output, unified_schema) as writer:
+        for batch_file in batch_files:
+            table = lazy.pq.read_table(batch_file)
+            try:
+                writer.write_table(table.cast(unified_schema))
+            except (lazy.pa.ArrowInvalid, ValueError) as e:
+                raise InvalidFileFormatError(f"Cannot cast batch {batch_file.name} to unified schema: {e}") from e
diff --git a/packages/data-designer/tests/cli/commands/test_create_command.py b/packages/data-designer/tests/cli/commands/test_create_command.py
index 30cae9bc7..fc779df7c 100644
--- a/packages/data-designer/tests/cli/commands/test_create_command.py
+++ b/packages/data-designer/tests/cli/commands/test_create_command.py
@@ -18,7 +18,9 @@ def test_create_command_delegates_to_controller(mock_ctrl_cls: MagicMock) -> Non
     mock_ctrl = MagicMock()
     mock_ctrl_cls.return_value = mock_ctrl

-    create_command(config_source="config.yaml", num_records=10, dataset_name="dataset", artifact_path=None)
+    create_command(
+        config_source="config.yaml", num_records=10, dataset_name="dataset", artifact_path=None, output_format=None
+    )

     mock_ctrl_cls.assert_called_once()
     mock_ctrl.run_create.assert_called_once_with(
@@ -26,6 +28,7 @@ def test_create_command_delegates_to_controller(mock_ctrl_cls: MagicMock) -> Non
         num_records=10,
         dataset_name="dataset",
         artifact_path=None,
+        output_format=None,
     )
@@ -40,6 +43,7 @@ def test_create_command_passes_custom_options(mock_ctrl_cls: MagicMock) -> None:
         num_records=100,
         dataset_name="my_data",
         artifact_path="/custom/output",
+        output_format=None,
     )

     mock_ctrl.run_create.assert_called_once_with(
@@ -47,6 +51,7 @@ def test_create_command_passes_custom_options(mock_ctrl_cls: MagicMock) -> None:
         num_records=100,
         dataset_name="my_data",
         artifact_path="/custom/output",
+        output_format=None,
     )
@@ -56,11 +61,37 @@ def test_create_command_default_artifact_path_is_none(mock_ctrl_cls: MagicMock)
     mock_ctrl = MagicMock()
     mock_ctrl_cls.return_value = mock_ctrl

-    create_command(config_source="config.yaml", num_records=5, dataset_name="ds", artifact_path=None)
+    create_command(
+        config_source="config.yaml", num_records=5, dataset_name="ds", artifact_path=None, output_format=None
+    )

     mock_ctrl.run_create.assert_called_once_with(
         config_source="config.yaml",
         num_records=5,
         dataset_name="ds",
         artifact_path=None,
+        output_format=None,
+    )
+
+
+@patch("data_designer.cli.commands.create.GenerationController")
+def test_create_command_passes_output_format(mock_ctrl_cls: MagicMock) -> None:
+    """Test create_command forwards --output-format to the controller."""
+    mock_ctrl = MagicMock()
+    mock_ctrl_cls.return_value = mock_ctrl
+
+    create_command(
+        config_source="config.yaml",
+        num_records=10,
+        dataset_name="dataset",
+        artifact_path=None,
+        output_format="jsonl",
+    )
+
+    mock_ctrl.run_create.assert_called_once_with(
+        config_source="config.yaml",
+        num_records=10,
+        dataset_name="dataset",
+        artifact_path=None,
+        output_format="jsonl",
     )
diff --git a/packages/data-designer/tests/cli/controllers/test_generation_controller.py b/packages/data-designer/tests/cli/controllers/test_generation_controller.py
index de4918cff..151f2cbb4 100644
--- a/packages/data-designer/tests/cli/controllers/test_generation_controller.py
+++ b/packages/data-designer/tests/cli/controllers/test_generation_controller.py
@@ -30,9 +30,7 @@ def _make_mock_preview_results(num_records: int) -> MagicMock:

 def _make_mock_create_results(num_records: int, base_path: str = "/output/artifacts/dataset") -> MagicMock:
     """Create a mock CreateResults with the given number of records."""
     mock_results = MagicMock()
-    mock_dataset = MagicMock()
-    mock_dataset.__len__ = MagicMock(return_value=num_records)
-    mock_results.load_dataset.return_value = mock_dataset
+    mock_results.count_records.return_value = num_records
     mock_results.artifact_storage.base_dataset_path = base_path
     return mock_results
@@ -772,3 +770,55 @@ def test_run_create_skips_report_when_analysis_is_none(mock_load_config: MagicMo
     # load_analysis() returns None, so to_report() must not be called.
     # If the code ignores the None check, an AttributeError propagates and the test fails.
     mock_results.load_analysis.assert_called_once()
+
+
+@patch(f"{_CTRL}.DataDesigner")
+@patch(f"{_CTRL}.load_config_builder")
+def test_run_create_with_output_format_happy_path(mock_load_config: MagicMock, mock_dd_cls: MagicMock) -> None:
+    """export() is called with the dataset-name-derived path when --output-format is given."""
+    mock_load_config.return_value = MagicMock(spec=DataDesignerConfigBuilder)
+    mock_dd = MagicMock()
+    mock_dd_cls.return_value = mock_dd
+    mock_results = _make_mock_create_results(5, "/output/artifacts/my_data")
+    mock_dd.create.return_value = mock_results
+
+    controller = GenerationController()
+    controller.run_create(
+        config_source="config.yaml",
+        num_records=5,
+        dataset_name="my_data",
+        artifact_path=None,
+        output_format="jsonl",
+    )
+
+    mock_results.export.assert_called_once_with(
+        Path("/output/artifacts/my_data") / "my_data.jsonl",
+    )
+
+
+@patch(f"{_CTRL}.DataDesigner")
+@patch(f"{_CTRL}.load_config_builder")
+def test_run_create_export_failure_exits(mock_load_config: MagicMock, mock_dd_cls: MagicMock, tmp_path: Path) -> None:
+    """If export() raises, run_create cleans up the partial file and exits with code 1."""
+    mock_load_config.return_value = MagicMock(spec=DataDesignerConfigBuilder)
+    mock_dd = MagicMock()
+    mock_dd_cls.return_value = mock_dd
+    mock_results = _make_mock_create_results(5, str(tmp_path))
+    mock_results.export.side_effect = RuntimeError("disk full")
+    mock_dd.create.return_value = mock_results
+
+    # Create a partial file to verify it gets cleaned up.
+    partial_file = tmp_path / "dataset.csv"
+    partial_file.write_text("partial")
+
+    controller = GenerationController()
+    with pytest.raises(typer.Exit) as exc_info:
+        controller.run_create(
+            config_source="config.yaml",
+            num_records=5,
+            dataset_name="dataset",
+            artifact_path=None,
+            output_format="csv",
+        )
+    assert exc_info.value.exit_code == 1
+    assert not partial_file.exists()
diff --git a/packages/data-designer/tests/cli/test_main.py b/packages/data-designer/tests/cli/test_main.py
index 32d9cfc7d..15349d8e6 100644
--- a/packages/data-designer/tests/cli/test_main.py
+++ b/packages/data-designer/tests/cli/test_main.py
@@ -84,4 +84,5 @@ def test_app_dispatches_lazy_create_command(mock_controller_cls: Mock) -> None:
         num_records=DEFAULT_NUM_RECORDS,
         dataset_name="dataset",
         artifact_path=None,
+        output_format=None,
     )
diff --git a/packages/data-designer/tests/interface/test_results.py b/packages/data-designer/tests/interface/test_results.py
index a28dd987e..200d43810 100644
--- a/packages/data-designer/tests/interface/test_results.py
+++ b/packages/data-designer/tests/interface/test_results.py
@@ -3,6 +3,8 @@
 from __future__ import annotations

+import json
+from pathlib import Path
 from unittest.mock import MagicMock, patch

 import pytest
@@ -11,9 +13,11 @@
 from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
 from data_designer.config.config_builder import DataDesignerConfigBuilder
 from data_designer.config.dataset_metadata import DatasetMetadata
+from data_designer.config.errors import InvalidFileFormatError
 from data_designer.config.preview_results import PreviewResults
 from data_designer.config.utils.errors import DatasetSampleDisplayError
 from data_designer.config.utils.visualization import display_sample_record as display_fn
+from data_designer.engine.dataset_builders.errors import ArtifactStorageError
 from data_designer.engine.storage.artifact_storage import ArtifactStorage
 from data_designer.interface.results import DatasetCreationResults
@@ -259,6 +263,167 @@ def test_load_dataset_independent_of_record_sampler_cache(stub_dataset_creation_
     stub_artifact_storage.load_dataset.assert_called_once()
+
+@pytest.fixture
+def stub_batch_dir(stub_dataframe, tmp_path):
+    """Directory with two batch parquet files split from stub_dataframe.
+
+    Splitting into two batches exercises the multi-batch streaming path in export().
+ """ + batch_dir = tmp_path / "parquet-files" + batch_dir.mkdir() + mid = len(stub_dataframe) // 2 + stub_dataframe.iloc[:mid].to_parquet(batch_dir / "batch_00000.parquet", index=False) + stub_dataframe.iloc[mid:].to_parquet(batch_dir / "batch_00001.parquet", index=False) + return batch_dir + + +@pytest.mark.parametrize("fmt", ["jsonl", "csv", "parquet"]) +def test_export_writes_file(stub_dataset_creation_results, stub_batch_dir, tmp_path, fmt) -> None: + """export() writes a non-empty file for each supported format.""" + stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir + out = tmp_path / f"out.{fmt}" + result = stub_dataset_creation_results.export(out) + assert result == out + assert out.exists() + assert out.stat().st_size > 0 + + +def test_export_jsonl_content(stub_dataset_creation_results, stub_dataframe, stub_batch_dir, tmp_path) -> None: + """JSONL export writes one valid JSON object per line, covering all records.""" + stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir + out = tmp_path / "out.jsonl" + stub_dataset_creation_results.export(out) + lines = out.read_text(encoding="utf-8").splitlines() + assert len(lines) == len(stub_dataframe) + for line in lines: + json.loads(line) + + +def test_export_csv_content(stub_dataset_creation_results, stub_dataframe, stub_batch_dir, tmp_path) -> None: + """CSV export produces a single header row and one data row per record.""" + stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir + out = tmp_path / "out.csv" + stub_dataset_creation_results.export(out) + loaded = lazy.pd.read_csv(out) + assert list(loaded.columns) == list(stub_dataframe.columns) + assert len(loaded) == len(stub_dataframe) + + +def test_export_parquet_content(stub_dataset_creation_results, stub_dataframe, stub_batch_dir, tmp_path) -> None: + """Parquet export round-trips to the original DataFrame across two batches.""" + stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir + out = tmp_path / "out.parquet" + stub_dataset_creation_results.export(out) + loaded = lazy.pd.read_parquet(out) + lazy.pd.testing.assert_frame_equal( + loaded.reset_index(drop=True), + stub_dataframe.reset_index(drop=True), + ) + + +def test_export_infers_format_from_extension(stub_dataset_creation_results, stub_batch_dir, tmp_path) -> None: + """export() infers the output format from the file extension when format is omitted.""" + stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir + out = tmp_path / "out.jsonl" + stub_dataset_creation_results.export(out) + lines = out.read_text(encoding="utf-8").splitlines() + for line in lines: + json.loads(line) + + +def test_export_explicit_format_overrides_extension( + stub_dataset_creation_results, stub_dataframe, stub_batch_dir, tmp_path +) -> None: + """Passing format= explicitly overrides extension-based inference.""" + stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir + out = tmp_path / "data.txt" + stub_dataset_creation_results.export(out, format="jsonl") + lines = out.read_text(encoding="utf-8").splitlines() + assert len(lines) == len(stub_dataframe) + for line in lines: + json.loads(line) + + +def test_export_parquet_schema_unification(stub_dataset_creation_results, tmp_path) -> None: + """Parquet export unifies schemas across batches with diverging column types.""" + batch_dir = tmp_path / "parquet-files" + batch_dir.mkdir() + # Batch 0: 'value' as int64; Batch 1: 'value' 
+    lazy.pd.DataFrame({"value": lazy.pd.array([1, 2], dtype="int64")}).to_parquet(
+        batch_dir / "batch_00000.parquet", index=False
+    )
+    lazy.pd.DataFrame({"value": lazy.pd.array([3.0, 4.0], dtype="float64")}).to_parquet(
+        batch_dir / "batch_00001.parquet", index=False
+    )
+    stub_dataset_creation_results.artifact_storage.final_dataset_path = batch_dir
+    out = tmp_path / "out.parquet"
+    stub_dataset_creation_results.export(out)
+    loaded = lazy.pd.read_parquet(out)
+    assert list(loaded["value"]) == [1.0, 2.0, 3.0, 4.0]
+
+
+def test_export_unknown_extension_raises(stub_dataset_creation_results, tmp_path) -> None:
+    """export() raises InvalidFileFormatError when the extension is not a supported format."""
+    with pytest.raises(InvalidFileFormatError, match="Unsupported export format"):
+        stub_dataset_creation_results.export(tmp_path / "out.xyz")
+
+
+def test_export_unsupported_explicit_format_raises(stub_dataset_creation_results, tmp_path) -> None:
+    """export() raises InvalidFileFormatError for an explicit unsupported format override."""
+    with pytest.raises(InvalidFileFormatError, match="Unsupported export format"):
+        stub_dataset_creation_results.export(tmp_path / "out.jsonl", format="xlsx")  # type: ignore[arg-type]
+
+
+def test_export_no_batch_files_raises(stub_dataset_creation_results, tmp_path) -> None:
+    """export() raises ArtifactStorageError when the batch directory is empty."""
+    empty_dir = tmp_path / "parquet-files"
+    empty_dir.mkdir()
+    stub_dataset_creation_results.artifact_storage.final_dataset_path = empty_dir
+    with pytest.raises(ArtifactStorageError, match="No batch parquet files found"):
+        stub_dataset_creation_results.export(tmp_path / "out.jsonl")
+
+
+def test_count_records(stub_dataset_creation_results, stub_dataframe, stub_batch_dir) -> None:
+    """count_records() returns the total row count without loading data pages."""
+    stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir
+    assert stub_dataset_creation_results.count_records() == len(stub_dataframe)
+
+
+def test_export_uppercase_extension_is_recognised(stub_dataset_creation_results, stub_batch_dir, tmp_path) -> None:
+    """export() treats file extensions case-insensitively (e.g. .JSONL → jsonl)."""
+    stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir
+    out = tmp_path / "out.JSONL"
+    result = stub_dataset_creation_results.export(out)
+    assert result == out
+    assert out.exists()
+    lines = out.read_text(encoding="utf-8").splitlines()
+    for line in lines:
+        json.loads(line)
+
+
+def test_export_parquet_incompatible_schemas_raises(stub_dataset_creation_results, tmp_path) -> None:
+    """_export_parquet wraps schema cast failures (incompatible column names) as InvalidFileFormatError.
+
+    With promote_options="permissive", pa.unify_schemas merges the two schemas into a superset
+    {col_a, col_b}. The cast step then raises ValueError because batch_00000 only has col_a.
+ """ + batch_dir = tmp_path / "parquet-files" + batch_dir.mkdir() + lazy.pd.DataFrame({"col_a": [1, 2]}).to_parquet(batch_dir / "batch_00000.parquet", index=False) + lazy.pd.DataFrame({"col_b": [3, 4]}).to_parquet(batch_dir / "batch_00001.parquet", index=False) + stub_dataset_creation_results.artifact_storage.final_dataset_path = batch_dir + with pytest.raises(InvalidFileFormatError, match="Cannot cast batch"): + stub_dataset_creation_results.export(tmp_path / "out.parquet") + + +def test_export_returns_path_object(stub_dataset_creation_results, stub_batch_dir, tmp_path) -> None: + """export() returns a Path regardless of whether str or Path was passed.""" + stub_dataset_creation_results.artifact_storage.final_dataset_path = stub_batch_dir + out = tmp_path / "out.jsonl" + result = stub_dataset_creation_results.export(str(out)) + assert isinstance(result, Path) + + def test_preview_results_dataset_metadata() -> None: """Test that PreviewResults uses DatasetMetadata in display_sample_record.""" config_builder = MagicMock(spec=DataDesignerConfigBuilder)