Skip to content

Commit 88f1314

Browse files
committed
Cleanup
Signed-off-by: Mike Knepper <mknepper@nvidia.com>
1 parent 69896b2 commit 88f1314

6 files changed

Lines changed: 176 additions & 11 deletions

File tree

packages/data-designer-engine/src/data_designer/engine/dataset_builders/dataset_builder.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
ColumnGeneratorWithModel,
3535
GenerationStrategy,
3636
)
37-
from data_designer.engine.column_generators.utils.generator_classification import column_type_is_model_generated
3837
from data_designer.engine.compiler import compile_data_designer_config
3938
from data_designer.engine.context import current_row_group, current_row_group_start_offset
4039
from data_designer.engine.dataset_builders.errors import DatasetGenerationError
@@ -277,10 +276,6 @@ def single_column_configs(self) -> list[ColumnConfigT]:
277276
def single_column_config_by_name(self) -> dict[str, ColumnConfigT]:
278277
return {config.name: config for config in self.single_column_configs}
279278

280-
@functools.cached_property
281-
def llm_generated_column_configs(self) -> list[ColumnConfigT]:
282-
return [config for config in self.single_column_configs if column_type_is_model_generated(config.column_type)]
283-
284279
def build(
285280
self,
286281
*,

packages/data-designer-engine/src/data_designer/engine/readiness.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
from __future__ import annotations
2323

2424
import logging
25-
from typing import TYPE_CHECKING, Sequence
25+
from collections.abc import Sequence
26+
from typing import TYPE_CHECKING
2627

2728
from data_designer.engine import flags
2829
from data_designer.engine.column_generators.utils.generator_classification import column_type_is_model_generated
@@ -107,8 +108,7 @@ def _run_mcp_tool_health_check(
107108
column_configs: Sequence[ColumnConfigT],
108109
resource_provider: ResourceProvider,
109110
) -> None:
110-
# Tool aliases are only meaningful on LLM-generated column configs; the
111-
# filter here matches the (now-removed) ``DatasetBuilder.llm_generated_column_configs``.
111+
# Tool aliases are only meaningful on model-generated column configs.
112112
tool_aliases = sorted(
113113
{
114114
config.tool_alias

packages/data-designer-engine/tests/engine/test_readiness.py

Lines changed: 138 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@
33

44
from __future__ import annotations
55

6-
from unittest.mock import Mock
6+
from collections.abc import Sequence
7+
from unittest.mock import Mock, patch
78

89
import pytest
910

1011
from data_designer.config.column_configs import LLMTextColumnConfig, SamplerColumnConfig
12+
from data_designer.config.column_types import ColumnConfigT
1113
from data_designer.config.config_builder import DataDesignerConfigBuilder
1214
from data_designer.config.custom_column import custom_column_generator
15+
from data_designer.config.models import ModelConfig
1316
from data_designer.config.sampler_params import SamplerType, UUIDSamplerParams
1417
from data_designer.engine import flags
1518
from data_designer.engine.dataset_builders.errors import DatasetGenerationError
@@ -27,7 +30,12 @@ def _force_sync_engine(monkeypatch: pytest.MonkeyPatch) -> None:
2730
monkeypatch.setattr(flags, "DATA_DESIGNER_ASYNC_ENGINE", False)
2831

2932

30-
def _build_columns(*, model_configs, llm_columns: list[tuple[str, str]] = (), include_sampler: bool = True):
33+
def _build_columns(
34+
*,
35+
model_configs: list[ModelConfig],
36+
llm_columns: Sequence[tuple[str, str]] = (),
37+
include_sampler: bool = True,
38+
) -> list[ColumnConfigT]:
3139
"""Build a ``DataDesignerConfig`` and return its (already-flat) column configs.
3240
3341
``llm_columns`` is a list of ``(name, model_alias)`` pairs. ``include_sampler``
@@ -231,3 +239,131 @@ def test_run_readiness_check_no_models_no_tools_is_noop(
231239

232240
stub_resource_provider.model_registry.run_health_check.assert_not_called()
233241
mock_mcp_registry.run_health_check.assert_not_called()
242+
243+
244+
# ---------------------------------------------------------------------------
245+
# Column-type coverage
246+
# ---------------------------------------------------------------------------
247+
248+
249+
def test_run_readiness_check_collects_image_model_aliases(
250+
stub_resource_provider,
251+
stub_model_configs,
252+
) -> None:
253+
"""Image-generation columns contribute their model aliases like LLM columns do.
254+
255+
The dataset builder dispatches probes by ``model_generation_type`` inside
256+
``ModelRegistry.run_health_check``; readiness is generation-type-agnostic
257+
and must surface every alias regardless of column kind.
258+
"""
259+
from data_designer.config.column_configs import ImageColumnConfig
260+
261+
stub_resource_provider.model_registry.run_health_check = Mock()
262+
stub_resource_provider.mcp_registry = None
263+
264+
builder = DataDesignerConfigBuilder(model_configs=stub_model_configs)
265+
builder.add_column(SamplerColumnConfig(name="seed_id", sampler_type=SamplerType.UUID, params=UUIDSamplerParams()))
266+
builder.add_column(LLMTextColumnConfig(name="caption", prompt="x", model_alias="stub-text"))
267+
builder.add_column(ImageColumnConfig(name="picture", prompt="y", model_alias="stub-image"))
268+
269+
run_readiness_check(builder.build().columns, stub_resource_provider)
270+
271+
stub_resource_provider.model_registry.run_health_check.assert_called_once()
272+
(called_aliases,), _ = stub_resource_provider.model_registry.run_health_check.call_args
273+
assert set(called_aliases) == {"stub-text", "stub-image"}
274+
275+
276+
def test_run_readiness_check_passes_skip_flagged_aliases_to_registry(
277+
stub_resource_provider,
278+
stub_model_configs,
279+
) -> None:
280+
"""Readiness does not pre-filter ``skip_health_check=True`` aliases.
281+
282+
The skip decision lives in ``ModelRegistry.run_health_check`` (covered by
283+
``test_model_registry``). Readiness's contract is "pass every referenced
284+
alias through and let the registry decide" — verified here so future edits
285+
don't accidentally start filtering at this layer.
286+
"""
287+
stub_resource_provider.model_registry.run_health_check = Mock()
288+
stub_resource_provider.mcp_registry = None
289+
290+
columns = _build_columns(
291+
model_configs=stub_model_configs,
292+
llm_columns=[("col", "stub-text")],
293+
)
294+
295+
run_readiness_check(columns, stub_resource_provider)
296+
297+
stub_resource_provider.model_registry.run_health_check.assert_called_once_with(["stub-text"])
298+
299+
300+
# ---------------------------------------------------------------------------
301+
# Async dispatch
302+
# ---------------------------------------------------------------------------
303+
304+
305+
def test_run_readiness_check_dispatches_to_async_registry_under_async_engine(
306+
stub_resource_provider,
307+
stub_model_configs,
308+
monkeypatch: pytest.MonkeyPatch,
309+
) -> None:
310+
"""When the async engine is selected, model probes route through ``arun_health_check``.
311+
312+
The autouse fixture pins sync; this test overrides for the async path so the
313+
branch in ``readiness._run_model_health_check`` gets coverage.
314+
"""
315+
monkeypatch.setattr(flags, "DATA_DESIGNER_ASYNC_ENGINE", True)
316+
stub_resource_provider.model_registry.arun_health_check = Mock()
317+
stub_resource_provider.mcp_registry = None
318+
319+
columns = _build_columns(
320+
model_configs=stub_model_configs,
321+
llm_columns=[("col", "stub-text")],
322+
)
323+
324+
# ``run_coroutine_threadsafe`` returns a Future; we want the readiness wrapper
325+
# to call ``.result(timeout=...)`` on it, so install a Mock future whose
326+
# ``.result`` returns ``None`` (success).
327+
sentinel_future = Mock()
328+
sentinel_future.result.return_value = None
329+
330+
fake_loop = Mock()
331+
332+
with (
333+
patch("data_designer.engine.readiness.ensure_async_engine_loop", return_value=fake_loop, create=True),
334+
patch("asyncio.run_coroutine_threadsafe", return_value=sentinel_future) as mock_submit,
335+
):
336+
run_readiness_check(columns, stub_resource_provider)
337+
338+
# The async coroutine was created from arun_health_check and submitted to the loop.
339+
stub_resource_provider.model_registry.arun_health_check.assert_called_once_with(["stub-text"])
340+
mock_submit.assert_called_once()
341+
sentinel_future.result.assert_called_once_with(timeout=180)
342+
343+
344+
def test_run_readiness_check_cancels_future_and_reraises_on_timeout(
345+
stub_resource_provider,
346+
stub_model_configs,
347+
monkeypatch: pytest.MonkeyPatch,
348+
) -> None:
349+
"""A 180-second timeout cancels the future and re-raises ``TimeoutError``."""
350+
monkeypatch.setattr(flags, "DATA_DESIGNER_ASYNC_ENGINE", True)
351+
stub_resource_provider.model_registry.arun_health_check = Mock()
352+
stub_resource_provider.mcp_registry = None
353+
354+
columns = _build_columns(
355+
model_configs=stub_model_configs,
356+
llm_columns=[("col", "stub-text")],
357+
)
358+
359+
sentinel_future = Mock()
360+
sentinel_future.result.side_effect = TimeoutError()
361+
362+
with (
363+
patch("data_designer.engine.readiness.ensure_async_engine_loop", return_value=Mock(), create=True),
364+
patch("asyncio.run_coroutine_threadsafe", return_value=sentinel_future),
365+
pytest.raises(TimeoutError),
366+
):
367+
run_readiness_check(columns, stub_resource_provider)
368+
369+
sentinel_future.cancel.assert_called_once()

packages/data-designer/src/data_designer/cli/commands/check_models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def check_models_command(
3131
# Check models referenced by a YAML config
3232
data-designer check-models my_config.yaml
3333
34+
# Check models referenced by a remote config URL
35+
data-designer check-models https://example.com/my_config.yaml
36+
3437
# Check models referenced by a Python module
3538
data-designer check-models my_config.py
3639
"""

packages/data-designer/src/data_designer/cli/controllers/generation_controller.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from data_designer.config.errors import InvalidConfigError
1818
from data_designer.config.utils.constants import DEFAULT_DISPLAY_WIDTH
1919
from data_designer.engine.storage.artifact_storage import ResumeMode
20+
from data_designer.errors import DataDesignerError
2021
from data_designer.interface import DataDesigner
2122
from data_designer.logging import LOG_INDENT
2223

@@ -130,6 +131,9 @@ def run_check_models(self, config_source: str) -> None:
130131
try:
131132
data_designer = DataDesigner()
132133
data_designer.check_models(config_builder)
134+
except DataDesignerError as e:
135+
print_error(f"Model health check failed ({type(e).__name__}): {e}")
136+
raise typer.Exit(code=1)
133137
except Exception as e:
134138
print_error(f"Model health check failed: {e}")
135139
raise typer.Exit(code=1)

packages/data-designer/tests/cli/controllers/test_generation_controller.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,7 @@ def test_run_check_models_config_load_error(mock_load_config: MagicMock) -> None
701701
@patch(f"{_CTRL}.DataDesigner")
702702
@patch(f"{_CTRL}.load_config_builder")
703703
def test_run_check_models_health_check_failure(mock_load_config: MagicMock, mock_dd_cls: MagicMock) -> None:
704-
"""check_models exits with code 1 when a probe fails."""
704+
"""check_models exits with code 1 when a probe fails with a generic exception."""
705705
mock_load_config.return_value = MagicMock(spec=DataDesignerConfigBuilder)
706706
mock_dd = MagicMock()
707707
mock_dd_cls.return_value = mock_dd
@@ -714,6 +714,33 @@ def test_run_check_models_health_check_failure(mock_load_config: MagicMock, mock
714714
assert exc_info.value.exit_code == 1
715715

716716

717+
@patch(f"{_CTRL}.DataDesigner")
718+
@patch(f"{_CTRL}.load_config_builder")
719+
def test_run_check_models_typed_error_includes_class_name(
720+
mock_load_config: MagicMock, mock_dd_cls: MagicMock, capsys: pytest.CaptureFixture[str]
721+
) -> None:
722+
"""Typed engine errors exit 1 and surface the error class name to the user.
723+
724+
Without this, an authentication failure and a connection failure look identical
725+
on the terminal, defeating the purpose of typed engine errors.
726+
"""
727+
from data_designer.engine.models.errors import ModelAuthenticationError
728+
729+
mock_load_config.return_value = MagicMock(spec=DataDesignerConfigBuilder)
730+
mock_dd = MagicMock()
731+
mock_dd_cls.return_value = mock_dd
732+
mock_dd.check_models.side_effect = ModelAuthenticationError("bad creds")
733+
734+
controller = GenerationController()
735+
with pytest.raises(typer.Exit) as exc_info:
736+
controller.run_check_models(config_source="config.yaml")
737+
738+
assert exc_info.value.exit_code == 1
739+
captured = capsys.readouterr()
740+
assert "ModelAuthenticationError" in captured.out
741+
assert "bad creds" in captured.out
742+
743+
717744
# ---------------------------------------------------------------------------
718745
# run_create tests
719746
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)