diff --git a/docs/assets/recipes/code_generation/infinibyte.py b/docs/assets/recipes/code_generation/infinibyte.py new file mode 100644 index 000000000..56f727413 --- /dev/null +++ b/docs/assets/recipes/code_generation/infinibyte.py @@ -0,0 +1,521 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# "datasets", +# "pandas", +# ] +# /// +"""Nemotron Nano InfiniByte Recipe: Cross-Source Problem Generation + +Generate more diverse and complex training problems by cross-breeding two +source datasets. Each record pairs a "problem A" (e.g. a coding problem) with +a "problem B" (e.g. a math or science problem), then uses an LLM to create new +problems that incorporate concepts from both sources through either obfuscation +(adding plausible but irrelevant complexity) or complication (genuinely +increasing difficulty). + +This recipe implements the InfiniByte pipeline used for Nemotron Nano +post-training data. The key idea: rather than generating problems from scratch, +cross-join two existing problem datasets, then augment problem A with concepts +from problem B to produce novel, harder problems. + +Pipeline architecture: + + ┌─────────────────────────────────────────────────────────────────────────┐ + │ PRE-PROCESSING (outside Data Designer) │ + │ Download 2 HuggingFace datasets, cross-join with random sampling, │ + │ save as CSV seed file. │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 1: SEED + SAMPLER │ + │ Seed: cross-joined CSV (problem_a, problem_b pairs) │ + │ Sampler: combination_type (obfuscation / complication) │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 2: CANDIDATE GENERATION (LLM Structured) │ + │ Generate 2 candidate problems augmenting A with concepts from B. 
│ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 3: BEST PROBLEM SELECTION (LLM Structured) │ + │ Select the best candidate based on adherence, difficulty, clarity. │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 4: EVALUATION (LLM Structured) │ + │ Score difficulty (1-3), clarity (1-3), adherence (1-3). │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 5: SOLUTION GENERATION (LLM Text) │ + │ Solve the new problem. │ + └────────────────────────────────────────────────────────────────────────┘ + +Prerequisites: + - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases. + - Internet access for downloading HuggingFace datasets. + +Run: + # Basic usage (downloads OpenCodeReasoning + OpenMathReasoning, 100 records) + uv run infinibyte.py + + # Customize dataset strategy and record count + uv run infinibyte.py --strategy ocr_omr --num-records 500 --limit 10000 + + # For help message and available options + uv run infinibyte.py --help +""" + +from __future__ import annotations + +import hashlib +import random +import tempfile +from pathlib import Path + +import pandas as pd +from pydantic import BaseModel, Field + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +# ============================================================================= +# Pydantic models for structured LLM outputs +# ============================================================================= + + +class NewProblem(BaseModel): + added_concepts: str = Field( + ..., description="Specific new concepts incorporated into the original problem" + ) + problem: str = Field( + ..., description="A new problem statement incorporating new concepts from the provided problems." 
+ ) + + +class NewProblemList(BaseModel): + problems: list[NewProblem] = Field( + default_factory=list, description="List of new problems" + ) + + +class NewProblemWithReasoning(NewProblem): + reasoning: str = Field( + ..., description="Concise explanation for selecting this specific new problem" + ) + + +class NewProblemEvals(BaseModel): + difficulty: int = Field( + ..., + description=( + "Evaluation of problem difficulty relative to original. " + "3: Significantly more challenging. " + "2: Moderately more difficult. " + "1: Similar to or easier than the original." + ), + ) + clarity: int = Field( + ..., + description=( + "Evaluation of clarity and coherence. " + "3: Exceptionally clear and well-structured. " + "2: Generally clear with minor issues. " + "1: Unclear or poorly structured." + ), + ) + adherence: int = Field( + ..., + description=( + "Evaluation of adherence to the combination goal. " + "3: Excellent adherence. " + "2: Adequate adherence. " + "1: Poor adherence." + ), + ) + + +# ============================================================================= +# Dataset download and cross-join +# ============================================================================= + +DATASET_CONFIGS = { + "ocr": { + "name": "nvidia/OpenCodeReasoning", + "config_name": "split_0", + "input_column": "input", + "output_column": "problem_a", + "output_id_column": "id_a", + "deduplicate": True, + "max_records": 500, + "random_seed": 27, + }, + "omr": { + "name": "nvidia/OpenMathReasoning", + "split": "tir", + "input_column": "problem", + "output_column": "problem_b", + "output_id_column": "id_b", + "deduplicate": True, + "max_records": 500, + "random_seed": 7, + }, +} + +STRATEGIES = { + "ocr_omr": ("ocr", "omr"), +} + + +def fetch_hf_dataset_to_df(dataset_config: dict) -> pd.DataFrame: + """Load a HuggingFace dataset via streaming into a DataFrame with id and text columns. 
+ + Uses HF streaming to avoid downloading the full dataset, collecting up to + ``max_records`` deduplicated rows. + """ + from datasets import load_dataset + + dataset_name = dataset_config["name"] + config_name = dataset_config.get("config_name") + split = dataset_config.get("split") + input_col = dataset_config.get("input_column", "text") + output_col = dataset_config.get("output_column", input_col) + output_id_col = dataset_config.get("output_id_column", "id") + dedupe = dataset_config.get("deduplicate", False) + max_records = dataset_config.get("max_records", 500) + random_seed = dataset_config.get("random_seed", 42) + + print(f"\n=== Streaming {dataset_name} (config={config_name}, split={split or 'all'}) ===") + + load_kwargs: dict = {"path": dataset_name, "streaming": True} + if config_name: + load_kwargs["name"] = config_name + if split: + load_kwargs["split"] = split + + ds = load_dataset(**load_kwargs) + + # For DatasetDict (no split specified), use the first available split + if hasattr(ds, "keys"): + split_name = list(ds.keys())[0] + print(f" Using split: {split_name}") + ds = ds[split_name] + + # Shuffle the stream for diversity, then collect up to max_records + ds = ds.shuffle(seed=random_seed) + + rows = [] + seen: set[str] = set() + scanned = 0 + for rec in ds: + if input_col not in rec: + continue + text = rec[input_col] + scanned += 1 + if dedupe: + if text in seen: + continue + seen.add(text) + rec_id = rec.get("id") or hashlib.md5(text.encode("utf-8")).hexdigest() + rows.append({output_id_col: rec_id, output_col: text}) + + if len(rows) >= max_records: + break + + if scanned % 10_000 == 0: + print(f" Scanned {scanned} records, collected {len(rows)}...") + + df = pd.DataFrame(rows) + print(f" Collected {len(df)} rows (scanned={scanned}, deduplicated={dedupe})") + + return df + + +def cross_join_with_limit(df1: pd.DataFrame, df2: pd.DataFrame, limit: int = 10_000) -> pd.DataFrame: + """Randomly sample pairs from the cartesian product of two 
DataFrames.""" + n1, n2 = len(df1), len(df2) + total = n1 * n2 + actual_limit = min(limit, total) + + print(f"\nCross-joining {n1} x {n2} = {total} possible pairs, sampling {actual_limit}") + + flat_indices = random.sample(range(total), actual_limit) + idx1 = [k // n2 for k in flat_indices] + idx2 = [k % n2 for k in flat_indices] + + sub1 = df1.iloc[idx1].reset_index(drop=True) + sub2 = df2.iloc[idx2].reset_index(drop=True) + + return pd.concat([sub1, sub2], axis=1) + + +def prepare_seed_data(strategy: str = "ocr_omr", limit: int = 10_000) -> Path: + """Download datasets, cross-join, and save as CSV. Returns the CSV path.""" + ds_a_key, ds_b_key = STRATEGIES[strategy] + + df_a = fetch_hf_dataset_to_df(DATASET_CONFIGS[ds_a_key]) + df_b = fetch_hf_dataset_to_df(DATASET_CONFIGS[ds_b_key]) + + cross_joined = cross_join_with_limit(df_a, df_b, limit=limit) + + seed_path = Path(tempfile.mkdtemp()) / "infinibyte_seed.csv" + cross_joined.to_csv(seed_path, index=False) + print(f"\nSeed data saved to: {seed_path} ({len(cross_joined)} rows)") + + return seed_path + + +# ============================================================================= +# LLM prompts +# ============================================================================= + +PROBLEM_SYSTEM_PROMPT = """\ +You are an experienced competitive programmer, well versed in algorithms, \ +data structures, mathematics, physics, chemistry, biology and other sciences. \ +You excel in crafting problems that combine multiple concepts into a cohesive \ +problem statement. +""" + +SOLUTION_SYSTEM_PROMPT = """\ +You are a helpful and harmless code assistant, well versed in competitive \ +coding problems and STEM subjects. You should think step-by-step before \ +responding to any instruction. + +You must use python programming language when generating code. 
+You must use the python code block for just the final solution with the \ +following format: +```python +# Your final solution goes here +``` +""" + +CANDIDATE_GENERATION_PROMPT = """\ +### Problem A: +{{ problem_a }} + +### Problem B: +{{ problem_b }} + +Carefully examine problems A and B above. Then formulate TWO new problems by \ +augmenting Problem A with concepts from Problem B. + +DO NOT REPEAT PROBLEM B VERBATIM WHEN AUGMENTING. INCORPORATE JUST THE \ +CONCEPTS FROM IT. + +{% if combination_type == 'obfuscation' %} +Focus on obfuscation: Add concepts from Problem B to Problem A in a way that \ +makes the new problem seem more complex, but doesn't actually change the \ +solution. The added information should appear relevant but be effectively \ +irrelevant to solving the core problem. The goal is to create a problem that \ +appears more complicated than it actually is. + +NEVER DISCLOSE THAT ADDED INFORMATION IS IRRELEVANT OR THAT IT DOESN'T \ +AFFECT THE PROBLEM. +IF A QUESTION IS POSED IN PROBLEM A, THAT QUESTION MUST REMAIN THE SAME. +IF INPUT, OUTPUT, AND EXAMPLES ARE PRESENT IN PROBLEM A, INCLUDE THEM IN THE \ +FINAL PROBLEM AS WELL. + +{% elif combination_type == 'complication' %} +Focus on complication: Integrate concepts from Problem B into Problem A to \ +genuinely increase the complexity. The solution should require understanding \ +and applying elements from both problems. The new problem should be more \ +challenging but still logically coherent and solvable. + +GIVEN THAT IT'S A NEW PROBLEM, DO NOT INCLUDE EXEMPLARY INPUT AND OUTPUT FROM \ +THE ORIGINAL PROBLEM. +{% endif %} + +MAKE SURE TO INCORPORATE CONCEPTS ONLY FROM PROBLEM B. +Your augmented problem should be believable and appear as a natural, cohesive \ +question without artificial divisions between the original elements. +A reader should not be able to easily identify which parts came from Problem A \ +versus Problem B. + +DO NOT USE WORDS "PROBLEM A" OR "PROBLEM B" IN YOUR RESPONSE. 
INSTEAD, \ +PROVIDE A COMPLETE PROBLEM STATEMENT. +""" + +BEST_SELECTION_PROMPT = """\ +### Original problem: +{{ problem_a }} + +Examine candidate problems below which were created with the goal of \ +{% if combination_type == 'obfuscation' %} +adding information to make the original problem seem more complex, without \ +actually changing the solution. The added information should be effectively \ +irrelevant to solving the core problem. +{% elif combination_type == 'complication' %} +genuinely increasing the complexity of the original problem by incorporating \ +new concepts that are logically coherent and solvable. Solving the new problem \ +should require understanding and applying newly introduced concepts. +{% endif %} + +### Candidate problems: +{{ problem_candidates }} + +Select the BEST problem based on the following criteria: +1. Goal adherence (1-3): How well does the new problem adhere to the goal of \ +{% if combination_type == 'obfuscation' %} +obfuscating the original problem without actually changing the solution +{% elif combination_type == 'complication' %} +increasing the complexity of the original problem by incorporating new \ +concepts that are logically coherent and solvable +{% endif %} +2. Difficulty (1-3): How challenging is the problem to answer, compared to the \ +original problem. +3. Clarity (1-3): Is the problem clearly formulated and coherent? +""" + +EVALUATION_PROMPT = """\ +### Original problem: +{{ problem_a }} + +### New problem: +{{ new_problem }} + +The new problem was created with the goal of \ +{% if combination_type == 'obfuscation' %} +adding information to make the original problem seem more complex, without \ +actually changing the solution. The added information should be effectively \ +irrelevant to solving the core problem. +{% elif combination_type == 'complication' %} +genuinely increasing the complexity of the original problem by incorporating \ +new concepts that are logically coherent and solvable. 
Solving the new problem \ +should require understanding and applying newly introduced concepts. +{% endif %} + +## Instructions: +1. Carefully examine and compare the new problem to the original problem. +2. Evaluate the new problem on goal adherence, difficulty and clarity. +""" + + +# ============================================================================= +# Pipeline builder +# ============================================================================= + + +def build_config(model_alias: str, seed_path: Path) -> dd.DataDesignerConfigBuilder: + config_builder = dd.DataDesignerConfigBuilder() + + # ── Seed data ──────────────────────────────────────────────────────── + + config_builder.with_seed_dataset( + dd.LocalFileSeedSource(path=str(seed_path)), + sampling_strategy=dd.SamplingStrategy.SHUFFLE, + ) + + # ── Stage 1: Combination type sampler ──────────────────────────────── + + config_builder.add_column( + dd.SamplerColumnConfig( + name="combination_type", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["obfuscation", "complication"]), + ) + ) + + # ── Stage 2: Candidate problem generation ──────────────────────────── + + config_builder.add_column( + dd.LLMStructuredColumnConfig( + name="problem_candidates", + prompt=CANDIDATE_GENERATION_PROMPT, + system_prompt=PROBLEM_SYSTEM_PROMPT, + output_format=NewProblemList, + model_alias=model_alias, + ) + ) + + # ── Stage 3: Best problem selection ────────────────────────────────── + + config_builder.add_column( + dd.LLMStructuredColumnConfig( + name="best_problem_json", + prompt=BEST_SELECTION_PROMPT, + system_prompt=PROBLEM_SYSTEM_PROMPT, + output_format=NewProblemWithReasoning, + model_alias=model_alias, + ) + ) + + config_builder.add_column( + dd.ExpressionColumnConfig( + name="new_problem", + expr="{{ best_problem_json.problem }}", + ) + ) + + # ── Stage 4: Evaluation ────────────────────────────────────────────── + + config_builder.add_column( + 
dd.LLMStructuredColumnConfig( + name="evals", + prompt=EVALUATION_PROMPT, + system_prompt=PROBLEM_SYSTEM_PROMPT, + output_format=NewProblemEvals, + model_alias=model_alias, + ) + ) + + # ── Stage 5: Solution generation ───────────────────────────────────── + # NOTE: The evals column above already contains difficulty, clarity, and + # adherence scores as structured fields (e.g. {{ evals.difficulty }}). + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="solution", + prompt="Solve the following problem: {{ new_problem }}", + system_prompt=SOLUTION_SYSTEM_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +# ============================================================================= +# Dataset creation +# ============================================================================= + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + num_records: int, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + results = data_designer.create(config_builder, num_records=num_records) + return results + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser(description="Nemotron Nano InfiniByte Recipe") + parser.add_argument("--model-alias", type=str, default="nvidia-text") + parser.add_argument("--num-records", type=int, default=5) + parser.add_argument("--artifact-path", type=str, default=None) + parser.add_argument( + "--strategy", + type=str, + default="ocr_omr", + choices=list(STRATEGIES.keys()), + help="Cross-join strategy: which two datasets to combine (default: ocr_omr)", + ) + parser.add_argument( + "--limit", + type=int, + default=10_000, + help="Maximum number of cross-joined pairs to sample (default: 10000)", + ) + args = parser.parse_args() + + print("=== Step 1: Preparing seed data ===") + seed_path = prepare_seed_data(strategy=args.strategy, limit=args.limit) + + print("\n=== Step 2: 
Building pipeline and generating data ===") + config_builder = build_config(model_alias=args.model_alias, seed_path=seed_path) + results = create_dataset( + config_builder, + num_records=args.num_records, + artifact_path=args.artifact_path, + ) + + print(f"\nDataset saved to: {results.artifact_storage.final_dataset_path}") + results.load_analysis().to_report() diff --git a/docs/assets/recipes/model_usability/prompt_sensitivity.py b/docs/assets/recipes/model_usability/prompt_sensitivity.py new file mode 100644 index 000000000..4a4673e0e --- /dev/null +++ b/docs/assets/recipes/model_usability/prompt_sensitivity.py @@ -0,0 +1,528 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# "pandas", +# ] +# /// +"""Nemotron Nano Prompt Sensitivity Recipe: Regex-Verified Preamble Generation + +Generate diverse prompt preambles for RL training that pair natural-language +instructions with regex-verified output format specifications. Each record +contains a paraphrased preamble, a format instruction aligned to a regex +pattern, and a composed user prompt -- all scored by four LLM judges. + +This recipe implements the preamble generation stage of the prompt sensitivity +pipeline used for Nemotron Nano training. The key idea: seed the pipeline with +10 regex-based answer formats (boxed, double-parens, angle brackets, XML tags, +etc.), then use an LLM to paraphrase both the instruction preamble and the +format specification while preserving the regex contract. Placement order +variants control where the preamble, format instruction, and {problem} +placeholder appear in the final prompt. 
+ +Pipeline architecture: + + ┌─────────────────────────────────────────────────────────────────────────┐ + │ SEED DATA: 10 regex formats x 30 preambles │ + │ Each row: format_key, output_regex, seed_preamble, │ + │ seed_format_instruction │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 1: DIVERSITY SAMPLERS (7 columns) │ + │ sentence_length, sentence_type, tone, strictness_level, │ + │ verbosity_level, domain_context, preamble_format_order │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 2: PREAMBLE GENERATION (LLM) │ + │ Paraphrase seed preamble respecting sampled controls. │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 3: FORMAT INSTRUCTION GENERATION (LLM) │ + │ Paraphrase format instruction preserving regex intent. │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 4: USER PROMPT COMPOSITION (LLM) │ + │ Assemble preamble + format instruction + {problem} placeholder. │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 5: QUALITY SCORING (4 LLM judges) │ + │ format_compliance (0-2), regex_alignment (0-1), │ + │ order_coherence (0-1), preamble_quality (0-3) │ + └────────────────────────────────────────────────────────────────────────┘ + +Prerequisites: + - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases. 
+ +Run: + uv run prompt_sensitivity.py + + uv run prompt_sensitivity.py --num-records 200 + + uv run prompt_sensitivity.py --help +""" + +from __future__ import annotations + +import tempfile +from pathlib import Path + +import pandas as pd + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +# ============================================================================= +# Seed data: regex format templates + seed preambles +# ============================================================================= + +FORMAT_TEMPLATES = [ + { + "format_key": "fmt_00", + "output_regex": r"\boxed{([.*?])}", + "seed_format_instruction": "Make sure to put the answer (and only answer) inside \\boxed{}.", + }, + { + "format_key": "fmt_01", + "output_regex": r"\(\((.*?)\)\)", + "seed_format_instruction": "Your final answer (and only the answer) should be enclosed within double parentheses.", + }, + { + "format_key": "fmt_02", + "output_regex": r"Answer is\s*\[([A-Za-z])\]", + "seed_format_instruction": "Wrap answer in square brackets at the end: 'Answer is [X]'.", + }, + { + "format_key": "fmt_03", + "output_regex": r"\(Answer:\s*([A-Za-z])\)", + "seed_format_instruction": "Conclude with (Answer: X), where X is the final answer.", + }, + { + "format_key": "fmt_04", + "output_regex": r"Final Answer:\s*\|\|(.*?)\|\|", + "seed_format_instruction": "End your response with Final Answer: ||X||, where X is the final answer (and only the answer).", + }, + { + "format_key": "fmt_05", + "output_regex": r"\[Answer:\s*([A-Za-z])\)", + "seed_format_instruction": "Remember to end with [Answer: X], where X is the final answer.", + }, + { + "format_key": "fmt_06", + "output_regex": r"<<(.*?)>>", + "seed_format_instruction": "Put your final answer in double angle brackets.", + }, + { + "format_key": "fmt_07", + "output_regex": r"\s*\*\*(.*?)\*\*", + "seed_format_instruction": "Give the answer at the end in this format -> **X**, where X is 
final answer.", + }, + { + "format_key": "fmt_08", + "output_regex": r"\s*([.*?])\s*", + "seed_format_instruction": "Wrap your final answer in XML-style tags like this: X.", + }, + { + "format_key": "fmt_09", + "output_regex": r"\boxed{([.*?])}", + "seed_format_instruction": "The final answer must be in \\boxed{} format. It's crucial to follow this format.", + }, +] + +SEED_PREAMBLES = [ + "Solve the following problem step by step.", + "Provide a comprehensive solution to the problem below.", + "Think carefully and solve the following.", + "Analyze the problem and provide your answer.", + "Work through the following problem methodically.", + "Read the problem carefully and provide a detailed solution.", + "Consider all aspects of the problem before answering.", + "Break down the problem into steps and solve it.", + "Explain your reasoning as you solve the following.", + "Solve the problem, then provide the answer.", + "Provide your reasoning for the answer and state the final result.", + "Think step-by-step before giving your final answer.", + "Carefully analyze the following and provide your solution.", + "Show your work as you solve the following problem.", + "Reason through the problem below and give your answer.", + "Solve the following. Show your reasoning.", + "Provide a clear and complete solution.", + "Answer the following question with full explanation.", + "Walk through the solution step by step.", + "Read the following problem and solve it completely.", + "Give a thorough answer to the problem below.", + "Determine the answer to the following problem.", + "Present your solution with supporting reasoning.", + "Evaluate the following and provide your answer.", + "Think about the problem below and respond with your solution.", + "Provide a well-reasoned answer to the following.", + "Solve this problem. 
Explain each step.", + "Work out the answer to the following question.", + "Address the problem below with a complete solution.", + "Respond to the following with a clear answer.", +] + +# ============================================================================= +# Placement order variants +# ============================================================================= + +PLACEMENT_ORDERS = [ + "P + F + {problem}", + "F + P + {problem}", + "P + {problem} + F", + "F + {problem} + P", + "{problem} + P + F", + "{problem} + F + P", + "PF + {problem}", + "{problem} + PF", +] + +# ============================================================================= +# LLM prompts +# ============================================================================= + +PREAMBLE_GEN_PROMPT = """\ +You are rewriting a seed preamble for an open-ended question prompt. + +Seed preamble: {{ seed_preamble }} + +Constraints: +- Sentence length: {{ sentence_length }} +- Sentence type: {{ sentence_type }} +- Tone: {{ tone }} +- Strictness: {{ strictness_level }} +- Verbosity: {{ verbosity_level }} +- Domain: {{ domain_context }} + +Instructions: +- Paraphrase the seed preamble (do NOT copy it verbatim). +- Produce a concise instruction line for generic open-ended questions. +- Keep it neutral and generic; do NOT include output formatting requirements. +- Respect the constraints above (length, type, tone, strictness, verbosity). +- Output ONLY the rewritten preamble, nothing else. +""" + +FORMAT_INSTRUCTION_GEN_PROMPT = """\ +You are rewriting a format instruction that tells the user how to present their final answer. + +Seed format instruction: {{ seed_format_instruction }} +Output regex pattern: {{ output_regex }} + +Constraints: +- Sentence length: {{ sentence_length }} +- Tone: {{ tone }} + +Instructions: +- Paraphrase the seed format instruction while preserving its intent. +- The instruction must unambiguously specify how the final answer should be formatted. 
+- The answer must be required at the end of the response. +- Do NOT refer to the type of answer (sentence, paragraph, math expression). +- Respect the sentence length and tone constraints. +- Output ONLY the rewritten format instruction, nothing else. +""" + +USER_PROMPT_COMPOSITION_PROMPT = """\ +Compose a final user prompt from the following parts. + +Preamble (P): {{ preamble }} +Format instruction (F): {{ format_instruction }} +Placement order: {{ preamble_format_order }} + +Instructions: +- Concatenate the parts in the order specified by "Placement order". +- Use {problem} as a literal placeholder for the question text. +- For "PF" or "FP" merged orders, combine P and F into a single natural sentence. +- Ensure {problem} has newlines before and after it for readability. +- Preserve the exact text of P and F; do NOT abbreviate or add new content. +- Output ONLY the composed user prompt, nothing else. +""" + +# ============================================================================= +# Judge rubrics +# ============================================================================= + +FORMAT_COMPLIANCE_SCORES = [ + dd.Score( + name="Format Compliance", + description="Does the format instruction unambiguously enforce the intended output format and require the answer at end of response?", + options={ + "2": "Explicit, unambiguous, requires ending with answer in specified format.", + "1": "Mentions format but leaves room for trailing text after the answer.", + "0": "Ambiguous, doesn't mention format, or specifies an alternative format.", + }, + ), +] + +REGEX_ALIGNMENT_SCORES = [ + dd.Score( + name="Regex Alignment", + description="Does the format instruction semantically and structurally align with the output_regex pattern?", + options={ + "1": "Instruction matches the regex pattern intent.", + "0": "Instruction conflicts with or deviates from the regex intent.", + }, + ), +] + +ORDER_COHERENCE_SCORES = [ + dd.Score( + name="Order Coherence", + description="Is 
the composed user prompt coherent with respect to the ordering of preamble, format instruction, and {problem} placeholder?", + options={ + "1": "Makes sense given the part ordering.", + "0": "Confusing or contradictory ordering.", + }, + ), +] + +PREAMBLE_QUALITY_SCORES = [ + dd.Score( + name="Preamble Quality", + description="Assess the preamble for clarity, concision, generic tone, and adherence to the sampled controls.", + options={ + "3": "Clear, concise, generic, adheres to all controls.", + "2": "Good with minor issues in tone or length.", + "1": "Fair with noticeable issues.", + "0": "Poor, unclear, or conflicts with controls.", + }, + ), +] + +# ============================================================================= +# Judge prompts +# ============================================================================= + +FORMAT_COMPLIANCE_JUDGE_PROMPT = """\ +Evaluate the format instruction for compliance. + +Format instruction: {{ format_instruction }} +Output regex: {{ output_regex }} +Seed format instruction: {{ seed_format_instruction }} +""" + +REGEX_ALIGNMENT_JUDGE_PROMPT = """\ +Evaluate whether the format instruction aligns with the regex pattern. + +Format instruction: {{ format_instruction }} +Output regex: {{ output_regex }} +""" + +ORDER_COHERENCE_JUDGE_PROMPT = """\ +Evaluate whether the composed user prompt is coherent given the placement order. + +User prompt: {{ user_prompt }} +Preamble: {{ preamble }} +Format instruction: {{ format_instruction }} +Placement order: {{ preamble_format_order }} +""" + +PREAMBLE_QUALITY_JUDGE_PROMPT = """\ +Evaluate the preamble for quality. 
+ +Preamble: {{ preamble }} +Seed preamble: {{ seed_preamble }} +Sentence length: {{ sentence_length }} +Sentence type: {{ sentence_type }} +Tone: {{ tone }} +Strictness: {{ strictness_level }} +Verbosity: {{ verbosity_level }} +""" + + +# ============================================================================= +# Seed data builder +# ============================================================================= + + +def build_seed_dataframe() -> pd.DataFrame: + """Build the seed DataFrame as the cross product of formats x preambles.""" + rows = [] + for fmt in FORMAT_TEMPLATES: + for preamble in SEED_PREAMBLES: + rows.append( + { + "format_key": fmt["format_key"], + "output_regex": fmt["output_regex"], + "seed_format_instruction": fmt["seed_format_instruction"], + "seed_preamble": preamble, + } + ) + return pd.DataFrame(rows) + + +# ============================================================================= +# Pipeline builder +# ============================================================================= + + +def build_config(model_alias: str) -> tuple[dd.DataDesignerConfigBuilder, Path]: + config_builder = dd.DataDesignerConfigBuilder() + + # ── Seed data ──────────────────────────────────────────────────────── + seed_df = build_seed_dataframe() + seed_path = Path(tempfile.mkdtemp()) / "prompt_sensitivity_seed.csv" + seed_df.to_csv(seed_path, index=False) + + config_builder.with_seed_dataset( + dd.LocalFileSeedSource(path=str(seed_path)), + sampling_strategy=dd.SamplingStrategy.SHUFFLE, + ) + + # ── Stage 1: Diversity samplers ────────────────────────────────────── + + config_builder.add_column( + dd.SamplerColumnConfig( + name="sentence_length", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["short", "medium"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="sentence_type", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["imperative", "declarative", 
"interrogative"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="tone", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["formal", "neutral", "concise", "informal", "strict"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="strictness_level", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["low", "medium", "high"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="verbosity_level", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["concise", "standard"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="domain_context", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["general"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="preamble_format_order", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=PLACEMENT_ORDERS), + ) + ) + + # ── Stage 2: Preamble generation ───────────────────────────────────── + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="preamble", + prompt=PREAMBLE_GEN_PROMPT, + model_alias=model_alias, + ) + ) + + # ── Stage 3: Format instruction generation ─────────────────────────── + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="format_instruction", + prompt=FORMAT_INSTRUCTION_GEN_PROMPT, + model_alias=model_alias, + ) + ) + + # ── Stage 4: User prompt composition ───────────────────────────────── + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="user_prompt", + prompt=USER_PROMPT_COMPOSITION_PROMPT, + model_alias=model_alias, + ) + ) + + # ── Stage 5: Quality scoring ───────────────────────────────────────── + + config_builder.add_column( + dd.LLMJudgeColumnConfig( + name="format_compliance_result", + prompt=FORMAT_COMPLIANCE_JUDGE_PROMPT, + scores=FORMAT_COMPLIANCE_SCORES, + model_alias=model_alias, + ) + ) + + 
config_builder.add_column( + dd.LLMJudgeColumnConfig( + name="regex_alignment_result", + prompt=REGEX_ALIGNMENT_JUDGE_PROMPT, + scores=REGEX_ALIGNMENT_SCORES, + model_alias=model_alias, + ) + ) + + config_builder.add_column( + dd.LLMJudgeColumnConfig( + name="order_coherence_result", + prompt=ORDER_COHERENCE_JUDGE_PROMPT, + scores=ORDER_COHERENCE_SCORES, + model_alias=model_alias, + ) + ) + + config_builder.add_column( + dd.LLMJudgeColumnConfig( + name="preamble_quality_result", + prompt=PREAMBLE_QUALITY_JUDGE_PROMPT, + scores=PREAMBLE_QUALITY_SCORES, + model_alias=model_alias, + ) + ) + + return config_builder, seed_path + + +# ============================================================================= +# Dataset creation +# ============================================================================= + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + num_records: int, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + results = data_designer.create(config_builder, num_records=num_records) + return results + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser(description="Nemotron Nano Prompt Sensitivity Recipe") + parser.add_argument("--model-alias", type=str, default="nvidia-text") + parser.add_argument("--num-records", type=int, default=10) + parser.add_argument("--artifact-path", type=str, default=None) + args = parser.parse_args() + + config_builder, _seed_path = build_config(model_alias=args.model_alias) + results = create_dataset( + config_builder, + num_records=args.num_records, + artifact_path=args.artifact_path, + ) + + print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}") + results.load_analysis().to_report() diff --git a/docs/assets/recipes/model_usability/structured_data.py b/docs/assets/recipes/model_usability/structured_data.py new file mode 100644 index 000000000..d0a87737a --- 
/dev/null +++ b/docs/assets/recipes/model_usability/structured_data.py @@ -0,0 +1,434 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Nemotron Nano Structured Data Recipe: Multi-Format Schema Generation + +Generate synthetic structured data across multiple output formats (JSON, YAML, +XML, Markdown) with controlled schema complexity, conversational grounding, +and best-of-3 candidate generation. + +This recipe implements the pipeline used to produce structured-data SFT records +for Nemotron Nano training. Each record contains a generated schema, a natural +user request, grounding Q&A conversation pairs, and three candidate structured +outputs that conform to the schema. + +Pipeline architecture: + + ┌─────────────────────────────────────────────────────────────────────────┐ + │ STAGE 1: SAMPLING (DIVERSITY CONTROLS) │ + │ │ + │ Format Controls Schema Controls Conversation Controls │ + │ ├─ output_format ├─ schema_rigidity ├─ num_turns │ + │ └─ topic (conditional) ├─ schema_fields_count ├─ tone │ + │ ├─ schema_complexity └─ detail_level │ + │ └─ nesting_depth │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 2: SCHEMA GENERATION (LLM) │ + │ Generates format-specific schema from sampled controls + topic. │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 3: USER PROMPT GENERATION (LLM) │ + │ Natural-language request matching tone and detail level. │ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 4: CONVERSATION PAIRS (LLM) │ + │ Q&A pairs covering schema facts for grounding. 
│ + ├────────────────────────────────────────────────────────────────────────┤ + │ STAGE 5: STRUCTURED OUTPUT (LLM, best-of-3) │ + │ Three candidate structured outputs conforming to the schema. │ + └────────────────────────────────────────────────────────────────────────┘ + +Prerequisites: + - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases. + +Run: + uv run structured_data.py + + uv run structured_data.py --num-records 100 --output-format json + + uv run structured_data.py --help +""" + +from __future__ import annotations + +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +# ============================================================================= +# Topics: representative subset of categories and subtopics +# ============================================================================= + +TOPICS: dict[str, list[str]] = { + "Leisure Activities": [ + "Outdoor Recreation", + "Board Games", + "DIY Crafts", + "Photography", + "Gardening", + ], + "Daily Life": [ + "Morning Routines", + "Grocery Shopping", + "Commuting", + "Household Chores", + "Meal Planning", + ], + "Education and Learning": [ + "Online Courses", + "Study Techniques", + "Language Learning", + "STEM Education", + "Library Systems", + ], + "Technology and Gadgets": [ + "Smartphones", + "Smart Home Devices", + "Wearable Tech", + "Cloud Computing", + "Cybersecurity Basics", + ], + "Health and Wellness": [ + "Nutrition Planning", + "Mental Health", + "Exercise Routines", + "Sleep Hygiene", + "Preventive Care", + ], + "Finance and Money": [ + "Personal Budgeting", + "Investment Basics", + "Tax Preparation", + "Credit Management", + "Retirement Planning", + ], + "Food and Cooking": [ + "Baking Techniques", + "Meal Prep", + "International Cuisines", + "Dietary Restrictions", + "Kitchen Equipment", + ], + "Travel and Transportation": [ + "Trip Planning", + "Public Transit", + "Road Trips", + 
"Travel Insurance", + "Packing Strategies", + ], + "Arts and Culture": [ + "Music Theory", + "Film Analysis", + "Theater Production", + "Contemporary Art", + "Creative Writing", + ], + "Work and Careers": [ + "Resume Building", + "Interview Preparation", + "Remote Work", + "Project Management", + "Career Transitions", + ], +} + +# ============================================================================= +# Prompts +# ============================================================================= + +SCHEMA_GENERATION_PROMPT = """\ +Create a schema for a structured object response in the format {{ output_format }}. + +Controls: +- Rigidity: {{ schema_rigidity }} +- Top-level properties: {{ schema_fields_count }} +- Complexity: {{ schema_complexity }} +- Nesting depth: {{ nesting_depth }} +- Topic: {{ topic_category }} / {{ topic_subtopic }} + +Instructions: +- Output only an object with keys: "name", "schema", and "strict", formatted as \ +{{ output_format }}. +- "name" must be appropriate with the Topic: {{ topic_category }} / {{ topic_subtopic }} +- "schema" should be a valid structured schema as specified in {{ output_format }}. +- Use {{ schema_fields_count }} top-level properties, relevant to the topic. +- Include at least one boolean and, if appropriate, one enum. +- All top-level properties must be listed in "required". +- Set "additionalProperties": false at every object level. +- If {{ schema_complexity }} is "complex", make the schema deeply nested: at least two \ +levels of nested objects, with at least one object nested three levels deep. Keep nesting \ +relevant to the topic. +- If "simple", keep nesting minimal or flat. +- "strict" must be true. + +Formatting by output_format: +- "json": Output a valid JSON object, no code fences or comments. +- "yaml": Output a valid YAML object, no code fences or comments. +- "xml": Output a valid XML document with root "root" and child elements "name", "schema", \ +and "strict". 
"schema" can be a string or nested XML. +- "markdown": Output a Markdown code block with the JSON object, using triple backticks and \ +"json" as the language, no extra text. + +Output only the object in the specified format. No explanations or extra text. +""" + +USER_PROMPT_GENERATION = """\ +You are a human user asking an AI assistant to produce a structured output. Write a natural, \ +concise request that would lead to filling in a schema about {{ topic_category }} / \ +{{ topic_subtopic }}. + +The request should: +- Sound like something a real person would type or say +- Describe what data they want without exposing the schema itself +- Mention the desired output format: {{ output_format }} +- Match the tone: {{ tone }} and detail level: {{ detail_level }} + +Do not include the schema, code fences, or technical formatting. Just the user request. +""" + +CONVERSATION_PROMPT = """\ +Write a short Q&A conversation about the following topic. Follow the selected JSON Schema \ +fields as the underlying facts to cover, but DO NOT output JSON here. + +Topic context: +- Category: {{ topic_category }} +- Subtopic: {{ topic_subtopic }} + +Constraints: +- Number of Q&A pairs: {{ num_turns }} +- Tone: {{ tone }} +- Detail level: {{ detail_level }} + +Write alternating question/answer pairs that make these facts unambiguous for the chosen \ +schema: {{ structured_schema }} +Return only a Python list of [question, answer] pairs (no extra text). +""" + +STRUCTURED_OUTPUT_PROMPT = """\ +You will produce a {{ output_format }} instance that conforms strictly to the following \ +schema (no extra keys). + +Schema: +{{ structured_schema }} + +You are given a Python list of [question, answer] pairs: +{{ conversation_pairs }} + +Instructions: +- Derive values only from the answers given. +- Render ONLY the {{ output_format }} instance, with no commentary. +- Formatting rules: + - If output_format is "json", output a single JSON object (no code fences). 
+  - If output_format is "yaml", output a YAML mapping (no code fences).
+  - If output_format is "xml", output an XML document with root "root".
+  - If output_format is "markdown", output a fenced code block with ```json.
+- Ensure the content validates against the schema when parsed back to JSON.
+"""
+
+# =============================================================================
+# Supported output formats
+# =============================================================================
+
+OUTPUT_FORMATS = ["json", "yaml", "xml", "markdown"]
+
+
+# =============================================================================
+# Pipeline builder
+# =============================================================================
+
+
+def build_config(
+    model_alias: str,
+    output_format: str | None = None,
+) -> dd.DataDesignerConfigBuilder:
+    config_builder = dd.DataDesignerConfigBuilder()
+
+    # ── Stage 1: Sampling ────────────────────────────────────────────────
+
+    config_builder.add_column(
+        dd.SamplerColumnConfig(
+            name="record_id",
+            sampler_type=dd.SamplerType.UUID,
+            params=dd.UUIDSamplerParams(prefix="SD-", short_form=True, uppercase=True),
+        )
+    )
+
+    formats = [output_format] if output_format else OUTPUT_FORMATS
+    config_builder.add_column(
+        dd.SamplerColumnConfig(
+            name="output_format",
+            sampler_type=dd.SamplerType.CATEGORY,
+            params=dd.CategorySamplerParams(values=formats),
+        )
+    )
+
+    config_builder.add_column(
+        dd.SamplerColumnConfig(
+            name="topic_category",
+            sampler_type=dd.SamplerType.CATEGORY,
+            params=dd.CategorySamplerParams(values=list(TOPICS.keys())),
+        )
+    )
+
+    config_builder.add_column(
+        dd.SamplerColumnConfig(
+            name="topic_subtopic",
+            sampler_type=dd.SamplerType.SUBCATEGORY,
+            params=dd.SubcategorySamplerParams(
+                category="topic_category",
+                values=TOPICS,
+            ),
+        )
+    )
+
+    config_builder.add_column(
+        dd.SamplerColumnConfig(
+            name="schema_rigidity",
+            sampler_type=dd.SamplerType.CATEGORY,
+            
params=dd.CategorySamplerParams(values=["strict", "moderate"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="schema_fields_count", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"], + ), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="schema_complexity", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["simple", "complex"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="nesting_depth", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["1", "2", "3", "4"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="num_turns", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["2", "3", "4", "5", "6", "7", "8"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="tone", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["neutral", "enthusiastic", "factual"]), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="detail_level", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["brief", "standard", "detailed", "super verbose"], + ), + ) + ) + + # ── Stage 2: Schema generation ─────────────────────────────────────── + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="structured_schema", + prompt=SCHEMA_GENERATION_PROMPT, + model_alias=model_alias, + ) + ) + + # ── Stage 3: User prompt generation ────────────────────────────────── + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="user_prompt", + prompt=USER_PROMPT_GENERATION, + model_alias=model_alias, + ) + ) + + # ── Stage 4: Conversation pairs ────────────────────────────────────── + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="conversation_pairs", + prompt=CONVERSATION_PROMPT, + 
model_alias=model_alias, + ) + ) + + # ── Stage 5: Best-of-3 structured output ───────────────────────────── + + for i in range(3): + config_builder.add_column( + dd.LLMTextColumnConfig( + name=f"structured_output_{i}", + prompt=STRUCTURED_OUTPUT_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +# ============================================================================= +# Dataset creation +# ============================================================================= + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + num_records: int, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + results = data_designer.create(config_builder, num_records=num_records) + return results + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser(description="Nemotron Nano Structured Data Recipe") + parser.add_argument("--model-alias", type=str, default="nvidia-text") + parser.add_argument("--num-records", type=int, default=5) + parser.add_argument("--artifact-path", type=str, default=None) + parser.add_argument( + "--output-format", + type=str, + default=None, + choices=OUTPUT_FORMATS, + help="Generate for a single output format (default: all formats)", + ) + args = parser.parse_args() + + config_builder = build_config( + model_alias=args.model_alias, + output_format=args.output_format, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + artifact_path=args.artifact_path, + ) + + print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}") + results.load_analysis().to_report() diff --git a/docs/recipes/cards.md b/docs/recipes/cards.md index ef2752936..5f85fc27b 100644 --- a/docs/recipes/cards.md +++ b/docs/recipes/cards.md @@ -172,6 +172,60 @@ Each recipe is a self-contained example that can be run independently. 
[:material-book-open-page-variant: View Recipe](mcp_and_tooluse/search_agent.md){ .md-button } [Download Code :octicons-download-24:](../assets/recipes/mcp_and_tooluse/search_agent.py){ .md-button download="search_agent.py" } +- :material-code-json:{ .lg .middle } **Nemotron Nano Structured Data** + + Generate multi-format structured data (JSON, YAML, XML, Markdown) with controlled schema complexity, conversational grounding, and best-of-3 candidate generation. + + --- + + **Demonstrates:** + + - Multi-format structured output generation + - Conditional topic sampling with SubcategorySamplerParams + - Multi-stage LLM pipeline (schema, prompt, conversation, output) + - Best-of-3 candidate generation + + --- + + [:material-book-open-page-variant: View Recipe](model_usability/structured_data.md){ .md-button } + [Download Code :octicons-download-24:](../assets/recipes/model_usability/structured_data.py){ .md-button download="structured_data.py" } + +- :material-format-list-text:{ .lg .middle } **Nemotron Nano Prompt Sensitivity** + + Generate diverse prompt preambles for RL training with regex-verified output format specifications, LLM paraphrasing, placement order variants, and four LLM judges. + + --- + + **Demonstrates:** + + - Seed dataset with format templates and preambles + - LLM paraphrasing with diversity controls + - Regex-aligned format instruction generation + - 4 LLM judges (format compliance, regex alignment, order coherence, preamble quality) + + --- + + [:material-book-open-page-variant: View Recipe](model_usability/prompt_sensitivity.md){ .md-button } + [Download Code :octicons-download-24:](../assets/recipes/model_usability/prompt_sensitivity.py){ .md-button download="prompt_sensitivity.py" } + +- :material-infinity:{ .lg .middle } **Nemotron Nano InfiniByte** + + Generate more diverse and complex problems by cross-breeding two HuggingFace datasets, augmenting problems through obfuscation or complication with structured LLM evaluation. 
+ + --- + + **Demonstrates:** + + - Cross-source dataset seeding from HuggingFace + - LLMStructuredColumnConfig with Pydantic models + - Multi-stage structured generation (candidates, selection, evaluation) + - Expression columns for score extraction + + --- + + [:material-book-open-page-variant: View Recipe](code_generation/infinibyte.md){ .md-button } + [Download Code :octicons-download-24:](../assets/recipes/code_generation/infinibyte.py){ .md-button download="infinibyte.py" } + - :material-file-document-multiple:{ .lg .middle } **Markdown Section Seed Reader** Define a custom `FileSystemSeedReader` inline and turn Markdown files into one seed row per heading section. diff --git a/docs/recipes/code_generation/infinibyte.md b/docs/recipes/code_generation/infinibyte.md new file mode 100644 index 000000000..662757bc9 --- /dev/null +++ b/docs/recipes/code_generation/infinibyte.md @@ -0,0 +1,9 @@ +# Nemotron Nano InfiniByte + +Generate more diverse and complex training problems by cross-breeding two source datasets. Cross-joins coding and math/science problems, then uses an LLM to create augmented problems through obfuscation or complication, with structured evaluation and solution generation. Used for Nemotron Nano post-training data. + +[Download Code :octicons-download-24:](../../assets/recipes/code_generation/infinibyte.py){ .md-button download="infinibyte.py" } + +```python +--8<-- "assets/recipes/code_generation/infinibyte.py" +``` diff --git a/docs/recipes/model_usability/prompt_sensitivity.md b/docs/recipes/model_usability/prompt_sensitivity.md new file mode 100644 index 000000000..92db61353 --- /dev/null +++ b/docs/recipes/model_usability/prompt_sensitivity.md @@ -0,0 +1,9 @@ +# Nemotron Nano Prompt Sensitivity + +Generate diverse prompt preambles for RL training that pair natural-language instructions with regex-verified output format specifications. 
Uses seed format templates, LLM paraphrasing, placement order variants, and four LLM judges to produce high-quality, format-compliant preambles. Used for Nemotron Nano prompt sensitivity training. + +[Download Code :octicons-download-24:](../../assets/recipes/model_usability/prompt_sensitivity.py){ .md-button download="prompt_sensitivity.py" } + +```python +--8<-- "assets/recipes/model_usability/prompt_sensitivity.py" +``` diff --git a/docs/recipes/model_usability/structured_data.md b/docs/recipes/model_usability/structured_data.md new file mode 100644 index 000000000..aace7f064 --- /dev/null +++ b/docs/recipes/model_usability/structured_data.md @@ -0,0 +1,9 @@ +# Nemotron Nano Structured Data + +Generate multi-format structured data (JSON, YAML, XML, Markdown) with controlled schema complexity, conversational grounding, and best-of-3 candidate generation. Used for Nemotron Nano structured-output SFT training. + +[Download Code :octicons-download-24:](../../assets/recipes/model_usability/structured_data.py){ .md-button download="structured_data.py" } + +```python +--8<-- "assets/recipes/model_usability/structured_data.py" +``` diff --git a/mkdocs.yml b/mkdocs.yml index 464491d4f..4ace5081f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -46,6 +46,10 @@ nav: - Text to Python: recipes/code_generation/text_to_python.md - Text to SQL: recipes/code_generation/text_to_sql.md - "Nemotron Super Text to SQL": recipes/code_generation/enterprise_text_to_sql.md + - "Nemotron Nano InfiniByte": recipes/code_generation/infinibyte.md + - Model Usability: + - "Nemotron Nano Structured Data": recipes/model_usability/structured_data.md + - "Nemotron Nano Prompt Sensitivity": recipes/model_usability/prompt_sensitivity.md - Plugin Development: - Markdown Section Seed Reader Plugin: recipes/plugin_development/markdown_seed_reader.md - QA and Chat: