From a13ddca7d89aa7969cd5ddde41738d760795234f Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 09:43:31 -0500 Subject: [PATCH 01/40] docs(cruncher): document FIMO-like scoring --- src/dnadesign/cruncher/README.md | 8 ++++++++ src/dnadesign/cruncher/docs/demos/demo_basics_two_tf.md | 7 +++++++ .../cruncher/docs/demos/demo_campaigns_multi_tf.md | 5 +++++ src/dnadesign/cruncher/docs/reference/config.md | 4 ++++ 4 files changed, 24 insertions(+) diff --git a/src/dnadesign/cruncher/README.md b/src/dnadesign/cruncher/README.md index 7d8fe983..677281bd 100644 --- a/src/dnadesign/cruncher/README.md +++ b/src/dnadesign/cruncher/README.md @@ -20,6 +20,14 @@ A typical workflow looks like: 3. Generate synthetic sequences (e.g., via [MCMC](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo)) using the locked motifs. 4. Analyze / visualize / report from run artifacts. +Scoring is **FIMO-like**: cruncher builds log-odds PWMs against a 0‑order +background, scans each candidate sequence to find the best window per TF +(optionally bidirectional), and can scale that best hit to a p‑value using a +DP‑derived null distribution (`score_scale: logp`). For `logp`, the tail +probability for the best window is converted to a sequence‑level p via +`p_seq = 1 − (1 − p_win)^n_windows`. This is an internal implementation; cruncher +does not call the FIMO binary. + --- ### Quickstart (happy path) diff --git a/src/dnadesign/cruncher/docs/demos/demo_basics_two_tf.md b/src/dnadesign/cruncher/docs/demos/demo_basics_two_tf.md index bcf46b99..7aabeb4f 100644 --- a/src/dnadesign/cruncher/docs/demos/demo_basics_two_tf.md +++ b/src/dnadesign/cruncher/docs/demos/demo_basics_two_tf.md @@ -4,6 +4,13 @@ **cruncher** scores each TF by the best PWM match anywhere in the candidate sequence on either strand, then optimizes the min/soft‑min across TFs so the weakest TF improves. 
It explores sequence space with Gibbs + parallel tempering (MCMC) and returns a diverse elite set (unique up to reverse‑complement) plus diagnostics for stability/mixing. Motif overlap is allowed and treated as informative structure in analysis. +Scoring is **FIMO-like** (internal implementation): for each PWM, cruncher builds +log‑odds scores against a 0‑order background, scans all windows to find the best +hit (optionally bidirectional), and optionally converts that best hit to a +p‑value via a DP‑derived null distribution (`score_scale: logp`). For `logp`, +the tail probability for the best window becomes a sequence‑level p via +`p_seq = 1 − (1 − p_win)^n_windows`. + **Terminology:** - **sites** = training binding sequences diff --git a/src/dnadesign/cruncher/docs/demos/demo_campaigns_multi_tf.md b/src/dnadesign/cruncher/docs/demos/demo_campaigns_multi_tf.md index 3e8ea4f8..c6118f85 100644 --- a/src/dnadesign/cruncher/docs/demos/demo_campaigns_multi_tf.md +++ b/src/dnadesign/cruncher/docs/demos/demo_campaigns_multi_tf.md @@ -4,6 +4,11 @@ This demo walks through a process of running category-based sequence optimization campaigns, with a focus on campaign selection (site counts + PWM quality), derived configs, and multi-TF runs. +Scoring is **FIMO-like** (internal implementation): cruncher uses PWM log‑odds +scanning against a 0‑order background, takes the best window per TF (optionally +both strands), and can convert that best hit to a p‑value via a DP‑derived null +distribution (`score_scale: logp`, with `p_seq = 1 − (1 − p_win)^n_windows`). 
+ ### Demo instance - **Workspace**: `src/dnadesign/cruncher/workspaces/demo_campaigns_multi_tf/` diff --git a/src/dnadesign/cruncher/docs/reference/config.md b/src/dnadesign/cruncher/docs/reference/config.md index 27270e80..6022cff0 100644 --- a/src/dnadesign/cruncher/docs/reference/config.md +++ b/src/dnadesign/cruncher/docs/reference/config.md @@ -385,6 +385,10 @@ Notes: - `objective.bidirectional=true` scores both strands (reverse complement) when scanning PWMs. - `objective.combine` controls how per-TF scores are combined (`min` for weakest-TF optimization, `sum` for sum-based). - `objective.allow_unscaled_llr=true` allows `score_scale=llr` in multi-TF runs (otherwise validation fails). +- `objective.score_scale=logp` is FIMO‑like: it uses a DP‑derived null + distribution under a 0‑order background to compute a tail p‑value for the + best window, then converts to a sequence‑level p via + `p_seq = 1 − (1 − p_win)^n_windows` before reporting `−log10(p_seq)`. - `elites.min_hamming` is the Hamming-distance filter for elites (0 disables). If `output.trim.enabled=true` yields variable lengths, the distance is computed over the shared prefix plus the length difference. - `elites.k` controls how many sequences are retained before diversity filtering (0 = keep all). - `elites.dsDNA_canonicalize=true` treats reverse complements as identical when computing unique fractions and (optionally) stores `canonical_sequence` in elites. 
From 4cad6062be5ec21cb591b57ab9bcad91bf7d6f8a Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 09:43:44 -0500 Subject: [PATCH 02/40] build: add pixi task aliases for dense and cruncher --- pixi.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pixi.toml b/pixi.toml index af81839d..7b4672b4 100644 --- a/pixi.toml +++ b/pixi.toml @@ -6,6 +6,7 @@ platforms = ["osx-arm64", "osx-64", "linux-64"] [tasks] cruncher = "uv run cruncher" +dense = "uv run dense" [dependencies] meme = "*" From 71ea141680e921ea096f56ee929bbbd16d090f13 Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 09:43:55 -0500 Subject: [PATCH 03/40] densegen: improve FIMO sampling UX and audit metadata --- .../densegen/src/adapters/outputs/parquet.py | 13 + .../densegen/src/adapters/sources/base.py | 2 +- .../src/adapters/sources/binding_sites.py | 2 +- .../src/adapters/sources/pwm_artifact.py | 53 +- .../src/adapters/sources/pwm_artifact_set.py | 42 +- .../densegen/src/adapters/sources/pwm_fimo.py | 178 +++++++ .../src/adapters/sources/pwm_jaspar.py | 42 +- .../src/adapters/sources/pwm_matrix_csv.py | 49 +- .../densegen/src/adapters/sources/pwm_meme.py | 42 +- .../src/adapters/sources/pwm_meme_set.py | 42 +- .../src/adapters/sources/pwm_sampling.py | 464 ++++++++++++++++-- .../src/adapters/sources/sequence_library.py | 2 +- .../src/adapters/sources/usr_sequences.py | 2 +- src/dnadesign/densegen/src/cli.py | 34 +- src/dnadesign/densegen/src/config/__init__.py | 77 ++- src/dnadesign/densegen/src/core/metadata.py | 8 + .../densegen/src/core/metadata_schema.py | 26 + src/dnadesign/densegen/src/core/pipeline.py | 124 ++++- .../densegen/src/core/pvalue_bins.py | 32 ++ .../densegen/src/integrations/__init__.py | 3 + .../densegen/src/integrations/meme_suite.py | 41 ++ .../tests/test_cli_summarize_library.py | 8 + .../densegen/tests/test_outputs_parquet.py | 8 + .../tests/test_pipeline_library_index.py | 15 + .../densegen/tests/test_pwm_fimo_utils.py | 93 ++++ 
.../densegen/tests/test_pwm_sampling_bins.py | 74 +++ 26 files changed, 1377 insertions(+), 99 deletions(-) create mode 100644 src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py create mode 100644 src/dnadesign/densegen/src/core/pvalue_bins.py create mode 100644 src/dnadesign/densegen/src/integrations/__init__.py create mode 100644 src/dnadesign/densegen/src/integrations/meme_suite.py create mode 100644 src/dnadesign/densegen/tests/test_pipeline_library_index.py create mode 100644 src/dnadesign/densegen/tests/test_pwm_fimo_utils.py create mode 100644 src/dnadesign/densegen/tests/test_pwm_sampling_bins.py diff --git a/src/dnadesign/densegen/src/adapters/outputs/parquet.py b/src/dnadesign/densegen/src/adapters/outputs/parquet.py index 35f533f2..a160098f 100644 --- a/src/dnadesign/densegen/src/adapters/outputs/parquet.py +++ b/src/dnadesign/densegen/src/adapters/outputs/parquet.py @@ -32,6 +32,12 @@ def _meta_arrow_type(name: str, pa): "input_pwm_ids", "required_regulators", } + list_float = { + "input_pwm_pvalue_bins", + } + list_int = { + "input_pwm_pvalue_bin_ids", + } int_fields = { "length", "random_seed", @@ -61,6 +67,7 @@ def _meta_arrow_type(name: str, pa): "compression_ratio", "input_pwm_score_threshold", "input_pwm_score_percentile", + "input_pwm_pvalue_threshold", "sampling_fraction", "sampling_fraction_pairs", "gap_fill_gc_min", @@ -79,10 +86,16 @@ def _meta_arrow_type(name: str, pa): "sampling_relaxed_cap", "gap_fill_used", "gap_fill_relaxed", + "input_pwm_keep_all_candidates_debug", + "input_pwm_include_matched_sequence", } if name in list_str: return pa.list_(pa.string()) + if name in list_float: + return pa.list_(pa.float64()) + if name in list_int: + return pa.list_(pa.int64()) if name == "used_tfbs_detail": return pa.list_( pa.struct( diff --git a/src/dnadesign/densegen/src/adapters/sources/base.py b/src/dnadesign/densegen/src/adapters/sources/base.py index d1f9758c..2d645291 100644 --- a/src/dnadesign/densegen/src/adapters/sources/base.py +++ 
b/src/dnadesign/densegen/src/adapters/sources/base.py @@ -39,7 +39,7 @@ def infer_format(path: Path) -> str | None: class BaseDataSource(abc.ABC): @abc.abstractmethod - def load_data(self, *, rng=None) -> Tuple[List, Optional[pd.DataFrame]]: + def load_data(self, *, rng=None, outputs_root: Path | None = None) -> Tuple[List, Optional[pd.DataFrame]]: """ Returns: (data_entries, meta_df) diff --git a/src/dnadesign/densegen/src/adapters/sources/binding_sites.py b/src/dnadesign/densegen/src/adapters/sources/binding_sites.py index 6bea5b56..6c74a022 100644 --- a/src/dnadesign/densegen/src/adapters/sources/binding_sites.py +++ b/src/dnadesign/densegen/src/adapters/sources/binding_sites.py @@ -57,7 +57,7 @@ def _load_table(self, path: Path, fmt: str) -> pd.DataFrame: return pd.read_excel(path) raise ValueError(f"Unsupported binding_sites.format: {fmt}") - def load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): data_path = resolve_path(self.cfg_path, self.path) if not (data_path.exists() and data_path.is_file()): raise FileNotFoundError(f"Binding sites file not found. 
Looked here:\n - {data_path}") diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py index b2604800..446ca742 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py @@ -152,7 +152,7 @@ class PWMArtifactDataSource(BaseDataSource): cfg_path: Path sampling: dict - def load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): if rng is None: raise ValueError("PWM sampling requires an RNG; pass the pipeline RNG explicitly.") artifact_path = resolve_path(self.cfg_path, self.path) @@ -173,8 +173,25 @@ def load_data(self, *, rng=None): length_range = sampling.get("length_range") trim_window_length = sampling.get("trim_window_length") trim_window_strategy = sampling.get("trim_window_strategy", "max_info") - - selected = sample_pwm_sites( + scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() + pvalue_threshold = sampling.get("pvalue_threshold") + pvalue_bins = sampling.get("pvalue_bins") + pvalue_bin_ids = sampling.get("pvalue_bin_ids") + bgfile = sampling.get("bgfile") + selection_policy = str(sampling.get("selection_policy", "random_uniform")) + keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) + include_matched_sequence = bool(sampling.get("include_matched_sequence", False)) + bgfile_path: Path | None = None + if bgfile is not None: + bgfile_path = resolve_path(self.cfg_path, str(bgfile)) + if not (bgfile_path.exists() and bgfile_path.is_file()): + raise FileNotFoundError(f"PWM sampling bgfile not found. 
Looked here:\n - {bgfile_path}") + debug_output_dir: Path | None = None + if keep_all_candidates_debug and outputs_root is not None: + debug_output_dir = Path(outputs_root) / "meta" / "fimo" + + return_meta = scoring_backend == "fimo" + result = sample_pwm_sites( rng, motif, strategy=strategy, @@ -184,20 +201,36 @@ def load_data(self, *, rng=None): max_seconds=max_seconds, score_threshold=threshold, score_percentile=percentile, + scoring_backend=scoring_backend, + pvalue_threshold=pvalue_threshold, + pvalue_bins=pvalue_bins, + pvalue_bin_ids=pvalue_bin_ids, + bgfile=bgfile_path, + selection_policy=selection_policy, + keep_all_candidates_debug=keep_all_candidates_debug, + include_matched_sequence=include_matched_sequence, + debug_output_dir=debug_output_dir, + debug_label=f"{artifact_path.stem}__{motif.motif_id}", length_policy=length_policy, length_range=length_range, trim_window_length=trim_window_length, trim_window_strategy=str(trim_window_strategy), + return_metadata=return_meta, ) + if return_meta: + selected, meta_by_seq = result # type: ignore[misc] + else: + selected = result # type: ignore[assignment] + meta_by_seq = {} entries = [(motif.motif_id, seq, str(artifact_path)) for seq in selected] import pandas as pd - df_out = pd.DataFrame( - { - "tf": [motif.motif_id] * len(selected), - "tfbs": selected, - "source": [str(artifact_path)] * len(selected), - } - ) + rows = [] + for seq in selected: + row = {"tf": motif.motif_id, "tfbs": seq, "source": str(artifact_path)} + if meta_by_seq: + row.update(meta_by_seq.get(seq, {})) + rows.append(row) + df_out = pd.DataFrame(rows) return entries, df_out diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py index 9ed3dae9..6fff70b3 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py @@ -28,7 +28,7 @@ class 
PWMArtifactSetDataSource(BaseDataSource): sampling: dict overrides_by_motif_id: dict[str, dict] | None = None - def load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): if rng is None: raise ValueError("PWM sampling requires an RNG; pass the pipeline RNG explicitly.") @@ -69,7 +69,24 @@ def load_data(self, *, rng=None): length_range = sampling_cfg.get("length_range") trim_window_length = sampling_cfg.get("trim_window_length") trim_window_strategy = sampling_cfg.get("trim_window_strategy", "max_info") - selected = sample_pwm_sites( + scoring_backend = str(sampling_cfg.get("scoring_backend", "densegen")).lower() + pvalue_threshold = sampling_cfg.get("pvalue_threshold") + pvalue_bins = sampling_cfg.get("pvalue_bins") + pvalue_bin_ids = sampling_cfg.get("pvalue_bin_ids") + bgfile = sampling_cfg.get("bgfile") + selection_policy = str(sampling_cfg.get("selection_policy", "random_uniform")) + keep_all_candidates_debug = bool(sampling_cfg.get("keep_all_candidates_debug", False)) + include_matched_sequence = bool(sampling_cfg.get("include_matched_sequence", False)) + bgfile_path: Path | None = None + if bgfile is not None: + bgfile_path = resolve_path(self.cfg_path, str(bgfile)) + if not (bgfile_path.exists() and bgfile_path.is_file()): + raise FileNotFoundError(f"PWM sampling bgfile not found. 
Looked here:\n - {bgfile_path}") + debug_output_dir: Path | None = None + if keep_all_candidates_debug and outputs_root is not None: + debug_output_dir = Path(outputs_root) / "meta" / "fimo" + return_meta = scoring_backend == "fimo" + result = sample_pwm_sites( rng, motif, strategy=strategy, @@ -79,15 +96,34 @@ def load_data(self, *, rng=None): max_seconds=max_seconds, score_threshold=threshold, score_percentile=percentile, + scoring_backend=scoring_backend, + pvalue_threshold=pvalue_threshold, + pvalue_bins=pvalue_bins, + pvalue_bin_ids=pvalue_bin_ids, + bgfile=bgfile_path, + selection_policy=selection_policy, + keep_all_candidates_debug=keep_all_candidates_debug, + include_matched_sequence=include_matched_sequence, + debug_output_dir=debug_output_dir, + debug_label=f"{Path(path).stem}__{motif.motif_id}", length_policy=length_policy, length_range=length_range, trim_window_length=trim_window_length, trim_window_strategy=str(trim_window_strategy), + return_metadata=return_meta, ) + if return_meta: + selected, meta_by_seq = result # type: ignore[misc] + else: + selected = result # type: ignore[assignment] + meta_by_seq = {} for seq in selected: entries.append((motif.motif_id, seq, str(path))) - all_rows.append({"tf": motif.motif_id, "tfbs": seq, "source": str(path)}) + row = {"tf": motif.motif_id, "tfbs": seq, "source": str(path)} + if meta_by_seq: + row.update(meta_by_seq.get(seq, {})) + all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py b/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py new file mode 100644 index 00000000..cbdc06c6 --- /dev/null +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py @@ -0,0 +1,178 @@ +""" +-------------------------------------------------------------------------------- + +dnadesign/densegen/adapters/sources/pwm_fimo.py + +Helpers for MEME Suite FIMO-backed scoring of PWM-sampled candidates. + +Module Author(s): Eric J. 
South +Dunlop Lab +-------------------------------------------------------------------------------- +""" + +from __future__ import annotations + +import csv +import re +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, Sequence + +from ...integrations.meme_suite import resolve_executable +from .pwm_sampling import PWMMotif, normalize_background + +_HEADER_RE = re.compile(r"[\s\-]+") +_SAFE_ID_RE = re.compile(r"[^A-Za-z0-9_.-]+") + + +@dataclass(frozen=True) +class FimoHit: + sequence_name: str + start: int + stop: int + strand: str + score: float + pvalue: float + matched_sequence: str | None = None + + +def _normalize_header(name: str) -> str: + return _HEADER_RE.sub("_", str(name).strip().lower()) + + +def _sanitize_id(text: str) -> str: + cleaned = _SAFE_ID_RE.sub("_", str(text).strip()) + return cleaned or "motif" + + +def build_candidate_records(motif_id: str, sequences: Sequence[str]) -> list[tuple[str, str]]: + prefix = _sanitize_id(motif_id) + return [(f"{prefix}|cand{idx}", seq) for idx, seq in enumerate(sequences)] + + +def write_candidates_fasta(records: Sequence[tuple[str, str]], out_path: Path) -> None: + lines = [] + for rec_id, seq in records: + lines.append(f">{rec_id}") + lines.append(str(seq)) + out_path.write_text("\n".join(lines) + "\n") + + +def write_minimal_meme_motif(motif: PWMMotif, out_path: Path) -> str: + motif_id = _sanitize_id(motif.motif_id) + bg = normalize_background(motif.background) + lines = [ + "MEME version 4", + "", + "ALPHABET= ACGT", + "", + "strands: + -", + "", + "Background letter frequencies:", + f"A {bg['A']:.6g} C {bg['C']:.6g} G {bg['G']:.6g} T {bg['T']:.6g}", + "", + f"MOTIF {motif_id}", + f"letter-probability matrix: alength= 4 w= {len(motif.matrix)}", + ] + for row in motif.matrix: + lines.append( + f"{float(row.get('A', 0.0)):.6g} {float(row.get('C', 0.0)):.6g} " + f"{float(row.get('G', 0.0)):.6g} {float(row.get('T', 0.0)):.6g}" + ) + 
out_path.write_text("\n".join(lines) + "\n") + return motif_id + + +def parse_fimo_tsv(text: str) -> list[dict]: + lines = [ln for ln in text.splitlines() if ln.strip() and not ln.lstrip().startswith("#")] + if not lines: + return [] + reader = csv.reader(lines, delimiter="\t") + header = next(reader, None) + if header is None: + return [] + alias = {"pvalue": "p_value", "qvalue": "q_value", "sequence": "sequence_name"} + normalized = [alias.get(_normalize_header(h), _normalize_header(h)) for h in header] + idx = {name: i for i, name in enumerate(normalized)} + required = {"sequence_name", "start", "stop", "strand", "score", "p_value"} + if not required.issubset(idx): + raise ValueError(f"FIMO output missing required columns: {sorted(required - set(idx))}") + rows: list[dict] = [] + for row in reader: + if not row: + continue + seq_name = row[idx["sequence_name"]] + entry = { + "sequence_name": seq_name, + "start": int(row[idx["start"]]), + "stop": int(row[idx["stop"]]), + "strand": row[idx["strand"]], + "score": float(row[idx["score"]]), + "p_value": float(row[idx["p_value"]]), + } + if "q_value" in idx: + try: + entry["q_value"] = float(row[idx["q_value"]]) + except Exception: + entry["q_value"] = None + if "matched_sequence" in idx: + entry["matched_sequence"] = row[idx["matched_sequence"]] + rows.append(entry) + return rows + + +def aggregate_best_hits(rows: Iterable[dict]) -> dict[str, FimoHit]: + best: dict[str, FimoHit] = {} + for row in rows: + seq_name = row["sequence_name"] + pval = float(row["p_value"]) + score = float(row["score"]) + hit = FimoHit( + sequence_name=seq_name, + start=int(row["start"]), + stop=int(row["stop"]), + strand=str(row["strand"]), + score=score, + pvalue=pval, + matched_sequence=row.get("matched_sequence"), + ) + prev = best.get(seq_name) + if prev is None or pval < prev.pvalue or (pval == prev.pvalue and score > prev.score): + best[seq_name] = hit + return best + + +def run_fimo( + *, + meme_motif_path: Path, + fasta_path: Path, 
+ bgfile: Path | None = None, + norc: bool = False, + thresh: float | None = None, + include_matched_sequence: bool = False, + return_tsv: bool = False, +) -> tuple[list[dict], str | None]: + exe = resolve_executable("fimo", tool_path=None) + if exe is None: + raise FileNotFoundError( + "FIMO executable not found. Install MEME Suite and ensure `fimo` is on PATH, " + "or set MEME_BIN to the MEME bin directory (pixi users: `pixi run dense ...`)." + ) + cmd = [str(exe), "--text"] + if not include_matched_sequence: + cmd.append("--skip-matched-sequence") + if norc: + cmd.append("--norc") + if thresh is not None: + cmd.extend(["--thresh", str(thresh)]) + if bgfile is not None: + cmd.extend(["--bgfile", str(bgfile)]) + cmd.extend([str(meme_motif_path), str(fasta_path)]) + result = subprocess.run(cmd, capture_output=True, text=True, check=False) + if result.returncode != 0: + stderr = result.stderr.strip() + raise RuntimeError(f"FIMO failed (exit {result.returncode}). {stderr or 'No stderr output.'}") + tsv_text = result.stdout + rows = parse_fimo_tsv(tsv_text) + return rows, (tsv_text if return_tsv else None) diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py index c052a008..75a73d19 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py @@ -87,7 +87,7 @@ class PWMJasparDataSource(BaseDataSource): motif_ids: Optional[List[str]] sampling: dict - def load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): if rng is None: raise ValueError("PWM sampling requires an RNG; pass the pipeline RNG explicitly.") jaspar_path = resolve_path(self.cfg_path, self.path) @@ -113,11 +113,28 @@ def load_data(self, *, rng=None): length_range = sampling.get("length_range") trim_window_length = sampling.get("trim_window_length") trim_window_strategy = sampling.get("trim_window_strategy", 
"max_info") + scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() + pvalue_threshold = sampling.get("pvalue_threshold") + pvalue_bins = sampling.get("pvalue_bins") + pvalue_bin_ids = sampling.get("pvalue_bin_ids") + bgfile = sampling.get("bgfile") + selection_policy = str(sampling.get("selection_policy", "random_uniform")) + keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) + include_matched_sequence = bool(sampling.get("include_matched_sequence", False)) + bgfile_path: Path | None = None + if bgfile is not None: + bgfile_path = resolve_path(self.cfg_path, str(bgfile)) + if not (bgfile_path.exists() and bgfile_path.is_file()): + raise FileNotFoundError(f"PWM sampling bgfile not found. Looked here:\n - {bgfile_path}") + debug_output_dir: Path | None = None + if keep_all_candidates_debug and outputs_root is not None: + debug_output_dir = Path(outputs_root) / "meta" / "fimo" entries = [] all_rows = [] for motif in motifs: - selected = sample_pwm_sites( + return_meta = scoring_backend == "fimo" + result = sample_pwm_sites( rng, motif, strategy=strategy, @@ -127,14 +144,33 @@ def load_data(self, *, rng=None): max_seconds=max_seconds, score_threshold=threshold, score_percentile=percentile, + scoring_backend=scoring_backend, + pvalue_threshold=pvalue_threshold, + pvalue_bins=pvalue_bins, + pvalue_bin_ids=pvalue_bin_ids, + bgfile=bgfile_path, + selection_policy=selection_policy, + keep_all_candidates_debug=keep_all_candidates_debug, + include_matched_sequence=include_matched_sequence, + debug_output_dir=debug_output_dir, + debug_label=f"{jaspar_path.stem}__{motif.motif_id}", length_policy=length_policy, length_range=length_range, trim_window_length=trim_window_length, trim_window_strategy=str(trim_window_strategy), + return_metadata=return_meta, ) + if return_meta: + selected, meta_by_seq = result # type: ignore[misc] + else: + selected = result # type: ignore[assignment] + meta_by_seq = {} for seq in selected: 
entries.append((motif.motif_id, seq, str(jaspar_path))) - all_rows.append({"tf": motif.motif_id, "tfbs": seq, "source": str(jaspar_path)}) + row = {"tf": motif.motif_id, "tfbs": seq, "source": str(jaspar_path)} + if meta_by_seq: + row.update(meta_by_seq.get(seq, {})) + all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py index c34aa49c..5df2088c 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py @@ -29,7 +29,7 @@ class PWMMatrixCSVDataSource(BaseDataSource): columns: dict[str, str] sampling: dict - def load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): if rng is None: raise ValueError("PWM sampling requires an RNG; pass the pipeline RNG explicitly.") if not self.motif_id or not str(self.motif_id).strip(): @@ -77,8 +77,25 @@ def load_data(self, *, rng=None): length_range = sampling.get("length_range") trim_window_length = sampling.get("trim_window_length") trim_window_strategy = sampling.get("trim_window_strategy", "max_info") - - selected = sample_pwm_sites( + scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() + pvalue_threshold = sampling.get("pvalue_threshold") + pvalue_bins = sampling.get("pvalue_bins") + pvalue_bin_ids = sampling.get("pvalue_bin_ids") + bgfile = sampling.get("bgfile") + selection_policy = str(sampling.get("selection_policy", "random_uniform")) + keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) + include_matched_sequence = bool(sampling.get("include_matched_sequence", False)) + bgfile_path: Path | None = None + if bgfile is not None: + bgfile_path = resolve_path(self.cfg_path, str(bgfile)) + if not (bgfile_path.exists() and bgfile_path.is_file()): + raise FileNotFoundError(f"PWM sampling bgfile not found. 
Looked here:\n - {bgfile_path}") + debug_output_dir: Path | None = None + if keep_all_candidates_debug and outputs_root is not None: + debug_output_dir = Path(outputs_root) / "meta" / "fimo" + + return_meta = scoring_backend == "fimo" + result = sample_pwm_sites( rng, motif, strategy=strategy, @@ -88,14 +105,34 @@ def load_data(self, *, rng=None): max_seconds=max_seconds, score_threshold=threshold, score_percentile=percentile, + scoring_backend=scoring_backend, + pvalue_threshold=pvalue_threshold, + pvalue_bins=pvalue_bins, + pvalue_bin_ids=pvalue_bin_ids, + bgfile=bgfile_path, + selection_policy=selection_policy, + keep_all_candidates_debug=keep_all_candidates_debug, + include_matched_sequence=include_matched_sequence, + debug_output_dir=debug_output_dir, + debug_label=f"{csv_path.stem}__{motif.motif_id}", length_policy=length_policy, length_range=length_range, trim_window_length=trim_window_length, trim_window_strategy=str(trim_window_strategy), + return_metadata=return_meta, ) + if return_meta: + selected, meta_by_seq = result # type: ignore[misc] + else: + selected = result # type: ignore[assignment] + meta_by_seq = {} entries = [(motif.motif_id, seq, str(csv_path)) for seq in selected] - df_out = pd.DataFrame( - {"tf": [motif.motif_id] * len(selected), "tfbs": selected, "source": [str(csv_path)] * len(selected)} - ) + rows = [] + for seq in selected: + row = {"tf": motif.motif_id, "tfbs": seq, "source": str(csv_path)} + if meta_by_seq: + row.update(meta_by_seq.get(seq, {})) + rows.append(row) + df_out = pd.DataFrame(rows) return entries, df_out diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py index e364c413..7f7193ac 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py @@ -56,7 +56,7 @@ class PWMMemeDataSource(BaseDataSource): motif_ids: Optional[List[str]] sampling: dict - def load_data(self, *, rng=None): 
+ def load_data(self, *, rng=None, outputs_root: Path | None = None): if rng is None: raise ValueError("PWM sampling requires an RNG; pass the pipeline RNG explicitly.") meme_path = resolve_path(self.cfg_path, self.path) @@ -91,12 +91,29 @@ def load_data(self, *, rng=None): length_range = sampling.get("length_range") trim_window_length = sampling.get("trim_window_length") trim_window_strategy = sampling.get("trim_window_strategy", "max_info") + scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() + pvalue_threshold = sampling.get("pvalue_threshold") + pvalue_bins = sampling.get("pvalue_bins") + pvalue_bin_ids = sampling.get("pvalue_bin_ids") + bgfile = sampling.get("bgfile") + selection_policy = str(sampling.get("selection_policy", "random_uniform")) + keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) + include_matched_sequence = bool(sampling.get("include_matched_sequence", False)) + bgfile_path: Path | None = None + if bgfile is not None: + bgfile_path = resolve_path(self.cfg_path, str(bgfile)) + if not (bgfile_path.exists() and bgfile_path.is_file()): + raise FileNotFoundError(f"PWM sampling bgfile not found. 
Looked here:\n - {bgfile_path}") + debug_output_dir: Path | None = None + if keep_all_candidates_debug and outputs_root is not None: + debug_output_dir = Path(outputs_root) / "meta" / "fimo" entries = [] all_rows = [] for motif in motifs: pwm = _motif_to_pwm(motif, background) - selected = sample_pwm_sites( + return_meta = scoring_backend == "fimo" + result = sample_pwm_sites( rng, pwm, strategy=strategy, @@ -106,15 +123,34 @@ def load_data(self, *, rng=None): max_seconds=max_seconds, score_threshold=threshold, score_percentile=percentile, + scoring_backend=scoring_backend, + pvalue_threshold=pvalue_threshold, + pvalue_bins=pvalue_bins, + pvalue_bin_ids=pvalue_bin_ids, + bgfile=bgfile_path, + selection_policy=selection_policy, + keep_all_candidates_debug=keep_all_candidates_debug, + include_matched_sequence=include_matched_sequence, + debug_output_dir=debug_output_dir, + debug_label=f"{meme_path.stem}__{pwm.motif_id}", length_policy=length_policy, length_range=length_range, trim_window_length=trim_window_length, trim_window_strategy=str(trim_window_strategy), + return_metadata=return_meta, ) + if return_meta: + selected, meta_by_seq = result # type: ignore[misc] + else: + selected = result # type: ignore[assignment] + meta_by_seq = {} for seq in selected: entries.append((pwm.motif_id, seq, str(meme_path))) - all_rows.append({"tf": pwm.motif_id, "tfbs": seq, "source": str(meme_path)}) + row = {"tf": pwm.motif_id, "tfbs": seq, "source": str(meme_path)} + if meta_by_seq: + row.update(meta_by_seq.get(seq, {})) + all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py index f5434721..1e521914 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py @@ -42,7 +42,7 @@ class PWMMemeSetDataSource(BaseDataSource): motif_ids: Optional[List[str]] sampling: dict - def 
load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): if rng is None: raise ValueError("PWM sampling requires an RNG; pass the pipeline RNG explicitly.") resolved = [resolve_path(self.cfg_path, path) for path in self.paths] @@ -85,12 +85,29 @@ def load_data(self, *, rng=None): length_range = sampling.get("length_range") trim_window_length = sampling.get("trim_window_length") trim_window_strategy = sampling.get("trim_window_strategy", "max_info") + scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() + pvalue_threshold = sampling.get("pvalue_threshold") + pvalue_bins = sampling.get("pvalue_bins") + pvalue_bin_ids = sampling.get("pvalue_bin_ids") + bgfile = sampling.get("bgfile") + selection_policy = str(sampling.get("selection_policy", "random_uniform")) + keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) + include_matched_sequence = bool(sampling.get("include_matched_sequence", False)) + bgfile_path: Path | None = None + if bgfile is not None: + bgfile_path = resolve_path(self.cfg_path, str(bgfile)) + if not (bgfile_path.exists() and bgfile_path.is_file()): + raise FileNotFoundError(f"PWM sampling bgfile not found. 
Looked here:\n - {bgfile_path}") + debug_output_dir: Path | None = None + if keep_all_candidates_debug and outputs_root is not None: + debug_output_dir = Path(outputs_root) / "meta" / "fimo" entries = [] all_rows = [] for motif, background, path in motifs_payload: pwm = _motif_to_pwm(motif, background) - selected = sample_pwm_sites( + return_meta = scoring_backend == "fimo" + result = sample_pwm_sites( rng, pwm, strategy=strategy, @@ -100,14 +117,33 @@ def load_data(self, *, rng=None): max_seconds=max_seconds, score_threshold=threshold, score_percentile=percentile, + scoring_backend=scoring_backend, + pvalue_threshold=pvalue_threshold, + pvalue_bins=pvalue_bins, + pvalue_bin_ids=pvalue_bin_ids, + bgfile=bgfile_path, + selection_policy=selection_policy, + keep_all_candidates_debug=keep_all_candidates_debug, + include_matched_sequence=include_matched_sequence, + debug_output_dir=debug_output_dir, + debug_label=f"{Path(path).stem}__{pwm.motif_id}", length_policy=length_policy, length_range=length_range, trim_window_length=trim_window_length, trim_window_strategy=str(trim_window_strategy), + return_metadata=return_meta, ) + if return_meta: + selected, meta_by_seq = result # type: ignore[misc] + else: + selected = result # type: ignore[assignment] + meta_by_seq = {} for seq in selected: entries.append((pwm.motif_id, seq, str(path))) - all_rows.append({"tf": pwm.motif_id, "tfbs": seq, "source": str(path)}) + row = {"tf": pwm.motif_id, "tfbs": seq, "source": str(path)} + if meta_by_seq: + row.update(meta_by_seq.get(seq, {})) + all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py b/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py index 6a17f905..5c3514f5 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py @@ -15,12 +15,40 @@ import logging import time from dataclasses import dataclass +from pathlib import Path from 
typing import List, Optional, Sequence, Tuple import numpy as np +from ...core.pvalue_bins import resolve_pvalue_bins + SMOOTHING_ALPHA = 1e-6 log = logging.getLogger(__name__) +_SAFE_LABEL_RE = None + + +def _safe_label(text: str) -> str: + global _SAFE_LABEL_RE + if _SAFE_LABEL_RE is None: + import re + + _SAFE_LABEL_RE = re.compile(r"[^A-Za-z0-9_.-]+") + cleaned = _SAFE_LABEL_RE.sub("_", str(text).strip()) + return cleaned or "motif" + + +@dataclass(frozen=True) +class FimoCandidate: + seq: str + pvalue: float + score: float + bin_id: int + bin_low: float + bin_high: float + start: int + stop: int + strand: str + matched_sequence: Optional[str] = None @dataclass(frozen=True) @@ -221,6 +249,154 @@ def select_by_score( return unique[:n_sites] +def _resolve_pvalue_edges(pvalue_bins: Sequence[float] | None) -> list[float]: + edges = resolve_pvalue_bins(pvalue_bins) + if not edges: + raise ValueError("pvalue_bins must contain at least one edge.") + cleaned: list[float] = [] + prev = 0.0 + for edge in edges: + edge_val = float(edge) + if not (0.0 < edge_val <= 1.0): + raise ValueError("pvalue_bins values must be in (0, 1].") + if edge_val <= prev: + raise ValueError("pvalue_bins must be strictly increasing.") + cleaned.append(edge_val) + prev = edge_val + if abs(cleaned[-1] - 1.0) > 1e-12: + raise ValueError("pvalue_bins must end with 1.0.") + return cleaned + + +def _assign_pvalue_bin(pvalue: float, edges: Sequence[float]) -> tuple[int, float, float]: + low = 0.0 + for idx, edge in enumerate(edges): + if pvalue <= edge: + return idx, low, float(edge) + low = float(edge) + if not edges: + return 0, 0.0, 1.0 + if len(edges) == 1: + return 0, 0.0, float(edges[0]) + return len(edges) - 1, float(edges[-2]), float(edges[-1]) + + +def _format_pvalue_bins(edges: Sequence[float], counts: Sequence[int]) -> str: + if not edges or not counts: + return "-" + labels: list[str] = [] + low = 0.0 + for edge, count in zip(edges, counts): + 
labels.append(f"({low:.0e},{float(edge):.0e}]:{int(count)}") + low = float(edge) + return " ".join(labels) + + +def _stratified_sample( + candidates: List[FimoCandidate], + *, + n_sites: int, + rng: np.random.Generator, + n_bins: int, +) -> List[FimoCandidate]: + bins: list[list[FimoCandidate]] = [[] for _ in range(n_bins)] + for cand in candidates: + idx = max(0, min(int(cand.bin_id), n_bins - 1)) + bins[idx].append(cand) + for bucket in bins: + rng.shuffle(bucket) + picked: list[FimoCandidate] = [] + while len(picked) < n_sites: + progressed = False + for bucket in bins: + if bucket: + picked.append(bucket.pop()) + progressed = True + if len(picked) >= n_sites: + break + if not progressed: + break + return picked + + +def _select_fimo_candidates( + candidates: List[FimoCandidate], + *, + n_sites: int, + selection_policy: str, + rng: np.random.Generator, + pvalue_threshold: float, + keep_weak: bool, + n_bins: int, + context: dict, +) -> List[FimoCandidate]: + unique: list[FimoCandidate] = [] + seen: set[str] = set() + for cand in candidates: + if cand.seq in seen: + continue + seen.add(cand.seq) + unique.append(cand) + if len(unique) < n_sites: + msg_lines = [ + ( + "PWM sampling failed for motif " + f"'{context.get('motif_id')}' " + f"(width={context.get('width')}, strategy={context.get('strategy')}, " + f"length={context.get('length_label')}, window={context.get('window_label')}, " + f"backend=fimo, selection={selection_policy}, " + f"pvalue={context.get('pvalue_label')})." + ), + ( + f"Requested n_sites={context.get('n_sites')} oversample_factor={context.get('oversample_factor')} " + f"-> candidates requested={context.get('requested_candidates')} " + f"generated={context.get('generated_candidates')}" + f"{context.get('cap_label')}." 
+ ), + (f"Unique candidates after filtering={len(unique)} (need {n_sites})."), + ] + if context.get("length_observed"): + msg_lines.append(f"Observed candidate lengths={context.get('length_observed')}.") + if context.get("pvalue_bins_label") is not None: + msg_lines.append(f"P-value bins={context.get('pvalue_bins_label')}.") + if context.get("pvalue_bin_ids") is not None: + msg_lines.append(f"Selected bins={context.get('pvalue_bin_ids')}.") + suggestions = [ + "reduce n_sites", + "relax pvalue_threshold (e.g., 1e-4 → 1e-3)", + "increase oversample_factor", + ] + if context.get("pvalue_bin_ids") is not None: + suggestions.append("broaden pvalue_bin_ids (or remove bin filtering)") + if context.get("cap_applied"): + suggestions.append("increase max_candidates (cap was hit)") + if context.get("time_limited"): + suggestions.append("increase max_seconds (time limit was hit)") + if context.get("width") is not None and int(context.get("width")) <= 6: + suggestions.append("try length_policy=range with a longer length_range") + msg_lines.append("Try next: " + "; ".join(suggestions) + ".") + raise ValueError(" ".join(msg_lines)) + if selection_policy == "random_uniform": + if len(unique) == n_sites: + return unique + picks = rng.choice(len(unique), size=n_sites, replace=False) + return [unique[int(i)] for i in picks] + if selection_policy == "top_n": + if keep_weak: + ordered = sorted(unique, key=lambda c: (-c.pvalue, c.score)) + else: + ordered = sorted(unique, key=lambda c: (c.pvalue, -c.score)) + return ordered[:n_sites] + if selection_policy == "stratified": + return _stratified_sample( + unique, + n_sites=n_sites, + rng=rng, + n_bins=n_bins, + ) + raise ValueError(f"Unsupported pwm selection_policy: {selection_policy}") + + def sample_pwm_sites( rng: np.random.Generator, motif: PWMMotif, @@ -232,19 +408,53 @@ def sample_pwm_sites( max_seconds: Optional[float] = None, score_threshold: Optional[float], score_percentile: Optional[float], + scoring_backend: str = "densegen", 
+ pvalue_threshold: Optional[float] = None, + pvalue_bins: Optional[Sequence[float]] = None, + pvalue_bin_ids: Optional[Sequence[int]] = None, + bgfile: Optional[str | Path] = None, + selection_policy: str = "random_uniform", + keep_all_candidates_debug: bool = False, + include_matched_sequence: bool = False, + debug_output_dir: Optional[Path] = None, + debug_label: Optional[str] = None, length_policy: str = "exact", length_range: Optional[Sequence[int]] = None, trim_window_length: Optional[int] = None, trim_window_strategy: str = "max_info", -) -> List[str]: + return_metadata: bool = False, +) -> List[str] | Tuple[List[str], dict[str, dict]]: if n_sites <= 0: raise ValueError("n_sites must be > 0") if oversample_factor <= 0: raise ValueError("oversample_factor must be > 0") if max_seconds is not None and float(max_seconds) <= 0: raise ValueError("max_seconds must be > 0 when set") - if (score_threshold is None) == (score_percentile is None): - raise ValueError("PWM sampling requires exactly one of score_threshold or score_percentile") + scoring_backend = str(scoring_backend or "densegen").lower() + if scoring_backend not in {"densegen", "fimo"}: + raise ValueError(f"Unsupported pwm sampling scoring_backend: {scoring_backend}") + if scoring_backend == "densegen": + if (score_threshold is None) == (score_percentile is None): + raise ValueError("PWM sampling requires exactly one of score_threshold or score_percentile") + if pvalue_bins is not None: + raise ValueError("pvalue_bins is only valid when scoring_backend='fimo'") + if pvalue_bin_ids is not None: + raise ValueError("pvalue_bin_ids is only valid when scoring_backend='fimo'") + if include_matched_sequence: + raise ValueError("include_matched_sequence is only valid when scoring_backend='fimo'") + else: + if pvalue_threshold is None: + raise ValueError("PWM sampling requires pvalue_threshold when scoring_backend='fimo'") + pvalue_threshold = float(pvalue_threshold) + if not (0.0 < pvalue_threshold <= 1.0): + 
raise ValueError("pwm.sampling.pvalue_threshold must be between 0 and 1") + if selection_policy not in {"random_uniform", "top_n", "stratified"}: + raise ValueError(f"Unsupported pwm selection_policy: {selection_policy}") + if score_threshold is not None or score_percentile is not None: + log.warning( + "PWM sampling scoring_backend=fimo ignores score_threshold/score_percentile for motif %s.", + motif.motif_id, + ) if strategy == "consensus" and n_sites != 1: raise ValueError("PWM sampling strategy 'consensus' requires n_sites=1") @@ -276,10 +486,41 @@ def sample_pwm_sites( matrix = motif.matrix score_label = f"threshold={score_threshold}" if score_threshold is not None else f"percentile={score_percentile}" + pvalue_label = None + if scoring_backend == "fimo" and pvalue_threshold is not None: + comparator = ">=" if keep_low else "<=" + pvalue_label = f"{comparator}{pvalue_threshold:g}" length_label = str(length_policy) if length_policy == "range" and length_range is not None and len(length_range) == 2: length_label = f"{length_policy}({length_range[0]}..{length_range[1]})" + def _cap_label(cap_applied: bool, time_limited: bool) -> str: + cap_label = "" + if cap_applied and max_candidates is not None: + cap_label = f" (capped by max_candidates={max_candidates})" + if time_limited and max_seconds is not None: + cap_label = f"{cap_label}; max_seconds={max_seconds}" if cap_label else f" (max_seconds={max_seconds})" + return cap_label + + def _context(length_obs: str, cap_applied: bool, requested: int, generated: int, time_limited: bool) -> dict: + return { + "motif_id": motif.motif_id, + "width": width, + "strategy": strategy, + "length_label": length_label, + "window_label": window_label, + "length_observed": length_obs, + "score_label": score_label, + "pvalue_label": pvalue_label, + "n_sites": n_sites, + "oversample_factor": oversample_factor, + "requested_candidates": requested, + "generated_candidates": generated, + "cap_applied": cap_applied, + "cap_label": 
_cap_label(cap_applied, time_limited), + "time_limited": time_limited, + } + def _select( candidates: List[Tuple[str, float]], *, @@ -289,33 +530,13 @@ def _select( generated: int, time_limited: bool, ): - cap_label = "" - if cap_applied and max_candidates is not None: - cap_label = f" (capped by max_candidates={max_candidates})" - if time_limited and max_seconds is not None: - cap_label = f"{cap_label}; max_seconds={max_seconds}" if cap_label else f" (max_seconds={max_seconds})" return select_by_score( candidates, n_sites=n_sites, threshold=score_threshold, percentile=score_percentile, keep_low=keep_low, - context={ - "motif_id": motif.motif_id, - "width": width, - "strategy": strategy, - "length_label": length_label, - "window_label": window_label, - "length_observed": length_obs, - "score_label": score_label, - "n_sites": n_sites, - "oversample_factor": oversample_factor, - "requested_candidates": requested, - "generated_candidates": generated, - "cap_applied": cap_applied, - "cap_label": cap_label, - "time_limited": time_limited, - }, + context=_context(length_obs, cap_applied, requested, generated, time_limited), ) def _resolve_length() -> int: @@ -342,19 +563,180 @@ def _embed_with_background(seq: str, target_len: int) -> str: right = sample_sequence_from_background(rng, motif.background, right_len) return f"{left}{seq}{right}" + def _score_with_fimo( + sequences: List[str], + *, + length_obs: str, + cap_applied: bool, + requested: int, + generated: int, + time_limited: bool, + ) -> tuple[List[str], dict[str, dict]]: + import tempfile + + from .pwm_fimo import ( + aggregate_best_hits, + build_candidate_records, + run_fimo, + write_candidates_fasta, + write_minimal_meme_motif, + ) + + if pvalue_threshold is None: + raise ValueError("pvalue_threshold required for fimo backend") + resolved_bins = _resolve_pvalue_edges(pvalue_bins) + allowed_bins: Optional[set[int]] = None + if pvalue_bin_ids is not None: + allowed_bins = {int(idx) for idx in pvalue_bin_ids} + 
max_idx = len(resolved_bins) - 1 + if any(idx > max_idx for idx in allowed_bins): + raise ValueError(f"pvalue_bin_ids contains an index outside the available bins (max={max_idx}).") + keep_weak = keep_low + debug_path: Optional[Path] = None + debug_dir = debug_output_dir + if keep_all_candidates_debug: + if debug_dir is None: + tmp_dir = tempfile.mkdtemp(prefix="densegen-fimo-") + debug_dir = Path(tmp_dir) + log.warning( + "PWM sampling keep_all_candidates_debug enabled without outputs_root; " + "writing FIMO debug TSVs to %s", + debug_dir, + ) + debug_dir.mkdir(parents=True, exist_ok=True) + label = _safe_label(debug_label or motif.motif_id) + debug_path = debug_dir / f"{label}__fimo.tsv" + + with tempfile.TemporaryDirectory() as tmp: + tmp_path = Path(tmp) + meme_path = tmp_path / "motif.meme" + fasta_path = tmp_path / "candidates.fasta" + motif_for_fimo = PWMMotif(motif_id=motif.motif_id, matrix=matrix, background=motif.background) + write_minimal_meme_motif(motif_for_fimo, meme_path) + records = build_candidate_records(motif.motif_id, sequences) + write_candidates_fasta(records, fasta_path) + thresh = 1.0 if keep_all_candidates_debug or keep_weak else float(pvalue_threshold) + rows, raw_tsv = run_fimo( + meme_motif_path=meme_path, + fasta_path=fasta_path, + bgfile=Path(bgfile) if bgfile is not None else None, + thresh=thresh, + include_matched_sequence=include_matched_sequence or keep_all_candidates_debug, + return_tsv=debug_path is not None, + ) + if debug_path is not None and raw_tsv is not None: + debug_path.write_text(raw_tsv) + log.info("FIMO debug TSV written: %s", debug_path) + best_hits = aggregate_best_hits(rows) + + candidates: List[FimoCandidate] = [] + total_bin_counts = [0 for _ in resolved_bins] + accepted_bin_counts = [0 for _ in resolved_bins] + for rec_id, seq in records: + hit = best_hits.get(rec_id) + if hit is None: + continue + bin_id, bin_low, bin_high = _assign_pvalue_bin(hit.pvalue, resolved_bins) + total_bin_counts[bin_id] += 1 + if 
keep_weak: + accept = hit.pvalue >= float(pvalue_threshold) + else: + accept = hit.pvalue <= float(pvalue_threshold) + if allowed_bins is not None and bin_id not in allowed_bins: + continue + if not accept: + continue + accepted_bin_counts[bin_id] += 1 + candidates.append( + FimoCandidate( + seq=seq, + pvalue=hit.pvalue, + score=hit.score, + bin_id=bin_id, + bin_low=bin_low, + bin_high=bin_high, + start=hit.start, + stop=hit.stop, + strand=hit.strand, + matched_sequence=hit.matched_sequence, + ) + ) + + total_hits = sum(total_bin_counts) + accepted_hits = sum(accepted_bin_counts) + bins_label = _format_pvalue_bins(resolved_bins, total_bin_counts) + accepted_label = _format_pvalue_bins(resolved_bins, accepted_bin_counts) + + context = _context(length_obs, cap_applied, requested, generated, time_limited) + context["pvalue_bins_label"] = bins_label + context["pvalue_bin_ids"] = sorted(allowed_bins) if allowed_bins is not None else None + picked = _select_fimo_candidates( + candidates, + n_sites=n_sites, + selection_policy=selection_policy, + rng=rng, + pvalue_threshold=float(pvalue_threshold), + keep_weak=keep_weak, + n_bins=len(resolved_bins), + context=context, + ) + selected_bin_counts = [0 for _ in resolved_bins] + for cand in picked: + idx = max(0, min(int(cand.bin_id), len(resolved_bins) - 1)) + selected_bin_counts[idx] += 1 + selected_label = _format_pvalue_bins(resolved_bins, selected_bin_counts) + log.info( + "FIMO yield for motif %s: hits=%d accepted=%d selected=%d bins=%s accepted_bins=%s selected_bins=%s%s", + motif.motif_id, + total_hits, + accepted_hits, + len(picked), + bins_label, + accepted_label, + selected_label, + f" allowed_bins={sorted(allowed_bins)}" if allowed_bins is not None else "", + ) + meta_by_seq: dict[str, dict] = {} + for cand in picked: + meta = { + "fimo_score": cand.score, + "fimo_pvalue": cand.pvalue, + "fimo_bin_id": cand.bin_id, + "fimo_bin_low": cand.bin_low, + "fimo_bin_high": cand.bin_high, + "fimo_start": cand.start, + 
"fimo_stop": cand.stop, + "fimo_strand": cand.strand, + } + if cand.matched_sequence: + meta["fimo_matched_sequence"] = cand.matched_sequence + meta_by_seq[cand.seq] = meta + return [c.seq for c in picked], meta_by_seq + if strategy == "consensus": seq = "".join(max(row.items(), key=lambda kv: kv[1])[0] for row in matrix) target_len = _resolve_length() full_seq = _embed_with_background(seq, target_len) - score = score_sequence(seq, matrix, log_odds=log_odds, background=motif.background) - return _select( - [(full_seq, score)], + if scoring_backend == "densegen": + score = score_sequence(seq, matrix, log_odds=log_odds, background=motif.background) + selected = _select( + [(full_seq, score)], + length_obs=str(target_len), + cap_applied=False, + requested=1, + generated=1, + time_limited=False, + ) + return (selected, {}) if return_metadata else selected + selected, meta = _score_with_fimo( + [full_seq], length_obs=str(target_len), cap_applied=False, requested=1, generated=1, time_limited=False, ) + return (selected, meta) if return_metadata else selected requested_candidates = max(1, n_sites * oversample_factor) n_candidates = requested_candidates @@ -373,7 +755,7 @@ def _embed_with_background(seq: str, target_len: int) -> str: cap_val, ) n_candidates = max(1, n_candidates) - candidates: List[Tuple[str, float]] = [] + candidates: List[Tuple[str, str]] = [] lengths: List[int] = [] start = time.monotonic() time_limited = False @@ -389,12 +771,7 @@ def _embed_with_background(seq: str, target_len: int) -> str: else: core = sample_sequence_from_pwm(rng, matrix) full_seq = _embed_with_background(core, target_len) - candidates.append( - ( - full_seq, - score_sequence(core, matrix, log_odds=log_odds, background=motif.background), - ) - ) + candidates.append((full_seq, core)) if time_limited: log.warning( "PWM sampling hit max_seconds for motif %s: generated=%d requested=%d", @@ -405,11 +782,26 @@ def _embed_with_background(seq: str, target_len: int) -> str: length_obs = "-" 
if lengths: length_obs = f"{min(lengths)}..{max(lengths)}" if min(lengths) != max(lengths) else str(lengths[0]) - return _select( - candidates, + if scoring_backend == "densegen": + scored = [ + (full_seq, score_sequence(core, matrix, log_odds=log_odds, background=motif.background)) + for full_seq, core in candidates + ] + selected = _select( + scored, + length_obs=length_obs, + cap_applied=cap_applied, + requested=requested_candidates, + generated=len(candidates), + time_limited=time_limited, + ) + return (selected, {}) if return_metadata else selected + selected, meta = _score_with_fimo( + [full_seq for full_seq, _core in candidates], length_obs=length_obs, cap_applied=cap_applied, requested=requested_candidates, generated=len(candidates), time_limited=time_limited, ) + return (selected, meta) if return_metadata else selected diff --git a/src/dnadesign/densegen/src/adapters/sources/sequence_library.py b/src/dnadesign/densegen/src/adapters/sources/sequence_library.py index a6875fa9..d17bbf04 100644 --- a/src/dnadesign/densegen/src/adapters/sources/sequence_library.py +++ b/src/dnadesign/densegen/src/adapters/sources/sequence_library.py @@ -50,7 +50,7 @@ def _load_table(self, path: Path, fmt: str) -> pd.DataFrame: return pq.read_table(path).to_pandas() raise ValueError(f"Unsupported sequence_library.format: {fmt}") - def load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): data_path = resolve_path(self.cfg_path, self.path) if not (data_path.exists() and data_path.is_file()): raise FileNotFoundError(f"Sequence library file not found. 
Looked here:\n - {data_path}") diff --git a/src/dnadesign/densegen/src/adapters/sources/usr_sequences.py b/src/dnadesign/densegen/src/adapters/sources/usr_sequences.py index 32c07cb3..66606d5c 100644 --- a/src/dnadesign/densegen/src/adapters/sources/usr_sequences.py +++ b/src/dnadesign/densegen/src/adapters/sources/usr_sequences.py @@ -26,7 +26,7 @@ class USRSequencesDataSource(BaseDataSource): root: str limit: Optional[int] = None - def load_data(self, *, rng=None): + def load_data(self, *, rng=None, outputs_root: Path | None = None): try: from dnadesign.usr.src.dataset import Dataset as USRDataset # type: ignore except Exception as e: # pragma: no cover - depends on optional USR install diff --git a/src/dnadesign/densegen/src/cli.py b/src/dnadesign/densegen/src/cli.py index 5253f2c8..fbc6e201 100644 --- a/src/dnadesign/densegen/src/cli.py +++ b/src/dnadesign/densegen/src/cli.py @@ -254,9 +254,16 @@ def _warn_pwm_sampling_configs(loaded, cfg_path: Path) -> None: sampling = getattr(inp, "sampling", None) if sampling is None: continue + scoring_backend = getattr(sampling, "scoring_backend", "densegen") n_sites = getattr(sampling, "n_sites", None) oversample = getattr(sampling, "oversample_factor", None) max_candidates = getattr(sampling, "max_candidates", None) + score_threshold = getattr(sampling, "score_threshold", None) + score_percentile = getattr(sampling, "score_percentile", None) + if scoring_backend == "fimo" and (score_threshold is not None or score_percentile is not None): + warnings.append( + f"{getattr(inp, 'name', src_type)}: scoring_backend=fimo ignores score_threshold/score_percentile." 
+ ) if isinstance(n_sites, int) and isinstance(oversample, int) and max_candidates is not None: requested = n_sites * oversample if requested > int(max_candidates): @@ -949,7 +956,11 @@ def describe( "motifs", "n_sites", "strategy", + "backend", "score", + "selection", + "bins", + "bgfile", "oversample", "max_candidates", "max_seconds", @@ -971,11 +982,25 @@ def describe( motif_label = f"{len(getattr(inp, 'paths', []) or [])} artifacts" else: motif_label = "from artifact" + backend = getattr(sampling, "scoring_backend", "densegen") score_label = "-" - if sampling.score_threshold is not None: + if backend == "fimo" and sampling.pvalue_threshold is not None: + comparator = ">=" if sampling.strategy == "background" else "<=" + score_label = f"pvalue{comparator}{sampling.pvalue_threshold}" + elif sampling.score_threshold is not None: score_label = f"threshold={sampling.score_threshold}" elif sampling.score_percentile is not None: score_label = f"percentile={sampling.score_percentile}" + selection_label = "-" if backend != "fimo" else (getattr(sampling, "selection_policy", None) or "-") + bins_label = "-" + if backend == "fimo": + bins_label = "canonical" + if getattr(sampling, "pvalue_bins", None) is not None: + bins_label = "custom" + bin_ids = getattr(sampling, "pvalue_bin_ids", None) + if bin_ids: + bins_label = f"{bins_label} pick={bin_ids}" + bgfile_label = getattr(sampling, "bgfile", None) or "-" length_label = str(sampling.length_policy) if sampling.length_policy == "range" and sampling.length_range is not None: length_label = f"range({sampling.length_range[0]}..{sampling.length_range[1]})" @@ -984,7 +1009,11 @@ def describe( motif_label, str(sampling.n_sites), str(sampling.strategy), + str(backend), score_label, + str(selection_label), + str(bins_label), + str(bgfile_label), str(sampling.oversample_factor), str(sampling.max_candidates) if sampling.max_candidates is not None else "-", str(sampling.max_seconds) if sampling.max_seconds is not None else "-", @@ 
-1146,6 +1175,9 @@ def run( raise typer.Exit(code=1) console.print(":tada: [bold green]Run complete[/].") + console.print("[bold]Next steps[/]:") + console.print(f" - dense summarize --library -c {cfg_path}") + console.print(f" - dense report -c {cfg_path}") # Auto-plot if configured if not no_plot and root.plots: diff --git a/src/dnadesign/densegen/src/config/__init__.py b/src/dnadesign/densegen/src/config/__init__.py index 192d2f6b..beed783f 100644 --- a/src/dnadesign/densegen/src/config/__init__.py +++ b/src/dnadesign/densegen/src/config/__init__.py @@ -21,6 +21,8 @@ from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator, model_validator from typing_extensions import Literal +from ..core.pvalue_bins import CANONICAL_PVALUE_BINS + # ---- Strict YAML loader (duplicate keys fail) ---- class _StrictLoader(yaml.SafeLoader): @@ -161,6 +163,14 @@ class PWMSamplingConfig(BaseModel): max_seconds: Optional[float] = None score_threshold: Optional[float] = None score_percentile: Optional[float] = None + scoring_backend: Literal["densegen", "fimo"] = "densegen" + pvalue_threshold: Optional[float] = None + pvalue_bins: Optional[List[float]] = None + pvalue_bin_ids: Optional[List[int]] = None + bgfile: Optional[str] = None + selection_policy: Literal["random_uniform", "top_n", "stratified"] = "random_uniform" + keep_all_candidates_debug: bool = False + include_matched_sequence: bool = False length_policy: Literal["exact", "range"] = "exact" length_range: Optional[tuple[int, int]] = None trim_window_length: Optional[int] = None @@ -219,15 +229,76 @@ def _trim_length_ok(cls, v: Optional[int]): raise ValueError("pwm.sampling.trim_window_length must be a positive integer") return v + @field_validator("bgfile") + @classmethod + def _bgfile_ok(cls, v: Optional[str]): + if v is None: + return v + if not str(v).strip(): + raise ValueError("pwm.sampling.bgfile must be a non-empty string when set") + return str(v).strip() + + @field_validator("pvalue_bins") 
+ @classmethod + def _pvalue_bins_ok(cls, v: Optional[List[float]]): + if v is None: + return v + if not v: + raise ValueError("pwm.sampling.pvalue_bins must be non-empty when set") + bins = [float(x) for x in v] + prev = 0.0 + for val in bins: + if not (0.0 < val <= 1.0): + raise ValueError("pwm.sampling.pvalue_bins values must be in (0, 1]") + if val <= prev: + raise ValueError("pwm.sampling.pvalue_bins must be strictly increasing") + prev = val + if abs(bins[-1] - 1.0) > 1e-12: + raise ValueError("pwm.sampling.pvalue_bins must end with 1.0") + return bins + + @field_validator("pvalue_bin_ids") + @classmethod + def _pvalue_bin_ids_ok(cls, v: Optional[List[int]]): + if v is None: + return v + if not v: + raise ValueError("pwm.sampling.pvalue_bin_ids must be non-empty when set") + ids = [int(x) for x in v] + if any(idx < 0 for idx in ids): + raise ValueError("pwm.sampling.pvalue_bin_ids values must be >= 0") + if len(set(ids)) != len(ids): + raise ValueError("pwm.sampling.pvalue_bin_ids must be unique") + return ids + @model_validator(mode="after") def _score_mode(self): has_thresh = self.score_threshold is not None has_pct = self.score_percentile is not None - if has_thresh == has_pct: - raise ValueError("pwm.sampling must set exactly one of score_threshold or score_percentile") + if self.scoring_backend == "densegen": + if has_thresh == has_pct: + raise ValueError("pwm.sampling must set exactly one of score_threshold or score_percentile") + if self.pvalue_threshold is not None: + raise ValueError("pwm.sampling.pvalue_threshold is only valid when scoring_backend='fimo'") + if self.pvalue_bins is not None: + raise ValueError("pwm.sampling.pvalue_bins is only valid when scoring_backend='fimo'") + if self.pvalue_bin_ids is not None: + raise ValueError("pwm.sampling.pvalue_bin_ids is only valid when scoring_backend='fimo'") + if self.include_matched_sequence: + raise ValueError("pwm.sampling.include_matched_sequence is only valid when scoring_backend='fimo'") + else: 
+ if self.pvalue_threshold is None: + raise ValueError("pwm.sampling.pvalue_threshold is required when scoring_backend='fimo'") + if not (0.0 < float(self.pvalue_threshold) <= 1.0): + raise ValueError("pwm.sampling.pvalue_threshold must be between 0 and 1") + if self.pvalue_bin_ids is not None: + bins = list(self.pvalue_bins) if self.pvalue_bins is not None else list(CANONICAL_PVALUE_BINS) + max_idx = len(bins) - 1 + if any(idx > max_idx for idx in self.pvalue_bin_ids): + raise ValueError("pwm.sampling.pvalue_bin_ids contains an index outside the available bins") if self.strategy == "consensus" and int(self.n_sites) != 1: raise ValueError("pwm.sampling.strategy=consensus requires n_sites=1") - if self.score_percentile is not None: + if self.scoring_backend == "densegen" and self.score_percentile is not None: if not (0.0 < float(self.score_percentile) < 100.0): raise ValueError("pwm.sampling.score_percentile must be between 0 and 100") if self.length_policy == "exact" and self.length_range is not None: diff --git a/src/dnadesign/densegen/src/core/metadata.py b/src/dnadesign/densegen/src/core/metadata.py index 263bb1d0..cb8d8ca6 100644 --- a/src/dnadesign/densegen/src/core/metadata.py +++ b/src/dnadesign/densegen/src/core/metadata.py @@ -141,8 +141,16 @@ def build_metadata( "sampling_fraction": sampling_fraction, "sampling_fraction_pairs": sampling_fraction_pairs, "input_pwm_strategy": input_meta.get("input_pwm_strategy"), + "input_pwm_scoring_backend": input_meta.get("input_pwm_scoring_backend"), "input_pwm_score_threshold": input_meta.get("input_pwm_score_threshold"), "input_pwm_score_percentile": input_meta.get("input_pwm_score_percentile"), + "input_pwm_pvalue_threshold": input_meta.get("input_pwm_pvalue_threshold"), + "input_pwm_pvalue_bins": input_meta.get("input_pwm_pvalue_bins"), + "input_pwm_pvalue_bin_ids": input_meta.get("input_pwm_pvalue_bin_ids"), + "input_pwm_selection_policy": input_meta.get("input_pwm_selection_policy"), + "input_pwm_bgfile": 
input_meta.get("input_pwm_bgfile"), + "input_pwm_keep_all_candidates_debug": input_meta.get("input_pwm_keep_all_candidates_debug"), + "input_pwm_include_matched_sequence": input_meta.get("input_pwm_include_matched_sequence"), "input_pwm_n_sites": input_meta.get("input_pwm_n_sites"), "input_pwm_oversample_factor": input_meta.get("input_pwm_oversample_factor"), "fixed_elements": fixed_elements_dump, diff --git a/src/dnadesign/densegen/src/core/metadata_schema.py b/src/dnadesign/densegen/src/core/metadata_schema.py index db26d5d8..70379bd3 100644 --- a/src/dnadesign/densegen/src/core/metadata_schema.py +++ b/src/dnadesign/densegen/src/core/metadata_schema.py @@ -93,8 +93,16 @@ class MetaField: allow_none=True, ), MetaField("input_pwm_strategy", (str,), "PWM sampling strategy.", allow_none=True), + MetaField("input_pwm_scoring_backend", (str,), "PWM scoring backend (densegen|fimo).", allow_none=True), MetaField("input_pwm_score_threshold", (numbers.Real,), "PWM score threshold.", allow_none=True), MetaField("input_pwm_score_percentile", (numbers.Real,), "PWM score percentile.", allow_none=True), + MetaField("input_pwm_pvalue_threshold", (numbers.Real,), "PWM p-value threshold (FIMO).", allow_none=True), + MetaField("input_pwm_pvalue_bins", (list,), "PWM p-value bins (FIMO).", allow_none=True), + MetaField("input_pwm_pvalue_bin_ids", (list,), "Selected p-value bin indices (FIMO).", allow_none=True), + MetaField("input_pwm_selection_policy", (str,), "PWM selection policy (FIMO).", allow_none=True), + MetaField("input_pwm_bgfile", (str,), "PWM background model path (FIMO).", allow_none=True), + MetaField("input_pwm_keep_all_candidates_debug", (bool,), "PWM FIMO debug TSV enabled.", allow_none=True), + MetaField("input_pwm_include_matched_sequence", (bool,), "PWM matched-sequence capture.", allow_none=True), MetaField("input_pwm_n_sites", (int,), "PWM sampling n_sites.", allow_none=True), MetaField("input_pwm_oversample_factor", (int,), "PWM sampling oversample factor.", 
allow_none=True),
     MetaField("fixed_elements", (dict,), "Fixed-element constraints (promoters + side biases)."),
@@ -198,6 +206,24 @@ def _validate_list_fields(meta: Mapping[str, Any]) -> None:
                 raise TypeError("Metadata field 'used_tf_counts' must contain dict entries")
             if "tf" not in item or "count" not in item:
                 raise ValueError("used_tf_counts entries must include 'tf' and 'count'")
             if not isinstance(item["tf"], str):
                 raise TypeError("used_tf_counts.tf must be a string")
             if not isinstance(item["count"], int):
                 raise TypeError("used_tf_counts.count must be an int")
+
+    if "input_pwm_pvalue_bins" in meta:
+        vals = meta["input_pwm_pvalue_bins"]
+        if vals is not None:
+            if isinstance(vals, (str, bytes)) or not isinstance(vals, Sequence):
+                raise TypeError("Metadata field 'input_pwm_pvalue_bins' must be a list of numbers")
+            for item in vals:
+                if not isinstance(item, numbers.Real):
+                    raise TypeError("Metadata field 'input_pwm_pvalue_bins' must contain only numbers")
+
+    if "input_pwm_pvalue_bin_ids" in meta:
+        vals = meta["input_pwm_pvalue_bin_ids"]
+        if vals is not None:
+            if isinstance(vals, (str, bytes)) or not isinstance(vals, Sequence):
+                raise TypeError("Metadata field 'input_pwm_pvalue_bin_ids' must be a list of integers")
+            for item in vals:
+                if not isinstance(item, int):
+                    raise TypeError("Metadata field 'input_pwm_pvalue_bin_ids' must contain only integers")
diff --git a/src/dnadesign/densegen/src/core/pipeline.py b/src/dnadesign/densegen/src/core/pipeline.py
index 05b4e8da..dea38e94 100644
--- a/src/dnadesign/densegen/src/core/pipeline.py
+++ b/src/dnadesign/densegen/src/core/pipeline.py
@@ -44,6 +44,7 @@
 )
 from .metadata import build_metadata
 from .postprocess import random_fill
+from .pvalue_bins import resolve_pvalue_bins
 from .run_manifest import PlanManifest, RunManifest
 from .run_paths import (
     ensure_run_meta_dir,
@@ -164,6 +165,18 @@ def _sampling_attr(sampling, name: str, default=None):
     return default
 
 
+def _resolve_pvalue_bins_meta(sampling) -> list[float] | None:
+    if sampling is None:
+        return None
+    
backend = str(_sampling_attr(sampling, "scoring_backend") or "densegen").lower() + bins = _sampling_attr(sampling, "pvalue_bins") + if backend == "fimo": + return resolve_pvalue_bins(bins) + if bins is None: + return None + return [float(v) for v in bins] + + def _extract_pwm_sampling_config(source_cfg) -> dict | None: sampling = getattr(source_cfg, "sampling", None) if sampling is None: @@ -190,6 +203,7 @@ def _extract_pwm_sampling_config(source_cfg) -> dict | None: length_range = list(length_range) return { "strategy": _sampling_attr(sampling, "strategy"), + "scoring_backend": _sampling_attr(sampling, "scoring_backend"), "n_sites": _sampling_attr(sampling, "n_sites"), "oversample_factor": _sampling_attr(sampling, "oversample_factor"), "max_candidates": _sampling_attr(sampling, "max_candidates"), @@ -199,6 +213,11 @@ def _extract_pwm_sampling_config(source_cfg) -> dict | None: "capped": capped, "score_threshold": _sampling_attr(sampling, "score_threshold"), "score_percentile": _sampling_attr(sampling, "score_percentile"), + "pvalue_threshold": _sampling_attr(sampling, "pvalue_threshold"), + "pvalue_bins": _resolve_pvalue_bins_meta(sampling), + "selection_policy": _sampling_attr(sampling, "selection_policy"), + "bgfile": _sampling_attr(sampling, "bgfile"), + "keep_all_candidates_debug": _sampling_attr(sampling, "keep_all_candidates_debug"), "length_policy": _sampling_attr(sampling, "length_policy"), "length_range": length_range, } @@ -452,8 +471,16 @@ def _input_metadata(source_cfg, cfg_path: Path) -> dict: sampling = getattr(source_cfg, "sampling", None) if sampling is not None: meta["input_pwm_strategy"] = getattr(sampling, "strategy", None) + meta["input_pwm_scoring_backend"] = getattr(sampling, "scoring_backend", None) meta["input_pwm_score_threshold"] = getattr(sampling, "score_threshold", None) meta["input_pwm_score_percentile"] = getattr(sampling, "score_percentile", None) + meta["input_pwm_pvalue_threshold"] = getattr(sampling, "pvalue_threshold", None) + 
meta["input_pwm_pvalue_bins"] = _resolve_pvalue_bins_meta(sampling) + meta["input_pwm_pvalue_bin_ids"] = getattr(sampling, "pvalue_bin_ids", None) + meta["input_pwm_selection_policy"] = getattr(sampling, "selection_policy", None) + meta["input_pwm_bgfile"] = getattr(sampling, "bgfile", None) + meta["input_pwm_keep_all_candidates_debug"] = getattr(sampling, "keep_all_candidates_debug", None) + meta["input_pwm_include_matched_sequence"] = getattr(sampling, "include_matched_sequence", None) meta["input_pwm_n_sites"] = getattr(sampling, "n_sites", None) meta["input_pwm_oversample_factor"] = getattr(sampling, "oversample_factor", None) meta["input_pwm_max_candidates"] = getattr(sampling, "max_candidates", None) @@ -1012,18 +1039,26 @@ def _load_failure_counts_from_attempts( def _load_existing_library_index(outputs_root: Path) -> int: attempts_path = outputs_root / "attempts.parquet" - if not attempts_path.exists(): - return 0 - try: - df = pd.read_parquet(attempts_path, columns=["sampling_library_index"]) - except Exception: - return 0 - if df.empty or "sampling_library_index" not in df.columns: - return 0 - try: - return int(pd.to_numeric(df["sampling_library_index"], errors="coerce").dropna().max() or 0) - except Exception: + paths: list[Path] = [] + if attempts_path.exists(): + paths.append(attempts_path) + paths.extend(sorted(outputs_root.glob("attempts_part-*.parquet"))) + if not paths: return 0 + max_idx = 0 + for path in paths: + try: + df = pd.read_parquet(path, columns=["sampling_library_index"]) + except Exception: + continue + if df.empty or "sampling_library_index" not in df.columns: + continue + try: + current = int(pd.to_numeric(df["sampling_library_index"], errors="coerce").dropna().max() or 0) + except Exception: + continue + max_idx = max(max_idx, current) + return max_idx def _append_attempt( @@ -1289,7 +1324,7 @@ def _process_plan_for_source( # Load source src_obj = deps.source_factory(source_cfg, cfg_path) - data_entries, meta_df = 
src_obj.load_data(rng=np_rng) + data_entries, meta_df = src_obj.load_data(rng=np_rng, outputs_root=outputs_root) input_meta = _input_metadata(source_cfg, cfg_path) input_tf_tfbs_pair_count: int | None = None if meta_df is not None and isinstance(meta_df, pd.DataFrame): @@ -1313,6 +1348,17 @@ def _process_plan_for_source( "sampling_fraction_pairs": None, } ) + pair_label = str(input_tf_tfbs_pair_count) if input_tf_tfbs_pair_count is not None else "-" + log.info( + "[%s/%s] Input summary: mode=%s rows=%d tfs=%d tfbs=%d pairs=%s", + source_label, + plan_name, + input_meta.get("input_mode"), + input_row_count, + input_tf_count, + input_tfbs_count, + pair_label, + ) source_type = getattr(source_cfg, "type", None) if source_type in PWM_INPUT_TYPES and meta_df is not None and "tf" in meta_df.columns: input_meta["input_pwm_ids"] = sorted(set(meta_df["tf"].tolist())) @@ -1325,15 +1371,27 @@ def _process_plan_for_source( max_seconds = _sampling_attr(input_sampling_cfg, "max_seconds") score_threshold = _sampling_attr(input_sampling_cfg, "score_threshold") score_percentile = _sampling_attr(input_sampling_cfg, "score_percentile") + scoring_backend = _sampling_attr(input_sampling_cfg, "scoring_backend") or "densegen" + pvalue_threshold = _sampling_attr(input_sampling_cfg, "pvalue_threshold") + selection_policy = _sampling_attr(input_sampling_cfg, "selection_policy") length_policy = _sampling_attr(input_sampling_cfg, "length_policy") length_range = _sampling_attr(input_sampling_cfg, "length_range") if length_range is not None: length_range = list(length_range) score_label = "-" - if score_threshold is not None: + if scoring_backend == "fimo" and pvalue_threshold is not None: + comparator = ">=" if str(strategy) == "background" else "<=" + score_label = f"pvalue{comparator}{pvalue_threshold}" + elif score_threshold is not None: score_label = f"threshold={score_threshold}" elif score_percentile is not None: score_label = f"percentile={score_percentile}" + bins_label = "-" + if 
scoring_backend == "fimo": + bins_label = "canonical" if _sampling_attr(input_sampling_cfg, "pvalue_bins") is None else "custom" + bin_ids = _sampling_attr(input_sampling_cfg, "pvalue_bin_ids") + if bin_ids: + bins_label = f"{bins_label} pick={sorted(list(bin_ids))}" length_label = str(length_policy) if length_policy == "range" and length_range: length_label = f"{length_policy}({length_range[0]}..{length_range[1]})" @@ -1345,14 +1403,18 @@ def _process_plan_for_source( if max_seconds is not None: cap_label = f"{cap_label}; max_seconds={max_seconds}" if cap_label != "-" else f"{max_seconds}s" counts_label = _summarize_tf_counts(meta_df["tf"].tolist()) + selection_label = selection_policy if scoring_backend == "fimo" else "-" log.info( - "PWM input sampling for %s: motifs=%d | sites=%s | strategy=%s | score=%s | " - "oversample=%s | max_candidates=%s | length=%s", + "PWM input sampling for %s: motifs=%d | sites=%s | strategy=%s | backend=%s | score=%s | " + "selection=%s | bins=%s | oversample=%s | max_candidates=%s | length=%s", source_label, len(input_meta.get("input_pwm_ids") or []), counts_label or "-", strategy, + scoring_backend, score_label, + selection_label, + bins_label, oversample, cap_label, length_label, @@ -1702,26 +1764,34 @@ def _record_site_failures(reason: str) -> None: input_meta["sampling_fraction_pairs"] = sampling_fraction_pairs # Library summary (succinct) tf_summary = _summarize_tf_counts(regulator_labels) + library_index = sampling_info.get("library_index") + strategy_label = sampling_info.get("library_sampling_strategy", library_sampling_strategy) + pool_label = sampling_info.get("pool_strategy") + target_len = sampling_info.get("target_length") + achieved_len = sampling_info.get("achieved_length") + header = f"Stage B library for {source_label}/{plan_name}" + if library_index is not None: + header = f"{header} (build {library_index})" if tf_summary: log.info( - "Library for %s/%s: %d motifs | TF counts: %s | target=%d achieved=%d pool=%s", 
- source_label, - plan_name, + "%s: %d motifs | TF counts: %s | target=%s achieved=%s pool=%s sampling=%s", + header, len(library_for_opt), tf_summary, - sampling_info.get("target_length"), - sampling_info.get("achieved_length"), - sampling_info.get("pool_strategy"), + target_len, + achieved_len, + pool_label, + strategy_label, ) else: log.info( - "Library for %s/%s: %d motifs | target=%d achieved=%d pool=%s", - source_label, - plan_name, + "%s: %d motifs | target=%s achieved=%s pool=%s sampling=%s", + header, len(library_for_opt), - sampling_info.get("target_length"), - sampling_info.get("achieved_length"), - sampling_info.get("pool_strategy"), + target_len, + achieved_len, + pool_label, + strategy_label, ) solver_min_counts: dict[str, int] | None = None diff --git a/src/dnadesign/densegen/src/core/pvalue_bins.py b/src/dnadesign/densegen/src/core/pvalue_bins.py new file mode 100644 index 00000000..69084a56 --- /dev/null +++ b/src/dnadesign/densegen/src/core/pvalue_bins.py @@ -0,0 +1,32 @@ +""" +-------------------------------------------------------------------------------- + +dnadesign/densegen/core/pvalue_bins.py + +Canonical p-value bin edges for FIMO-based PWM sampling. + +Module Author(s): Eric J. South +Dunlop Lab +-------------------------------------------------------------------------------- +""" + +from __future__ import annotations + +from typing import Sequence + +CANONICAL_PVALUE_BINS: tuple[float, ...] = ( + 1e-10, + 1e-8, + 1e-6, + 1e-4, + 1e-3, + 1e-2, + 1e-1, + 1.0, +) + + +def resolve_pvalue_bins(pvalue_bins: Sequence[float] | None) -> list[float]: + if pvalue_bins is None: + return list(CANONICAL_PVALUE_BINS) + return [float(v) for v in pvalue_bins] diff --git a/src/dnadesign/densegen/src/integrations/__init__.py b/src/dnadesign/densegen/src/integrations/__init__.py new file mode 100644 index 00000000..d3759fd2 --- /dev/null +++ b/src/dnadesign/densegen/src/integrations/__init__.py @@ -0,0 +1,3 @@ +""" +DenseGen external tool integrations. 
+""" diff --git a/src/dnadesign/densegen/src/integrations/meme_suite.py b/src/dnadesign/densegen/src/integrations/meme_suite.py new file mode 100644 index 00000000..9abdb34c --- /dev/null +++ b/src/dnadesign/densegen/src/integrations/meme_suite.py @@ -0,0 +1,41 @@ +""" +-------------------------------------------------------------------------------- + +dnadesign/densegen/integrations/meme_suite.py + +Lightweight MEME Suite tool resolution for DenseGen. + +Module Author(s): Eric J. South +Dunlop Lab +-------------------------------------------------------------------------------- +""" + +from __future__ import annotations + +import os +import shutil +from pathlib import Path + + +def resolve_executable(tool: str, *, tool_path: Path | None = None) -> Path | None: + if tool_path is not None: + resolved = tool_path.expanduser() + if resolved.is_dir(): + candidate = resolved / tool + else: + candidate = resolved + if candidate.name != tool: + raise FileNotFoundError( + f"Configured tool_path points to '{candidate.name}', expected '{tool}'. " + "Provide a bin directory or the correct executable." 
+ ) + if candidate.exists(): + return candidate + raise FileNotFoundError(f"Configured tool_path does not contain '{tool}': {candidate}") + env_dir = os.getenv("MEME_BIN") + if env_dir: + candidate = Path(env_dir).expanduser() / tool + if candidate.exists(): + return candidate + found = shutil.which(tool) + return Path(found) if found else None diff --git a/src/dnadesign/densegen/tests/test_cli_summarize_library.py b/src/dnadesign/densegen/tests/test_cli_summarize_library.py index e5ddc45b..49618288 100644 --- a/src/dnadesign/densegen/tests/test_cli_summarize_library.py +++ b/src/dnadesign/densegen/tests/test_cli_summarize_library.py @@ -59,8 +59,16 @@ def _base_meta(library_hash: str, library_index: int) -> dict: "sampling_fraction": 0.5, "sampling_fraction_pairs": 0.5, "input_pwm_strategy": None, + "input_pwm_scoring_backend": None, "input_pwm_score_threshold": None, "input_pwm_score_percentile": None, + "input_pwm_pvalue_threshold": None, + "input_pwm_pvalue_bins": None, + "input_pwm_pvalue_bin_ids": None, + "input_pwm_selection_policy": None, + "input_pwm_bgfile": None, + "input_pwm_keep_all_candidates_debug": None, + "input_pwm_include_matched_sequence": None, "input_pwm_n_sites": None, "input_pwm_oversample_factor": None, "fixed_elements": {"promoter_constraints": [], "side_biases": {"left": [], "right": []}}, diff --git a/src/dnadesign/densegen/tests/test_outputs_parquet.py b/src/dnadesign/densegen/tests/test_outputs_parquet.py index 502591af..606bd03f 100644 --- a/src/dnadesign/densegen/tests/test_outputs_parquet.py +++ b/src/dnadesign/densegen/tests/test_outputs_parquet.py @@ -54,8 +54,16 @@ def _dummy_meta() -> dict: "sampling_fraction": None, "sampling_fraction_pairs": 0.5, "input_pwm_strategy": None, + "input_pwm_scoring_backend": None, "input_pwm_score_threshold": None, "input_pwm_score_percentile": None, + "input_pwm_pvalue_threshold": None, + "input_pwm_pvalue_bins": None, + "input_pwm_pvalue_bin_ids": None, + "input_pwm_selection_policy": None, + 
"input_pwm_bgfile": None, + "input_pwm_keep_all_candidates_debug": None, + "input_pwm_include_matched_sequence": None, "input_pwm_n_sites": None, "input_pwm_oversample_factor": None, "fixed_elements": {"promoter_constraints": [], "side_biases": {"left": [], "right": []}}, diff --git a/src/dnadesign/densegen/tests/test_pipeline_library_index.py b/src/dnadesign/densegen/tests/test_pipeline_library_index.py new file mode 100644 index 00000000..97816a86 --- /dev/null +++ b/src/dnadesign/densegen/tests/test_pipeline_library_index.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from pathlib import Path + +import pandas as pd + +from dnadesign.densegen.src.core.pipeline import _load_existing_library_index + + +def test_load_existing_library_index_reads_parts(tmp_path: Path) -> None: + outputs = tmp_path + df = pd.DataFrame({"sampling_library_index": [1, 2, 5]}) + part = outputs / "attempts_part-000.parquet" + df.to_parquet(part) + assert _load_existing_library_index(outputs) == 5 diff --git a/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py b/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py new file mode 100644 index 00000000..95c2aa8b --- /dev/null +++ b/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from dnadesign.densegen.src.adapters.sources.pwm_fimo import ( + aggregate_best_hits, + build_candidate_records, + parse_fimo_tsv, + run_fimo, + write_candidates_fasta, + write_minimal_meme_motif, +) +from dnadesign.densegen.src.adapters.sources.pwm_sampling import PWMMotif +from dnadesign.densegen.src.integrations.meme_suite import resolve_executable + + +def test_write_minimal_meme_motif(tmp_path: Path) -> None: + motif = PWMMotif( + motif_id="M1", + matrix=[ + {"A": 0.7, "C": 0.1, "G": 0.1, "T": 0.1}, + {"A": 0.2, "C": 0.3, "G": 0.4, "T": 0.1}, + ], + background={"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + ) + out = tmp_path / "motif.meme" + 
motif_id = write_minimal_meme_motif(motif, out) + text = out.read_text() + assert "MEME version" in text + assert "Background letter frequencies" in text + assert f"MOTIF {motif_id}" in text + lines = [ln for ln in text.splitlines() if ln.strip()] + idx = next(i for i, ln in enumerate(lines) if ln.startswith("letter-probability matrix")) + matrix_lines = lines[idx + 1 : idx + 1 + len(motif.matrix)] + assert len(matrix_lines) == len(motif.matrix) + for row in matrix_lines: + vals = [float(x) for x in row.split()] + assert abs(sum(vals) - 1.0) < 1e-6 + + +def test_write_candidates_fasta(tmp_path: Path) -> None: + records = build_candidate_records("My Motif", ["ACG", "TTT"]) + out = tmp_path / "candidates.fasta" + write_candidates_fasta(records, out) + lines = out.read_text().splitlines() + assert lines[0].startswith(">") + assert lines[1] == "ACG" + assert lines[2].startswith(">") + assert lines[3] == "TTT" + assert records[0][0].endswith("|cand0") + assert records[1][0].endswith("|cand1") + + +def test_parse_fimo_tsv_and_best_hits() -> None: + tsv = "\n".join( + [ + "motif_id\tmotif_alt_id\tsequence_name\tstart\tstop\tstrand\tscore\tp-value\tq-value\tmatched_sequence", + "M1\t.\tcand0\t2\t4\t+\t5.2\t1e-4\t0.01\tACG", + "M1\t.\tcand0\t1\t3\t-\t4.0\t1e-3\t0.1\tTGC", + "M1\t.\tcand1\t1\t3\t+\t2.0\t0.5\t1.0\tAAA", + ] + ) + rows = parse_fimo_tsv(tsv) + best = aggregate_best_hits(rows) + assert best["cand0"].pvalue == pytest.approx(1e-4) + assert best["cand0"].score == pytest.approx(5.2) + assert best["cand0"].matched_sequence == "ACG" + assert best["cand1"].pvalue == pytest.approx(0.5) + + +@pytest.mark.skipif(resolve_executable("fimo", tool_path=None) is None, reason="fimo executable not available") +def test_run_fimo_smoke(tmp_path: Path) -> None: + motif = PWMMotif( + motif_id="M1", + matrix=[ + {"A": 0.8, "C": 0.1, "G": 0.05, "T": 0.05}, + {"A": 0.8, "C": 0.1, "G": 0.05, "T": 0.05}, + {"A": 0.8, "C": 0.1, "G": 0.05, "T": 0.05}, + ], + background={"A": 0.25, "C": 
0.25, "G": 0.25, "T": 0.25}, + ) + meme_path = tmp_path / "motif.meme" + fasta_path = tmp_path / "candidates.fasta" + write_minimal_meme_motif(motif, meme_path) + records = build_candidate_records("M1", ["AAA", "CCC"]) + write_candidates_fasta(records, fasta_path) + rows, _raw = run_fimo(meme_motif_path=meme_path, fasta_path=fasta_path, thresh=1.0) + assert rows + for row in rows: + pval = float(row["p_value"]) + assert 0.0 <= pval <= 1.0 diff --git a/src/dnadesign/densegen/tests/test_pwm_sampling_bins.py b/src/dnadesign/densegen/tests/test_pwm_sampling_bins.py new file mode 100644 index 00000000..c20bbeb9 --- /dev/null +++ b/src/dnadesign/densegen/tests/test_pwm_sampling_bins.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import numpy as np + +from dnadesign.densegen.src.adapters.sources.pwm_sampling import ( + FimoCandidate, + _assign_pvalue_bin, + _stratified_sample, +) + + +def test_assign_pvalue_bin_edges() -> None: + edges = [1e-4, 1e-2, 1.0] + assert _assign_pvalue_bin(1e-4, edges) == (0, 0.0, 1e-4) + assert _assign_pvalue_bin(5e-4, edges) == (1, 1e-4, 1e-2) + assert _assign_pvalue_bin(0.5, edges) == (2, 1e-2, 1.0) + + +def test_stratified_sample_balances_bins() -> None: + rng = np.random.default_rng(0) + candidates = [ + FimoCandidate( + seq="AAAA", + pvalue=1e-6, + score=10.0, + bin_id=0, + bin_low=0.0, + bin_high=1e-4, + start=0, + stop=3, + strand="+", + matched_sequence=None, + ), + FimoCandidate( + seq="AAAT", + pvalue=5e-6, + score=9.0, + bin_id=0, + bin_low=0.0, + bin_high=1e-4, + start=0, + stop=3, + strand="+", + matched_sequence=None, + ), + FimoCandidate( + seq="TTTT", + pvalue=5e-3, + score=6.0, + bin_id=1, + bin_low=1e-4, + bin_high=1e-2, + start=0, + stop=3, + strand="+", + matched_sequence=None, + ), + FimoCandidate( + seq="TTTA", + pvalue=8e-3, + score=5.0, + bin_id=1, + bin_low=1e-4, + bin_high=1e-2, + start=0, + stop=3, + strand="+", + matched_sequence=None, + ), + ] + + picked = _stratified_sample(candidates, n_sites=3, 
rng=rng, n_bins=2) + assert len(picked) == 3 + assert {int(c.bin_id) for c in picked} == {0, 1} From d803d834f980113a32b77fc73ace25611e391594 Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 09:44:16 -0500 Subject: [PATCH 04/40] docs(densegen): update stratified FIMO demo and mining workflow --- src/dnadesign/densegen/README.md | 5 +- .../densegen/docs/demo/demo_basic.md | 53 ++++++++++++-- src/dnadesign/densegen/docs/guide/inputs.md | 69 +++++++++++++++++-- .../densegen/docs/reference/config.md | 17 ++++- .../workspaces/demo_meme_two_tf/config.yaml | 9 +-- 5 files changed, 134 insertions(+), 19 deletions(-) diff --git a/src/dnadesign/densegen/README.md b/src/dnadesign/densegen/README.md index b3c511e7..be196836 100644 --- a/src/dnadesign/densegen/README.md +++ b/src/dnadesign/densegen/README.md @@ -17,11 +17,14 @@ Prerequisites include Python, dense-arrays, and a MILP solver. CBC is open-sourc Use the canonical demo config (small, Parquet-only). The demo uses MEME motif files copied from the Cruncher basic demo workspace (`inputs/local_motifs`) and parsed with Cruncher’s MEME parser for DRY, consistent parsing. +FIMO-backed PWM sampling is supported when MEME Suite is available (`fimo` on PATH via `pixi run`). +Stratified FIMO sampling uses canonical p‑value bins by default; see the guide for mining workflows. 
```bash uv run dense validate -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml uv run dense describe -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --no-plot +pixi run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --no-plot +uv run dense summarize -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --library --top-per-tf 5 uv run dense plot -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --only tf_usage,tf_coverage ``` diff --git a/src/dnadesign/densegen/docs/demo/demo_basic.md b/src/dnadesign/densegen/docs/demo/demo_basic.md index f229f84d..b2904916 100644 --- a/src/dnadesign/densegen/docs/demo/demo_basic.md +++ b/src/dnadesign/densegen/docs/demo/demo_basic.md @@ -27,6 +27,10 @@ If you have not synced dependencies yet: uv sync --locked ``` +This demo uses **FIMO** (MEME Suite) to adjudicate strong motif matches. Ensure `fimo` is on PATH +or set `MEME_BIN` to the MEME bin directory. If you use pixi, run commands via +`pixi run dense ...` so MEME tools are available (recommended for the run step). + All commands below assume you are at the repo root. We will write the demo run to a scratch directory; set a run root: @@ -49,7 +53,9 @@ src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/cpxR.txt ``` These are MEME files parsed with Cruncher’s MEME parser (DenseGen reuses the same parsing -logic for DRY). The demo uses LexA + CpxR motifs and exercises PWM sampling bounds. +logic for DRY). The demo uses LexA + CpxR motifs and exercises PWM sampling bounds. Sampling +uses FIMO p-values to define “strong” matches and `selection_policy: stratified` to balance +across canonical p‑value bins (see the input-stage sampling table in `dense describe`). 
### 1b) (Optional) Rebuild inputs from Cruncher @@ -113,7 +119,7 @@ Example output: ┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ name ┃ quota ┃ has promoter_constraints ┃ ┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -│ meme_demo │ 6 │ no │ +│ meme_demo │ 50 │ no │ └──────┴───────┴──────────────────────────┘ ``` @@ -149,16 +155,17 @@ Solver-stage library sampling ## 6) Run generation ```bash -uv run dense run -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml --no-plot +pixi run dense run -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml --no-plot ``` Example output (abridged): ```text 2026-01-15 14:02:02 | INFO | dnadesign.densegen.src.utils.logging_utils | Logging initialized (level=INFO) -Quota plan: meme_demo=6 +Quota plan: meme_demo=50 2026-01-15 14:02:02 | INFO | dnadesign.densegen.src.adapters.optimizer.dense_arrays | Solver selected: CBC -2026-01-15 14:02:05 | INFO | dnadesign.densegen.src.core.pipeline | [demo/demo] 5/5 (100.00%) (local 5/5) CR=1.050 | seq ATTGACAGTAAACCTGCGGGAAATATAATTTACTCCGTATTTGCACATGGTTATCCACAG +2026-01-15 14:02:05 | INFO | dnadesign.densegen.src.adapters.sources.pwm_sampling | FIMO yield for motif lexA: hits=960 accepted=120 selected=80 bins=(0e+00,1e-10]:0 ... selected_bins=(0e+00,1e-10]:0 ... +2026-01-15 14:02:06 | INFO | dnadesign.densegen.src.core.pipeline | [demo/demo] 2/50 (4.00%) (local 2/2) CR=1.050 | seq ATTGACAGTAAACCTGCGGGAAATATAATTTACTCCGTATTTGCACATGGTTATCCACAG 2026-01-15 14:02:05 | INFO | dnadesign.densegen.src.core.pipeline | Inputs manifest written: /private/tmp/densegen-demo-20260115-1405/demo_press/outputs/meta/inputs_manifest.json 🎉 Run complete. 
``` @@ -182,7 +189,7 @@ Run: demo_press Root: /private/tmp/densegen-demo-20260115-1405/demo_press Sche ┏━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┓ ┃ input ┃ plan ┃ generated ┃ duplica… ┃ failed ┃ resamples ┃ librari… ┃ stalls ┃ ┡━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━┩ -│ lexA_cpxR_meme │ meme_demo │ 6 │ 0 │ 0 │ 0 │ 3 │ 0 │ +│ lexA_cpxR_meme │ meme_demo │ 50 │ 0 │ 0 │ 0 │ 3 │ 0 │ └──────────────┴──────┴───────────┴──────────┴────────┴───────────┴──────────┴────────┘ ``` @@ -190,9 +197,12 @@ Use `--verbose` for constraint-failure breakdowns and duplicate-solution counts. Use `--library` to print offered-vs-used summaries for quick debugging: ```bash -uv run dense summarize --run /private/tmp/densegen-demo-20260115-1405/demo_press --library +uv run dense summarize --run /private/tmp/densegen-demo-20260115-1405/demo_press --library --top-per-tf 5 ``` +This library summary is the quickest way to audit which TFBS were offered vs +used in the solver stage (Stage‑B sampling). + If any solutions are rejected, DenseGen writes them to `outputs/attempts.parquet` in the run root. @@ -317,6 +327,7 @@ inputs: motif_ids: [lexA] sampling: strategy: background + scoring_backend: densegen n_sites: 200 oversample_factor: 5 score_percentile: 10 @@ -324,6 +335,34 @@ inputs: Swap `type` and `path` to `pwm_jaspar` or `pwm_matrix_csv` with the same `sampling` block. 
+For **strong match** sampling with FIMO p-values: + +```yaml +inputs: + - name: lexA_meme + type: pwm_meme + path: inputs/lexA.txt + motif_ids: [lexA] + sampling: + strategy: stochastic + scoring_backend: fimo + pvalue_threshold: 1e-4 + selection_policy: top_n + n_sites: 80 + oversample_factor: 10 +``` + +To mine specific affinity strata, add canonical p‑value bins and select bins by index: + +```yaml + sampling: + scoring_backend: fimo + pvalue_threshold: 1e-3 + selection_policy: stratified + pvalue_bins: [1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0] + pvalue_bin_ids: [1, 2] # (1e-6..1e-4] and (1e-4..1e-3] +``` + ### Add USR output USR is an optional I/O adapter. To write both Parquet and USR: diff --git a/src/dnadesign/densegen/docs/guide/inputs.md b/src/dnadesign/densegen/docs/guide/inputs.md index f5fda12e..093a7ba0 100644 --- a/src/dnadesign/densegen/docs/guide/inputs.md +++ b/src/dnadesign/densegen/docs/guide/inputs.md @@ -95,21 +95,47 @@ Use a MEME-format PWM file and explicitly sample binding sites. 
Required sampling fields: - `strategy`: `consensus | stochastic | background` - `n_sites`: number of binding sites to generate per motif -- `score_threshold` or `score_percentile` (exactly one) +- `scoring_backend`: `densegen | fimo` (default: `densegen`) +- `score_threshold` or `score_percentile` (exactly one; densegen backend only) +- `pvalue_threshold` (float in (0, 1]; fimo backend only) - `oversample_factor`: oversampling multiplier for candidate generation - `max_candidates` (optional): cap on candidate generation; helps bound long motifs - `max_seconds` (optional): time limit for candidate generation (best-effort cap) +- `selection_policy`: `random_uniform | top_n | stratified` (default: `random_uniform`; fimo only) +- `pvalue_bins` (optional): list of p‑value bin edges (strictly increasing; must end with `1.0`) +- `pvalue_bin_ids` (optional): list of bin indices to keep (0‑based, using `pvalue_bins`) +- `bgfile` (optional): MEME bfile-format background model for FIMO +- `keep_all_candidates_debug` (optional): write raw FIMO TSVs to `outputs/meta/fimo/` for inspection +- `include_matched_sequence` (optional): include `fimo_matched_sequence` column in the TFBS table Notes: -- Sampling scores use PWM log-odds with the motif background (from MEME when available). -- `score_threshold` / `score_percentile` controls similarity to the PWM consensus - (higher percentiles or thresholds yield stronger matches). +- `densegen` scoring uses PWM log-odds with the motif background (from MEME when available). +- `fimo` scoring scans the entire emitted TFBS and uses a model-based p-value threshold. + `pvalue_threshold` controls match strength (smaller values are stronger). +- `fimo` backend requires the `fimo` executable on PATH (run via pixi). +- If `bgfile` is omitted, FIMO uses the motif background (or uniform if none provided). +- `background` selects low-scoring sequences (<= threshold/percentile; or pvalue >= threshold for fimo). 
+- `selection_policy: stratified` uses fixed p‑value bins to balance strong/weak matches. +- Canonical p‑value bins (default): `[1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]`. + Bin 0 is `(0, 1e-10]`, bin 1 is `(1e-10, 1e-8]`, etc. + +#### FIMO p-values (beginner-friendly) +- A **p-value** is the probability that a random sequence (under the background model) + would score **at least as well** as the observed match. +- Smaller p-values mean **stronger** motif matches; larger p-values mean **weaker** matches. +- As a rule of thumb: `1e-4` is a strong match, `1e-3` is moderate, `1e-2` is weak. +- DenseGen accepts a candidate if its **best hit** within the emitted TFBS passes the threshold. +- For `strategy: background`, DenseGen keeps **weak** matches where `pvalue >= pvalue_threshold`. +- If you set `pvalue_bin_ids`, DenseGen only keeps candidates in those bins (useful for mining + specific affinity ranges). +- FIMO adds per‑TFBS metadata columns: `fimo_score`, `fimo_pvalue`, `fimo_start`, `fimo_stop`, + `fimo_strand`, `fimo_bin_id`, `fimo_bin_low`, `fimo_bin_high`, and (optionally) + `fimo_matched_sequence`. - `length_policy` defaults to `exact`. Use `length_policy: range` with `length_range: [min, max]` to sample variable lengths (min must be >= motif length). - `trim_window_length` optionally trims the PWM to a max‑information window before sampling (useful for long motifs when you want shorter cores); `trim_window_strategy` currently supports `max_info`. - `consensus` requires `n_sites: 1`. -- `background` selects low-scoring sequences from the PWM. 
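The bin semantics above (half-open intervals with an inclusive upper edge) can be sketched in a few lines of Python. This is an illustrative helper, not DenseGen's internal implementation; the name `assign_pvalue_bin` is hypothetical.

```python
# Canonical p-value bin edges, as documented above; bin i covers (low, high]
# with an exclusive lower edge, so bin 0 is (0, 1e-10].
CANONICAL_BINS = [1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]

def assign_pvalue_bin(pvalue: float, edges=CANONICAL_BINS):
    """Return (bin_id, low, high) for the first bin whose upper edge
    is >= pvalue. FIMO p-values lie in (0, 1], so the final edge (1.0)
    always catches anything the earlier bins miss."""
    low = 0.0
    for idx, high in enumerate(edges):
        if pvalue <= high:
            return idx, low, high
        low = high
    return len(edges) - 1, edges[-2], edges[-1]
```

For example, a p-value of `5e-5` lands in bin 3, the `(1e-6, 1e-4]` stratum described above.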
Example: @@ -130,6 +156,39 @@ inputs: length_range: [22, 28] ``` +FIMO-backed example: + +```yaml +inputs: + - name: lexA_meme + type: pwm_meme + path: inputs/lexA.txt + motif_ids: [lexA] + sampling: + strategy: stochastic + scoring_backend: fimo + pvalue_threshold: 1e-4 + selection_policy: top_n + n_sites: 80 + oversample_factor: 12 + max_candidates: 50000 + max_seconds: 5 +``` + +#### Mining workflow (p‑value strata) +If you want to **mine** sequences across affinity strata, use `selection_policy: stratified` plus +canonical p‑value bins. A typical workflow: + +1) Oversample candidates (`oversample_factor`, `max_candidates`) and score with FIMO. +2) Accept candidates using `pvalue_threshold` (global strength cutoff). +3) Use `pvalue_bin_ids` to select one or more bins (e.g., moderate matches only). +4) Repeat runs to accumulate a deduplicated reservoir of sequences per bin. +5) Use `dense summarize --library` to inspect which TFBS were offered vs used in Stage‑B sampling. + +DenseGen reports per‑bin yield summaries (hits, accepted, selected) for every FIMO run, so you can +track how many candidates land in each bin and adjust thresholds or oversampling accordingly. With +`selection_policy: stratified`, the selected‑bin counts show how evenly the final pool spans strata. 
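The stratified behavior described above — balancing the selected pool across bins — can be approximated with a round-robin draw over per-bin pools. A hedged sketch, not DenseGen's implementation; the function name `stratified_select` is hypothetical:

```python
import random

def stratified_select(candidates, n_sites, retain_bin_ids, seed=0):
    """Sketch of stratified selection: `candidates` is a list of
    (sequence, bin_id) pairs. Draw round-robin across retained bins so
    the final pool spans strata as evenly as the pools allow."""
    rng = random.Random(seed)
    pools = {b: [] for b in retain_bin_ids}
    for seq, bin_id in candidates:
        if bin_id in pools:  # drop candidates outside the retained bins
            pools[bin_id].append(seq)
    for pool in pools.values():
        rng.shuffle(pool)  # sample without replacement in random order
    selected = []
    while len(selected) < n_sites and any(pools.values()):
        for b in retain_bin_ids:
            if pools[b] and len(selected) < n_sites:
                selected.append(pools[b].pop())
    return selected
```

With two equally populated retained bins and an even quota, this yields an exact 50/50 split; when a bin runs dry, the remaining bins absorb the balance.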
+ --- ### PWM MEME set (`type: pwm_meme_set`) diff --git a/src/dnadesign/densegen/docs/reference/config.md b/src/dnadesign/densegen/docs/reference/config.md index a31f6ce6..9b7c938a 100644 --- a/src/dnadesign/densegen/docs/reference/config.md +++ b/src/dnadesign/densegen/docs/reference/config.md @@ -62,13 +62,26 @@ PWM inputs perform **input sampling** (sampling sites from PWMs) via - `oversample_factor` (int > 0) - `max_candidates` (optional int > 0; caps candidate generation) - `max_seconds` (optional float > 0; time limit for candidate generation) - - `score_threshold` or `score_percentile` (exactly one) + - `scoring_backend`: `densegen | fimo` (default: `densegen`) + - `score_threshold` or `score_percentile` (exactly one; **densegen** backend only) + - `pvalue_threshold` (float in (0, 1]; **fimo** backend only) + - `selection_policy`: `random_uniform | top_n | stratified` (default: `random_uniform`; fimo only) + - `pvalue_bins` (optional list of floats; must end with `1.0`) - p‑value bin edges for stratified sampling + - `pvalue_bin_ids` (optional list of ints) - select specific p‑value bins (0‑based indices) + - `bgfile` (optional path) - MEME bfile-format background model for FIMO + - `keep_all_candidates_debug` (bool, default false) - write raw FIMO TSVs to `outputs/meta/fimo/` for inspection + - `include_matched_sequence` (bool, default false) - include `fimo_matched_sequence` in TFBS outputs - `length_policy`: `exact | range` (default: `exact`) - `length_range`: `[min, max]` (required when `length_policy=range`; `min` >= motif length) - `trim_window_length` (optional int > 0; trims PWM to a max‑information window before sampling) - `trim_window_strategy`: `max_info` (window selection strategy) - `consensus` requires `n_sites: 1` - - `background` selects low-scoring sequences (<= threshold/percentile) + - `background` selects low-scoring sequences (<= threshold/percentile; or pvalue >= threshold for fimo) + - FIMO resolves `fimo` via `MEME_BIN` or PATH; 
pixi users should run `pixi run dense ...` so it is available. + - Canonical p‑value bins (default): `[1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]` + (bin 0 is `(0, 1e-10]`, bin 1 is `(1e-10, 1e-8]`, etc.) + - FIMO runs log per‑bin yield summaries (hits, accepted, selected); `selection_policy: stratified` + makes the selected‑bin distribution explicit for mining workflows. - `type: pwm_meme_set` - `paths` - list of MEME PWM files (merged into a single TF pool) - `motif_ids` (optional list) - choose motifs by ID across files diff --git a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml index b261c1a3..e5794dd6 100644 --- a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +++ b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml @@ -23,8 +23,9 @@ densegen: n_sites: 80 oversample_factor: 12 max_candidates: 50000 # bounded candidate generation - score_threshold: null - score_percentile: 80 + scoring_backend: fimo + pvalue_threshold: 1e-4 + selection_policy: stratified length_policy: range length_range: [22, 28] @@ -40,7 +41,7 @@ densegen: generation: sequence_length: 60 - quota: 6 + quota: 50 sampling: pool_strategy: subsample library_size: 24 @@ -58,7 +59,7 @@ densegen: plan: - name: meme_demo - quota: 6 + quota: 50 required_regulators: [lexA, cpxR] solver: From 4700a9ce3d6c261a59840bd38eade31d9cee46a3 Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 10:39:44 -0500 Subject: [PATCH 05/40] densegen: add FIMO mining workflow and UX updates --- .../densegen/docs/demo/demo_basic.md | 9 +- .../densegen/docs/guide/generation.md | 11 + src/dnadesign/densegen/docs/guide/inputs.md | 52 ++- .../densegen/docs/reference/config.md | 26 +- .../densegen/src/adapters/outputs/parquet.py | 5 + .../src/adapters/sources/pwm_artifact.py | 2 + .../src/adapters/sources/pwm_artifact_set.py | 2 + .../densegen/src/adapters/sources/pwm_fimo.py | 9 +- 
.../src/adapters/sources/pwm_jaspar.py | 2 + .../src/adapters/sources/pwm_matrix_csv.py | 2 + .../densegen/src/adapters/sources/pwm_meme.py | 2 + .../src/adapters/sources/pwm_meme_set.py | 2 + .../src/adapters/sources/pwm_sampling.py | 387 +++++++++++++----- src/dnadesign/densegen/src/cli.py | 20 +- src/dnadesign/densegen/src/config/__init__.py | 95 ++++- src/dnadesign/densegen/src/core/metadata.py | 5 + .../densegen/src/core/metadata_schema.py | 35 +- src/dnadesign/densegen/src/core/pipeline.py | 277 +++++++++---- .../tests/test_cli_summarize_library.py | 5 + .../densegen/tests/test_outputs_parquet.py | 5 + .../densegen/tests/test_pwm_fimo_utils.py | 6 +- .../tests/test_pwm_sampling_mining.py | 80 ++++ .../workspaces/demo_meme_two_tf/config.yaml | 14 +- 23 files changed, 832 insertions(+), 221 deletions(-) create mode 100644 src/dnadesign/densegen/tests/test_pwm_sampling_mining.py diff --git a/src/dnadesign/densegen/docs/demo/demo_basic.md b/src/dnadesign/densegen/docs/demo/demo_basic.md index b2904916..d322242e 100644 --- a/src/dnadesign/densegen/docs/demo/demo_basic.md +++ b/src/dnadesign/densegen/docs/demo/demo_basic.md @@ -158,6 +158,10 @@ Solver-stage library sampling pixi run dense run -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml --no-plot ``` +The demo config sets `logging.progress_style: screen`, so in a TTY you will see a +refreshing dashboard (progress, leaderboards, last sequence). To see per‑sequence +logs, set `progress_style: stream` (and optionally tune `progress_every`). 
+ Example output (abridged): ```text @@ -360,7 +364,10 @@ To mine specific affinity strata, add canonical p‑value bins and select bins b pvalue_threshold: 1e-3 selection_policy: stratified pvalue_bins: [1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0] - pvalue_bin_ids: [1, 2] # (1e-6..1e-4] and (1e-4..1e-3] + mining: + batch_size: 5000 + max_batches: 4 + retain_bin_ids: [1, 2] # (1e-6..1e-4] and (1e-4..1e-3] ``` ### Add USR output diff --git a/src/dnadesign/densegen/docs/guide/generation.md b/src/dnadesign/densegen/docs/guide/generation.md index c8b3d220..e3f29a8f 100644 --- a/src/dnadesign/densegen/docs/guide/generation.md +++ b/src/dnadesign/densegen/docs/guide/generation.md @@ -111,6 +111,17 @@ Notes: - `coverage_weighted` dynamically boosts underused TFBS based on the run’s usage counts. - `avoid_failed_motifs: true` down-weights TFBS that repeatedly appear in failed solve attempts (tracked in attempts.parquet). +### Run scheduling (round‑robin) + +`runtime.round_robin` controls **scheduling**, not sampling. When enabled, DenseGen interleaves plan +items across inputs so each plan advances in turn (one subsample per pass). This is useful when you +have multiple constraint sets (e.g., different fixed sequences) and want a single run to progress +each design target in parallel. + +Round‑robin is **distinct from Stage‑B sampling** (`generation.sampling`): library sampling still +uses the same policy per plan, but round‑robin can trigger more frequent library rebuilds when +`pool_strategy: iterative_subsample` is used. Expect extra compute if many plans are active. 
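As a concrete illustration of the scheduling knobs above, a minimal runtime fragment might look like the following (field names are from this guide; the values are illustrative, not recommendations):

```yaml
densegen:
  runtime:
    round_robin: true                     # interleave plan items across inputs
    arrays_generated_before_resample: 10  # subsamples per plan before a rebuild
```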
+ --- ### Regulator constraints diff --git a/src/dnadesign/densegen/docs/guide/inputs.md b/src/dnadesign/densegen/docs/guide/inputs.md index 093a7ba0..63911198 100644 --- a/src/dnadesign/densegen/docs/guide/inputs.md +++ b/src/dnadesign/densegen/docs/guide/inputs.md @@ -100,10 +100,16 @@ Required sampling fields: - `pvalue_threshold` (float in (0, 1]; fimo backend only) - `oversample_factor`: oversampling multiplier for candidate generation - `max_candidates` (optional): cap on candidate generation; helps bound long motifs -- `max_seconds` (optional): time limit for candidate generation (best-effort cap) +- `max_seconds` (optional): time limit for candidate generation per batch (best-effort cap) - `selection_policy`: `random_uniform | top_n | stratified` (default: `random_uniform`; fimo only) - `pvalue_bins` (optional): list of p‑value bin edges (strictly increasing; must end with `1.0`) -- `pvalue_bin_ids` (optional): list of bin indices to keep (0‑based, using `pvalue_bins`) +- `pvalue_bin_ids` (deprecated; use `mining.retain_bin_ids`) +- `mining` (optional; fimo only): batch/time controls for mining with FIMO + - `batch_size` (int > 0): candidates per batch + - `max_batches` (optional int > 0): limit batches per motif + - `max_seconds` (optional float > 0): limit total mining time per motif + - `retain_bin_ids` (optional list of ints): keep only specific p‑value bins + - `log_every_batches` (int > 0): log yield summaries every N batches - `bgfile` (optional): MEME bfile-format background model for FIMO - `keep_all_candidates_debug` (optional): write raw FIMO TSVs to `outputs/meta/fimo/` for inspection - `include_matched_sequence` (optional): include `fimo_matched_sequence` column in the TFBS table @@ -126,11 +132,11 @@ Notes: - As a rule of thumb: `1e-4` is a strong match, `1e-3` is moderate, `1e-2` is weak. - DenseGen accepts a candidate if its **best hit** within the emitted TFBS passes the threshold. 
- For `strategy: background`, DenseGen keeps **weak** matches where `pvalue >= pvalue_threshold`. -- If you set `pvalue_bin_ids`, DenseGen only keeps candidates in those bins (useful for mining +- If you set `mining.retain_bin_ids`, DenseGen only keeps candidates in those bins (useful for mining specific affinity ranges). - FIMO adds per‑TFBS metadata columns: `fimo_score`, `fimo_pvalue`, `fimo_start`, `fimo_stop`, `fimo_strand`, `fimo_bin_id`, `fimo_bin_low`, `fimo_bin_high`, and (optionally) - `fimo_matched_sequence`. + `fimo_matched_sequence` (the best‑hit window within the TFBS). - `length_policy` defaults to `exact`. Use `length_policy: range` with `length_range: [min, max]` to sample variable lengths (min must be >= motif length). - `trim_window_length` optionally trims the PWM to a max‑information window before sampling (useful @@ -170,24 +176,42 @@ inputs: pvalue_threshold: 1e-4 selection_policy: top_n n_sites: 80 - oversample_factor: 12 - max_candidates: 50000 - max_seconds: 5 + oversample_factor: 200 + max_candidates: 20000 + mining: + batch_size: 5000 + max_batches: 4 + retain_bin_ids: [0, 1, 2, 3] + log_every_batches: 1 ``` #### Mining workflow (p‑value strata) If you want to **mine** sequences across affinity strata, use `selection_policy: stratified` plus -canonical p‑value bins. A typical workflow: +canonical p‑value bins and the `mining` block. A typical workflow: -1) Oversample candidates (`oversample_factor`, `max_candidates`) and score with FIMO. +1) Oversample candidates (`oversample_factor`, `max_candidates`) and score with FIMO in batches + (`mining.batch_size`). 2) Accept candidates using `pvalue_threshold` (global strength cutoff). -3) Use `pvalue_bin_ids` to select one or more bins (e.g., moderate matches only). -4) Repeat runs to accumulate a deduplicated reservoir of sequences per bin. +3) Use `mining.retain_bin_ids` to select one or more bins (e.g., moderate matches only). 
+4) Repeat runs (or increase `mining.max_batches` / `mining.max_seconds`) to accumulate a deduplicated + reservoir of sequences per bin. 5) Use `dense summarize --library` to inspect which TFBS were offered vs used in Stage‑B sampling. -DenseGen reports per‑bin yield summaries (hits, accepted, selected) for every FIMO run, so you can -track how many candidates land in each bin and adjust thresholds or oversampling accordingly. With -`selection_policy: stratified`, the selected‑bin counts show how evenly the final pool spans strata. +DenseGen reports per‑bin yield summaries (hits, accepted, selected) for retained bins only (or all +bins if `retain_bin_ids` is unset), so you can track how many candidates land in each stratum and +adjust thresholds or oversampling accordingly. With `selection_policy: stratified`, the selected‑bin +counts show how evenly the final pool spans strata. + +#### Stdout UX for long runs +DenseGen supports three logging styles so long runs stay readable: + +- `progress_style: stream` (default) logs per‑sequence updates; tune `progress_every` to reduce noise. +- `progress_style: summary` hides per‑sequence logs and only prints periodic leaderboard summaries. +- `progress_style: screen` clears and redraws a compact dashboard (progress, leaderboards, last sequence) + at `progress_refresh_seconds`. + +For iterative mining workflows, `screen` or `summary` modes are recommended to avoid log spam while still +seeing yield/leaderboard progress over time. 
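The three styles map onto the `logging` block documented in the config reference. An illustrative fragment for a long mining run (values are examples, not defaults):

```yaml
logging:
  level: INFO
  progress_style: summary        # stream | summary | screen
  progress_every: 25             # sequences between per-sequence logs (stream mode)
  progress_refresh_seconds: 2.0  # minimum seconds between screen redraws
```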
--- diff --git a/src/dnadesign/densegen/docs/reference/config.md b/src/dnadesign/densegen/docs/reference/config.md index 9b7c938a..0fd049a7 100644 --- a/src/dnadesign/densegen/docs/reference/config.md +++ b/src/dnadesign/densegen/docs/reference/config.md @@ -67,7 +67,14 @@ PWM inputs perform **input sampling** (sampling sites from PWMs) via - `pvalue_threshold` (float in (0, 1]; **fimo** backend only) - `selection_policy`: `random_uniform | top_n | stratified` (default: `random_uniform`; fimo only) - `pvalue_bins` (optional list of floats; must end with `1.0`) - p‑value bin edges for stratified sampling - - `pvalue_bin_ids` (optional list of ints) - select specific p‑value bins (0‑based indices) + - `pvalue_bin_ids` (deprecated; use `mining.retain_bin_ids`) + - `mining` (optional; fimo only) - batch/time controls for mining via FIMO: + - `batch_size` (int > 0; default 100000) - candidates per FIMO batch + - `max_batches` (optional int > 0) - max batches per motif + - `max_seconds` (optional float > 0) - max seconds per motif mining loop + - `retain_bin_ids` (optional list of ints) - select p‑value bins to retain (0‑based indices); + retained bins are the only bins reported in yield summaries + - `log_every_batches` (int > 0; default 1) - log per‑bin yield summaries every N batches - `bgfile` (optional path) - MEME bfile-format background model for FIMO - `keep_all_candidates_debug` (bool, default false) - write raw FIMO TSVs to `outputs/meta/fimo/` for inspection - `include_matched_sequence` (bool, default false) - include `fimo_matched_sequence` in TFBS outputs @@ -80,8 +87,11 @@ PWM inputs perform **input sampling** (sampling sites from PWMs) via - FIMO resolves `fimo` via `MEME_BIN` or PATH; pixi users should run `pixi run dense ...` so it is available. - Canonical p‑value bins (default): `[1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]` (bin 0 is `(0, 1e-10]`, bin 1 is `(1e-10, 1e-8]`, etc.) 
- - FIMO runs log per‑bin yield summaries (hits, accepted, selected); `selection_policy: stratified` + - FIMO runs log per‑bin yield summaries (hits, accepted, selected). If `retain_bin_ids` is set, + only those bins are reported; otherwise all bins are reported. `selection_policy: stratified` makes the selected‑bin distribution explicit for mining workflows. + - When `mining` is enabled, `max_seconds` caps per‑batch candidate generation while + `mining.max_seconds` caps the overall mining loop. - `type: pwm_meme_set` - `paths` - list of MEME PWM files (merged into a single TF pool) - `motif_ids` (optional list) - choose motifs by ID across files @@ -200,7 +210,11 @@ binding-site and PWM-sampled inputs. ### `densegen.runtime` -- `round_robin` (bool) +- `round_robin` (bool) - interleave plan items across inputs (one subsample per plan per pass). + Use this when you have multiple distinct constraint sets (e.g., different fixed sequences) and want + a single run to advance each plan in turn. This **does not** change Stage‑B sampling logic; it only + changes scheduling. With `pool_strategy: iterative_subsample`, round‑robin can increase how often + libraries are rebuilt, so expect additional compute if many plans are active. - `arrays_generated_before_resample` (int > 0) - `min_count_per_tf` (int >= 0) - `max_duplicate_solutions`, `stall_seconds_before_resample`, `stall_warning_every_seconds` @@ -226,6 +240,12 @@ binding-site and PWM-sampled inputs. 
- `level` (e.g., `INFO`) - `suppress_solver_stderr` (bool) - `print_visual` (bool) +- `progress_style`: `stream | summary | screen` (default `stream`) + - `stream`: per‑sequence logs (controlled by `progress_every`) + - `summary`: suppress per‑sequence logs; keep periodic leaderboard summaries + - `screen`: clear and redraw a compact dashboard at `progress_refresh_seconds` +- `progress_every` (int >= 0) - log/refresh interval in sequences (`0` disables per‑sequence logging) +- `progress_refresh_seconds` (float > 0) - minimum seconds between screen refreshes --- diff --git a/src/dnadesign/densegen/src/adapters/outputs/parquet.py b/src/dnadesign/densegen/src/adapters/outputs/parquet.py index a160098f..a45456f2 100644 --- a/src/dnadesign/densegen/src/adapters/outputs/parquet.py +++ b/src/dnadesign/densegen/src/adapters/outputs/parquet.py @@ -37,6 +37,7 @@ def _meta_arrow_type(name: str, pa): } list_int = { "input_pwm_pvalue_bin_ids", + "input_pwm_mining_retain_bin_ids", } int_fields = { "length", @@ -45,6 +46,9 @@ def _meta_arrow_type(name: str, pa): "min_required_regulators", "input_pwm_n_sites", "input_pwm_oversample_factor", + "input_pwm_mining_batch_size", + "input_pwm_mining_max_batches", + "input_pwm_mining_log_every_batches", "input_row_count", "input_tf_count", "input_tfbs_count", @@ -68,6 +72,7 @@ def _meta_arrow_type(name: str, pa): "input_pwm_score_threshold", "input_pwm_score_percentile", "input_pwm_pvalue_threshold", + "input_pwm_mining_max_seconds", "sampling_fraction", "sampling_fraction_pairs", "gap_fill_gc_min", diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py index 446ca742..e193617d 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py @@ -177,6 +177,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold = sampling.get("pvalue_threshold") 
pvalue_bins = sampling.get("pvalue_bins") pvalue_bin_ids = sampling.get("pvalue_bin_ids") + mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) @@ -205,6 +206,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, pvalue_bin_ids=pvalue_bin_ids, + mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, keep_all_candidates_debug=keep_all_candidates_debug, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py index 6fff70b3..6a87a1f0 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py @@ -73,6 +73,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold = sampling_cfg.get("pvalue_threshold") pvalue_bins = sampling_cfg.get("pvalue_bins") pvalue_bin_ids = sampling_cfg.get("pvalue_bin_ids") + mining = sampling_cfg.get("mining") bgfile = sampling_cfg.get("bgfile") selection_policy = str(sampling_cfg.get("selection_policy", "random_uniform")) keep_all_candidates_debug = bool(sampling_cfg.get("keep_all_candidates_debug", False)) @@ -100,6 +101,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, pvalue_bin_ids=pvalue_bin_ids, + mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, keep_all_candidates_debug=keep_all_candidates_debug, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py b/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py index cbdc06c6..1cb2fc4b 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py 
@@ -46,9 +46,14 @@ def _sanitize_id(text: str) -> str: return cleaned or "motif" -def build_candidate_records(motif_id: str, sequences: Sequence[str]) -> list[tuple[str, str]]: +def build_candidate_records( + motif_id: str, + sequences: Sequence[str], + *, + start_index: int = 0, +) -> list[tuple[str, str]]: prefix = _sanitize_id(motif_id) - return [(f"{prefix}|cand{idx}", seq) for idx, seq in enumerate(sequences)] + return [(f"{prefix}|cand{start_index + idx}", seq) for idx, seq in enumerate(sequences)] def write_candidates_fasta(records: Sequence[tuple[str, str]], out_path: Path) -> None: diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py index 75a73d19..4ce3594f 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py @@ -117,6 +117,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") pvalue_bin_ids = sampling.get("pvalue_bin_ids") + mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) @@ -148,6 +149,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, pvalue_bin_ids=pvalue_bin_ids, + mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, keep_all_candidates_debug=keep_all_candidates_debug, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py index 5df2088c..049eecfd 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py @@ -81,6 +81,7 @@ def load_data(self, *, 
rng=None, outputs_root: Path | None = None): pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") pvalue_bin_ids = sampling.get("pvalue_bin_ids") + mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) @@ -109,6 +110,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, pvalue_bin_ids=pvalue_bin_ids, + mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, keep_all_candidates_debug=keep_all_candidates_debug, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py index 7f7193ac..dc3facb0 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py @@ -95,6 +95,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") pvalue_bin_ids = sampling.get("pvalue_bin_ids") + mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) @@ -127,6 +128,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, pvalue_bin_ids=pvalue_bin_ids, + mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, keep_all_candidates_debug=keep_all_candidates_debug, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py index 1e521914..cafece29 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py +++ 
b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py @@ -89,6 +89,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") pvalue_bin_ids = sampling.get("pvalue_bin_ids") + mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) keep_all_candidates_debug = bool(sampling.get("keep_all_candidates_debug", False)) @@ -121,6 +122,7 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, pvalue_bin_ids=pvalue_bin_ids, + mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, keep_all_candidates_debug=keep_all_candidates_debug, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py b/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py index 5c3514f5..5b84db1a 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py @@ -37,6 +37,16 @@ def _safe_label(text: str) -> str: return cleaned or "motif" +def _mining_attr(mining, name: str, default=None): + if mining is None: + return default + if hasattr(mining, name): + return getattr(mining, name) + if isinstance(mining, dict): + return mining.get(name, default) + return default + + @dataclass(frozen=True) class FimoCandidate: seq: str @@ -281,15 +291,24 @@ def _assign_pvalue_bin(pvalue: float, edges: Sequence[float]) -> tuple[int, floa return len(edges) - 1, float(edges[-2]), float(edges[-1]) -def _format_pvalue_bins(edges: Sequence[float], counts: Sequence[int]) -> str: +def _format_pvalue_bins( + edges: Sequence[float], + counts: Sequence[int], + *, + only_bins: Optional[Sequence[int]] = None, +) -> str: if not edges or not counts: return "-" + only_set = {int(idx) for idx in only_bins} if only_bins is not None else None labels: 
list[str] = [] low = 0.0 - for edge, count in zip(edges, counts): + for idx, (edge, count) in enumerate(zip(edges, counts)): + if only_set is not None and idx not in only_set: + low = float(edge) + continue labels.append(f"({low:.0e},{float(edge):.0e}]:{int(count)}") low = float(edge) - return " ".join(labels) + return " ".join(labels) if labels else "-" def _stratified_sample( @@ -360,18 +379,22 @@ def _select_fimo_candidates( if context.get("pvalue_bins_label") is not None: msg_lines.append(f"P-value bins={context.get('pvalue_bins_label')}.") if context.get("pvalue_bin_ids") is not None: - msg_lines.append(f"Selected bins={context.get('pvalue_bin_ids')}.") + msg_lines.append(f"Retained bins={context.get('pvalue_bin_ids')}.") suggestions = [ "reduce n_sites", "relax pvalue_threshold (e.g., 1e-4 → 1e-3)", "increase oversample_factor", ] if context.get("pvalue_bin_ids") is not None: - suggestions.append("broaden pvalue_bin_ids (or remove bin filtering)") + suggestions.append("broaden mining.retain_bin_ids (or remove bin filtering)") if context.get("cap_applied"): suggestions.append("increase max_candidates (cap was hit)") if context.get("time_limited"): suggestions.append("increase max_seconds (time limit was hit)") + if context.get("mining_max_batches") is not None and context.get("mining_batches_limited"): + suggestions.append("increase mining.max_batches") + if context.get("mining_max_seconds") is not None and context.get("mining_time_limited"): + suggestions.append("increase mining.max_seconds") if context.get("width") is not None and int(context.get("width")) <= 6: suggestions.append("try length_policy=range with a longer length_range") msg_lines.append("Try next: " + "; ".join(suggestions) + ".") @@ -412,6 +435,7 @@ def sample_pwm_sites( pvalue_threshold: Optional[float] = None, pvalue_bins: Optional[Sequence[float]] = None, pvalue_bin_ids: Optional[Sequence[int]] = None, + mining: Optional[object] = None, bgfile: Optional[str | Path] = None, selection_policy: 
str = "random_uniform", keep_all_candidates_debug: bool = False, @@ -440,6 +464,8 @@ def sample_pwm_sites( raise ValueError("pvalue_bins is only valid when scoring_backend='fimo'") if pvalue_bin_ids is not None: raise ValueError("pvalue_bin_ids is only valid when scoring_backend='fimo'") + if mining is not None: + raise ValueError("mining is only valid when scoring_backend='fimo'") if include_matched_sequence: raise ValueError("include_matched_sequence is only valid when scoring_backend='fimo'") else: @@ -450,6 +476,10 @@ def sample_pwm_sites( raise ValueError("pwm.sampling.pvalue_threshold must be between 0 and 1") if selection_policy not in {"random_uniform", "top_n", "stratified"}: raise ValueError(f"Unsupported pwm selection_policy: {selection_policy}") + if mining is not None: + retain_bins = _mining_attr(mining, "retain_bin_ids") + if retain_bins is not None and pvalue_bin_ids is not None: + raise ValueError("Provide retain_bin_ids in mining or pvalue_bin_ids, not both.") if score_threshold is not None or score_percentile is not None: log.warning( "PWM sampling scoring_backend=fimo ignores score_threshold/score_percentile for motif %s.", @@ -503,6 +533,7 @@ def _cap_label(cap_applied: bool, time_limited: bool) -> str: return cap_label def _context(length_obs: str, cap_applied: bool, requested: int, generated: int, time_limited: bool) -> dict: + mining_cfg = mining return { "motif_id": motif.motif_id, "width": width, @@ -519,6 +550,11 @@ def _context(length_obs: str, cap_applied: bool, requested: int, generated: int, "cap_applied": cap_applied, "cap_label": _cap_label(cap_applied, time_limited), "time_limited": time_limited, + "mining_batch_size": _mining_attr(mining_cfg, "batch_size"), + "mining_max_batches": _mining_attr(mining_cfg, "max_batches"), + "mining_max_seconds": _mining_attr(mining_cfg, "max_seconds"), + "mining_log_every_batches": _mining_attr(mining_cfg, "log_every_batches"), + "mining_retain_bin_ids": _mining_attr(mining_cfg, "retain_bin_ids"), 
} def _select( @@ -564,13 +600,11 @@ def _embed_with_background(seq: str, target_len: int) -> str: return f"{left}{seq}{right}" def _score_with_fimo( - sequences: List[str], *, - length_obs: str, + n_candidates: int, cap_applied: bool, requested: int, - generated: int, - time_limited: bool, + sequences: Optional[List[str]] = None, ) -> tuple[List[str], dict[str, dict]]: import tempfile @@ -585,13 +619,20 @@ def _score_with_fimo( if pvalue_threshold is None: raise ValueError("pvalue_threshold required for fimo backend") resolved_bins = _resolve_pvalue_edges(pvalue_bins) + retain_bins = _mining_attr(mining, "retain_bin_ids") + if retain_bins is None and pvalue_bin_ids is not None: + retain_bins = list(pvalue_bin_ids) allowed_bins: Optional[set[int]] = None - if pvalue_bin_ids is not None: - allowed_bins = {int(idx) for idx in pvalue_bin_ids} + if retain_bins is not None: + allowed_bins = {int(idx) for idx in retain_bins} max_idx = len(resolved_bins) - 1 if any(idx > max_idx for idx in allowed_bins): - raise ValueError(f"pvalue_bin_ids contains an index outside the available bins (max={max_idx}).") + raise ValueError(f"retain_bin_ids contains an index outside the available bins (max={max_idx}).") keep_weak = keep_low + mining_batch_size = int(_mining_attr(mining, "batch_size", n_candidates)) + mining_max_batches = _mining_attr(mining, "max_batches") + mining_max_seconds = _mining_attr(mining, "max_seconds") + mining_log_every = int(_mining_attr(mining, "log_every_batches", 1)) debug_path: Optional[Path] = None debug_dir = debug_output_dir if keep_all_candidates_debug: @@ -607,69 +648,219 @@ def _score_with_fimo( label = _safe_label(debug_label or motif.motif_id) debug_path = debug_dir / f"{label}__fimo.tsv" + def _merge_tsv(existing: list[str], text: str) -> None: + lines = [ln for ln in text.splitlines() if ln.strip()] + if not lines: + return + if not existing: + existing.extend(lines) + return + header_skipped = False + for ln in lines: + if 
ln.lstrip().startswith("#"): + continue + if not header_skipped: + header_skipped = True + continue + existing.append(ln) + + def _generate_batch(count: int) -> tuple[list[str], list[int], bool]: + batch_start = time.monotonic() + sequences: list[str] = [] + lengths: list[int] = [] + time_limited = False + for _ in range(count): + if max_seconds is not None and sequences: + if (time.monotonic() - batch_start) >= float(max_seconds): + time_limited = True + break + target_len = _resolve_length() + lengths.append(int(target_len)) + if strategy == "background": + core = sample_sequence_from_background(rng, motif.background, width) + else: + core = sample_sequence_from_pwm(rng, matrix) + full_seq = _embed_with_background(core, target_len) + sequences.append(full_seq) + return sequences, lengths, time_limited + + total_bin_counts = [0 for _ in resolved_bins] + accepted_bin_counts = [0 for _ in resolved_bins] + candidates: List[FimoCandidate] = [] + seen: set[str] = set() + lengths_all: list[int] = [] + generated_total = 0 + time_limited = False + mining_time_limited = False + mining_batches_limited = False + batches = 0 + tsv_lines: list[str] = [] + provided_sequences = sequences + with tempfile.TemporaryDirectory() as tmp: tmp_path = Path(tmp) meme_path = tmp_path / "motif.meme" - fasta_path = tmp_path / "candidates.fasta" motif_for_fimo = PWMMotif(motif_id=motif.motif_id, matrix=matrix, background=motif.background) write_minimal_meme_motif(motif_for_fimo, meme_path) - records = build_candidate_records(motif.motif_id, sequences) - write_candidates_fasta(records, fasta_path) - thresh = 1.0 if keep_all_candidates_debug or keep_weak else float(pvalue_threshold) - rows, raw_tsv = run_fimo( - meme_motif_path=meme_path, - fasta_path=fasta_path, - bgfile=Path(bgfile) if bgfile is not None else None, - thresh=thresh, - include_matched_sequence=include_matched_sequence or keep_all_candidates_debug, - return_tsv=debug_path is not None, - ) - if debug_path is not None and raw_tsv 
is not None: - debug_path.write_text(raw_tsv) - log.info("FIMO debug TSV written: %s", debug_path) - best_hits = aggregate_best_hits(rows) - - candidates: List[FimoCandidate] = [] - total_bin_counts = [0 for _ in resolved_bins] - accepted_bin_counts = [0 for _ in resolved_bins] - for rec_id, seq in records: - hit = best_hits.get(rec_id) - if hit is None: - continue - bin_id, bin_low, bin_high = _assign_pvalue_bin(hit.pvalue, resolved_bins) - total_bin_counts[bin_id] += 1 - if keep_weak: - accept = hit.pvalue >= float(pvalue_threshold) - else: - accept = hit.pvalue <= float(pvalue_threshold) - if allowed_bins is not None and bin_id not in allowed_bins: - continue - if not accept: - continue - accepted_bin_counts[bin_id] += 1 - candidates.append( - FimoCandidate( - seq=seq, - pvalue=hit.pvalue, - score=hit.score, - bin_id=bin_id, - bin_low=bin_low, - bin_high=bin_high, - start=hit.start, - stop=hit.stop, - strand=hit.strand, - matched_sequence=hit.matched_sequence, + if provided_sequences is not None: + lengths_all = [len(seq) for seq in provided_sequences] + fasta_path = tmp_path / "candidates.fasta" + records = build_candidate_records(motif.motif_id, provided_sequences, start_index=0) + write_candidates_fasta(records, fasta_path) + thresh = 1.0 if keep_all_candidates_debug or keep_weak else float(pvalue_threshold) + rows, raw_tsv = run_fimo( + meme_motif_path=meme_path, + fasta_path=fasta_path, + bgfile=Path(bgfile) if bgfile is not None else None, + thresh=thresh, + include_matched_sequence=include_matched_sequence or keep_all_candidates_debug, + return_tsv=debug_path is not None, ) - ) + if debug_path is not None and raw_tsv is not None: + _merge_tsv(tsv_lines, raw_tsv) + best_hits = aggregate_best_hits(rows) + for rec_id, seq in records: + hit = best_hits.get(rec_id) + if hit is None: + continue + bin_id, bin_low, bin_high = _assign_pvalue_bin(hit.pvalue, resolved_bins) + if allowed_bins is not None and bin_id not in allowed_bins: + continue + 
total_bin_counts[bin_id] += 1 + if keep_weak: + accept = hit.pvalue >= float(pvalue_threshold) + else: + accept = hit.pvalue <= float(pvalue_threshold) + if not accept: + continue + if seq in seen: + continue + seen.add(seq) + accepted_bin_counts[bin_id] += 1 + candidates.append( + FimoCandidate( + seq=seq, + pvalue=hit.pvalue, + score=hit.score, + bin_id=bin_id, + bin_low=bin_low, + bin_high=bin_high, + start=hit.start, + stop=hit.stop, + strand=hit.strand, + matched_sequence=hit.matched_sequence, + ) + ) + generated_total = len(provided_sequences) + batches = 1 + else: + mining_start = time.monotonic() + while generated_total < n_candidates: + if mining_max_batches is not None and batches >= int(mining_max_batches): + mining_batches_limited = True + break + if mining_max_seconds is not None and (time.monotonic() - mining_start) >= float( + mining_max_seconds + ): + mining_time_limited = True + break + remaining = int(n_candidates) - generated_total + if remaining <= 0: + break + batch_target = min(int(mining_batch_size), remaining) + sequences, lengths, batch_limited = _generate_batch(batch_target) + if batch_limited: + time_limited = True + if not sequences: + break + lengths_all.extend(lengths) + fasta_path = tmp_path / "candidates.fasta" + records = build_candidate_records(motif.motif_id, sequences, start_index=generated_total) + write_candidates_fasta(records, fasta_path) + thresh = 1.0 if keep_all_candidates_debug or keep_weak else float(pvalue_threshold) + rows, raw_tsv = run_fimo( + meme_motif_path=meme_path, + fasta_path=fasta_path, + bgfile=Path(bgfile) if bgfile is not None else None, + thresh=thresh, + include_matched_sequence=include_matched_sequence or keep_all_candidates_debug, + return_tsv=debug_path is not None, + ) + if debug_path is not None and raw_tsv is not None: + _merge_tsv(tsv_lines, raw_tsv) + best_hits = aggregate_best_hits(rows) + for rec_id, seq in records: + hit = best_hits.get(rec_id) + if hit is None: + continue + bin_id, bin_low, 
bin_high = _assign_pvalue_bin(hit.pvalue, resolved_bins) + if allowed_bins is not None and bin_id not in allowed_bins: + continue + total_bin_counts[bin_id] += 1 + if keep_weak: + accept = hit.pvalue >= float(pvalue_threshold) + else: + accept = hit.pvalue <= float(pvalue_threshold) + if not accept: + continue + if seq in seen: + continue + seen.add(seq) + accepted_bin_counts[bin_id] += 1 + candidates.append( + FimoCandidate( + seq=seq, + pvalue=hit.pvalue, + score=hit.score, + bin_id=bin_id, + bin_low=bin_low, + bin_high=bin_high, + start=hit.start, + stop=hit.stop, + strand=hit.strand, + matched_sequence=hit.matched_sequence, + ) + ) + generated_total += len(sequences) + batches += 1 + if mining_log_every > 0 and batches % mining_log_every == 0: + bins_label = _format_pvalue_bins(resolved_bins, total_bin_counts, only_bins=retain_bins) + accepted_label = _format_pvalue_bins(resolved_bins, accepted_bin_counts, only_bins=retain_bins) + log.info( + "FIMO mining %s batch %d/%s: generated=%d accepted=%d bins=%s accepted_bins=%s", + motif.motif_id, + batches, + str(mining_max_batches) if mining_max_batches is not None else "-", + generated_total, + len(candidates), + bins_label, + accepted_label, + ) + + if debug_path is not None and tsv_lines: + debug_path.write_text("\n".join(tsv_lines) + "\n") + log.info("FIMO debug TSV written: %s", debug_path) total_hits = sum(total_bin_counts) accepted_hits = sum(accepted_bin_counts) - bins_label = _format_pvalue_bins(resolved_bins, total_bin_counts) - accepted_label = _format_pvalue_bins(resolved_bins, accepted_bin_counts) + bins_label = _format_pvalue_bins(resolved_bins, total_bin_counts, only_bins=retain_bins) + accepted_label = _format_pvalue_bins(resolved_bins, accepted_bin_counts, only_bins=retain_bins) + length_obs = "-" + if lengths_all: + length_obs = ( + f"{min(lengths_all)}..{max(lengths_all)}" + if min(lengths_all) != max(lengths_all) + else str(lengths_all[0]) + ) - context = _context(length_obs, cap_applied, 
requested, generated, time_limited) + context = _context(length_obs, cap_applied, requested, generated_total, time_limited) context["pvalue_bins_label"] = bins_label context["pvalue_bin_ids"] = sorted(allowed_bins) if allowed_bins is not None else None + context["mining_batch_size"] = mining_batch_size + context["mining_max_batches"] = mining_max_batches + context["mining_max_seconds"] = mining_max_seconds + context["mining_time_limited"] = mining_time_limited + context["mining_batches_limited"] = mining_batches_limited picked = _select_fimo_candidates( candidates, n_sites=n_sites, @@ -684,9 +875,9 @@ def _score_with_fimo( for cand in picked: idx = max(0, min(int(cand.bin_id), len(resolved_bins) - 1)) selected_bin_counts[idx] += 1 - selected_label = _format_pvalue_bins(resolved_bins, selected_bin_counts) + selected_label = _format_pvalue_bins(resolved_bins, selected_bin_counts, only_bins=retain_bins) log.info( - "FIMO yield for motif %s: hits=%d accepted=%d selected=%d bins=%s accepted_bins=%s selected_bins=%s%s", + "FIMO yield for motif %s: hits=%d accepted=%d selected=%d bins=%s accepted_bins=%s selected_bins=%s", motif.motif_id, total_hits, accepted_hits, @@ -694,7 +885,6 @@ def _score_with_fimo( bins_label, accepted_label, selected_label, - f" allowed_bins={sorted(allowed_bins)}" if allowed_bins is not None else "", ) meta_by_seq: dict[str, dict] = {} for cand in picked: @@ -729,12 +919,10 @@ def _score_with_fimo( ) return (selected, {}) if return_metadata else selected selected, meta = _score_with_fimo( - [full_seq], - length_obs=str(target_len), + n_candidates=1, cap_applied=False, requested=1, - generated=1, - time_limited=False, + sequences=[full_seq], ) return (selected, meta) if return_metadata else selected @@ -755,34 +943,34 @@ def _score_with_fimo( cap_val, ) n_candidates = max(1, n_candidates) - candidates: List[Tuple[str, str]] = [] - lengths: List[int] = [] - start = time.monotonic() - time_limited = False - for _ in range(n_candidates): - if 
max_seconds is not None and candidates: - if (time.monotonic() - start) >= float(max_seconds): - time_limited = True - break - target_len = _resolve_length() - lengths.append(int(target_len)) - if strategy == "background": - core = sample_sequence_from_background(rng, motif.background, width) - else: - core = sample_sequence_from_pwm(rng, matrix) - full_seq = _embed_with_background(core, target_len) - candidates.append((full_seq, core)) - if time_limited: - log.warning( - "PWM sampling hit max_seconds for motif %s: generated=%d requested=%d", - motif.motif_id, - len(candidates), - requested_candidates, - ) - length_obs = "-" - if lengths: - length_obs = f"{min(lengths)}..{max(lengths)}" if min(lengths) != max(lengths) else str(lengths[0]) if scoring_backend == "densegen": + candidates: List[Tuple[str, str]] = [] + lengths: List[int] = [] + start = time.monotonic() + time_limited = False + for _ in range(n_candidates): + if max_seconds is not None and candidates: + if (time.monotonic() - start) >= float(max_seconds): + time_limited = True + break + target_len = _resolve_length() + lengths.append(int(target_len)) + if strategy == "background": + core = sample_sequence_from_background(rng, motif.background, width) + else: + core = sample_sequence_from_pwm(rng, matrix) + full_seq = _embed_with_background(core, target_len) + candidates.append((full_seq, core)) + if time_limited: + log.warning( + "PWM sampling hit max_seconds for motif %s: generated=%d requested=%d", + motif.motif_id, + len(candidates), + requested_candidates, + ) + length_obs = "-" + if lengths: + length_obs = f"{min(lengths)}..{max(lengths)}" if min(lengths) != max(lengths) else str(lengths[0]) scored = [ (full_seq, score_sequence(core, matrix, log_odds=log_odds, background=motif.background)) for full_seq, core in candidates @@ -797,11 +985,8 @@ def _score_with_fimo( ) return (selected, {}) if return_metadata else selected selected, meta = _score_with_fimo( - [full_seq for full_seq, _core in 
candidates], - length_obs=length_obs, cap_applied=cap_applied, requested=requested_candidates, - generated=len(candidates), - time_limited=time_limited, + n_candidates=n_candidates, ) return (selected, meta) if return_metadata else selected diff --git a/src/dnadesign/densegen/src/cli.py b/src/dnadesign/densegen/src/cli.py index fbc6e201..9fbd1195 100644 --- a/src/dnadesign/densegen/src/cli.py +++ b/src/dnadesign/densegen/src/cli.py @@ -960,6 +960,7 @@ def describe( "score", "selection", "bins", + "mining", "bgfile", "oversample", "max_candidates", @@ -997,9 +998,23 @@ def describe( bins_label = "canonical" if getattr(sampling, "pvalue_bins", None) is not None: bins_label = "custom" - bin_ids = getattr(sampling, "pvalue_bin_ids", None) + mining_cfg = getattr(sampling, "mining", None) + bin_ids = getattr(mining_cfg, "retain_bin_ids", None) + if bin_ids is None: + bin_ids = getattr(sampling, "pvalue_bin_ids", None) if bin_ids: - bins_label = f"{bins_label} pick={bin_ids}" + bins_label = f"{bins_label} retain={bin_ids}" + mining_label = "-" + mining_cfg = getattr(sampling, "mining", None) + if backend == "fimo" and mining_cfg is not None: + parts = [f"batch={mining_cfg.batch_size}"] + if mining_cfg.max_batches is not None: + parts.append(f"max_batches={mining_cfg.max_batches}") + if mining_cfg.max_seconds is not None: + parts.append(f"max_seconds={mining_cfg.max_seconds}s") + if mining_cfg.retain_bin_ids: + parts.append(f"retain={mining_cfg.retain_bin_ids}") + mining_label = ", ".join(parts) bgfile_label = getattr(sampling, "bgfile", None) or "-" length_label = str(sampling.length_policy) if sampling.length_policy == "range" and sampling.length_range is not None: @@ -1013,6 +1028,7 @@ def describe( score_label, str(selection_label), str(bins_label), + str(mining_label), str(bgfile_label), str(sampling.oversample_factor), str(sampling.max_candidates) if sampling.max_candidates is not None else "-", diff --git a/src/dnadesign/densegen/src/config/__init__.py 
b/src/dnadesign/densegen/src/config/__init__.py index beed783f..fb882183 100644 --- a/src/dnadesign/densegen/src/config/__init__.py +++ b/src/dnadesign/densegen/src/config/__init__.py @@ -13,6 +13,7 @@ from __future__ import annotations import os +import warnings from dataclasses import dataclass from pathlib import Path from typing import Annotated, Any, Dict, List, Optional, Union @@ -154,6 +155,59 @@ class SequenceLibraryInput(BaseModel): sequence_column: str = "sequence" +class PWMMiningConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + batch_size: int = 100000 + max_batches: Optional[int] = None + max_seconds: Optional[float] = None + retain_bin_ids: Optional[List[int]] = None + log_every_batches: int = 1 + + @field_validator("batch_size") + @classmethod + def _batch_size_ok(cls, v: int): + if v <= 0: + raise ValueError("pwm.sampling.mining.batch_size must be > 0") + return v + + @field_validator("max_batches") + @classmethod + def _max_batches_ok(cls, v: Optional[int]): + if v is not None and v <= 0: + raise ValueError("pwm.sampling.mining.max_batches must be > 0 when set") + return v + + @field_validator("max_seconds") + @classmethod + def _max_seconds_ok(cls, v: Optional[float]): + if v is None: + return v + if not isinstance(v, (int, float)) or float(v) <= 0: + raise ValueError("pwm.sampling.mining.max_seconds must be > 0 when set") + return float(v) + + @field_validator("retain_bin_ids") + @classmethod + def _retain_bin_ids_ok(cls, v: Optional[List[int]]): + if v is None: + return v + if not v: + raise ValueError("pwm.sampling.mining.retain_bin_ids must be non-empty when set") + ids = [int(x) for x in v] + if any(idx < 0 for idx in ids): + raise ValueError("pwm.sampling.mining.retain_bin_ids values must be >= 0") + if len(set(ids)) != len(ids): + raise ValueError("pwm.sampling.mining.retain_bin_ids must be unique") + return ids + + @field_validator("log_every_batches") + @classmethod + def _log_every_batches_ok(cls, v: int): + if v <= 0: + 
raise ValueError("pwm.sampling.mining.log_every_batches must be > 0") + return v + + class PWMSamplingConfig(BaseModel): model_config = ConfigDict(extra="forbid") strategy: Literal["consensus", "stochastic", "background"] = "stochastic" @@ -167,6 +221,7 @@ class PWMSamplingConfig(BaseModel): pvalue_threshold: Optional[float] = None pvalue_bins: Optional[List[float]] = None pvalue_bin_ids: Optional[List[int]] = None + mining: Optional[PWMMiningConfig] = None bgfile: Optional[str] = None selection_policy: Literal["random_uniform", "top_n", "stratified"] = "random_uniform" keep_all_candidates_debug: bool = False @@ -284,6 +339,8 @@ def _score_mode(self): raise ValueError("pwm.sampling.pvalue_bins is only valid when scoring_backend='fimo'") if self.pvalue_bin_ids is not None: raise ValueError("pwm.sampling.pvalue_bin_ids is only valid when scoring_backend='fimo'") + if self.mining is not None: + raise ValueError("pwm.sampling.mining is only valid when scoring_backend='fimo'") if self.include_matched_sequence: raise ValueError("pwm.sampling.include_matched_sequence is only valid when scoring_backend='fimo'") else: @@ -291,11 +348,26 @@ def _score_mode(self): raise ValueError("pwm.sampling.pvalue_threshold is required when scoring_backend='fimo'") if not (0.0 < float(self.pvalue_threshold) <= 1.0): raise ValueError("pwm.sampling.pvalue_threshold must be between 0 and 1") - if self.pvalue_bin_ids is not None: + if self.pvalue_bin_ids is not None and self.mining is not None: + raise ValueError( + "pwm.sampling.pvalue_bin_ids is deprecated; use pwm.sampling.mining.retain_bin_ids instead." 
+ ) + if self.pvalue_bin_ids is not None and self.mining is None: + warnings.warn( + "pwm.sampling.pvalue_bin_ids is deprecated; use pwm.sampling.mining.retain_bin_ids.", + stacklevel=2, + ) + self.mining = PWMMiningConfig(retain_bin_ids=list(self.pvalue_bin_ids)) + bin_ids = None + if self.mining is not None and self.mining.retain_bin_ids is not None: + bin_ids = list(self.mining.retain_bin_ids) + elif self.pvalue_bin_ids is not None: + bin_ids = list(self.pvalue_bin_ids) + if bin_ids is not None: bins = list(self.pvalue_bins) if self.pvalue_bins is not None else list(CANONICAL_PVALUE_BINS) max_idx = len(bins) - 1 - if any(idx > max_idx for idx in self.pvalue_bin_ids): - raise ValueError("pwm.sampling.pvalue_bin_ids contains an index outside the available bins") + if any(idx > max_idx for idx in bin_ids): + raise ValueError("pwm.sampling.mining.retain_bin_ids contains an index outside the available bins") if self.strategy == "consensus" and int(self.n_sites) != 1: raise ValueError("pwm.sampling.strategy=consensus requires n_sites=1") if self.scoring_backend == "densegen" and self.score_percentile is not None: @@ -945,6 +1017,9 @@ class LoggingConfig(BaseModel): level: str = "INFO" suppress_solver_stderr: bool = True print_visual: bool = True + progress_style: Literal["stream", "summary", "screen"] = "stream" + progress_every: int = 1 + progress_refresh_seconds: float = 1.0 @field_validator("log_dir") @classmethod @@ -962,6 +1037,20 @@ def _level_ok(cls, v: str): raise ValueError(f"logging.level must be one of {sorted(allowed)}") return lv + @field_validator("progress_every") + @classmethod + def _progress_every_ok(cls, v: int): + if v < 0: + raise ValueError("logging.progress_every must be >= 0") + return int(v) + + @field_validator("progress_refresh_seconds") + @classmethod + def _progress_refresh_ok(cls, v: float): + if not isinstance(v, (int, float)) or float(v) <= 0: + raise ValueError("logging.progress_refresh_seconds must be > 0") + return float(v) + # ---- 
Plots ---- class PlotConfig(BaseModel): diff --git a/src/dnadesign/densegen/src/core/metadata.py b/src/dnadesign/densegen/src/core/metadata.py index cb8d8ca6..861de3ed 100644 --- a/src/dnadesign/densegen/src/core/metadata.py +++ b/src/dnadesign/densegen/src/core/metadata.py @@ -147,6 +147,11 @@ def build_metadata( "input_pwm_pvalue_threshold": input_meta.get("input_pwm_pvalue_threshold"), "input_pwm_pvalue_bins": input_meta.get("input_pwm_pvalue_bins"), "input_pwm_pvalue_bin_ids": input_meta.get("input_pwm_pvalue_bin_ids"), + "input_pwm_mining_batch_size": input_meta.get("input_pwm_mining_batch_size"), + "input_pwm_mining_max_batches": input_meta.get("input_pwm_mining_max_batches"), + "input_pwm_mining_max_seconds": input_meta.get("input_pwm_mining_max_seconds"), + "input_pwm_mining_retain_bin_ids": input_meta.get("input_pwm_mining_retain_bin_ids"), + "input_pwm_mining_log_every_batches": input_meta.get("input_pwm_mining_log_every_batches"), "input_pwm_selection_policy": input_meta.get("input_pwm_selection_policy"), "input_pwm_bgfile": input_meta.get("input_pwm_bgfile"), "input_pwm_keep_all_candidates_debug": input_meta.get("input_pwm_keep_all_candidates_debug"), diff --git a/src/dnadesign/densegen/src/core/metadata_schema.py b/src/dnadesign/densegen/src/core/metadata_schema.py index 70379bd3..ca0c2736 100644 --- a/src/dnadesign/densegen/src/core/metadata_schema.py +++ b/src/dnadesign/densegen/src/core/metadata_schema.py @@ -98,7 +98,27 @@ class MetaField: MetaField("input_pwm_score_percentile", (numbers.Real,), "PWM score percentile.", allow_none=True), MetaField("input_pwm_pvalue_threshold", (numbers.Real,), "PWM p-value threshold (FIMO).", allow_none=True), MetaField("input_pwm_pvalue_bins", (list,), "PWM p-value bins (FIMO).", allow_none=True), - MetaField("input_pwm_pvalue_bin_ids", (list,), "Selected p-value bin indices (FIMO).", allow_none=True), + MetaField( + "input_pwm_pvalue_bin_ids", + (list,), + "Deprecated: selected p-value bin indices (use 
input_pwm_mining_retain_bin_ids).", + allow_none=True, + ), + MetaField("input_pwm_mining_batch_size", (int,), "PWM mining batch size (FIMO).", allow_none=True), + MetaField("input_pwm_mining_max_batches", (int,), "PWM mining max batches (FIMO).", allow_none=True), + MetaField("input_pwm_mining_max_seconds", (numbers.Real,), "PWM mining max seconds (FIMO).", allow_none=True), + MetaField( + "input_pwm_mining_retain_bin_ids", + (list,), + "PWM mining retained p-value bin indices (FIMO).", + allow_none=True, + ), + MetaField( + "input_pwm_mining_log_every_batches", + (int,), + "PWM mining log frequency (batches).", + allow_none=True, + ), MetaField("input_pwm_selection_policy", (str,), "PWM selection policy (FIMO).", allow_none=True), MetaField("input_pwm_bgfile", (str,), "PWM background model path (FIMO).", allow_none=True), MetaField("input_pwm_keep_all_candidates_debug", (bool,), "PWM FIMO debug TSV enabled.", allow_none=True), @@ -224,10 +244,15 @@ def _validate_list_fields(meta: Mapping[str, Any]) -> None: for item in vals: if not isinstance(item, int): raise TypeError("Metadata field 'input_pwm_pvalue_bin_ids' must contain only integers") - if not isinstance(item["tf"], str): - raise TypeError("used_tf_counts.tf must be a string") - if not isinstance(item["count"], int): - raise TypeError("used_tf_counts.count must be an int") + + if "input_pwm_mining_retain_bin_ids" in meta: + vals = meta["input_pwm_mining_retain_bin_ids"] + if vals is not None: + if isinstance(vals, (str, bytes)) or not isinstance(vals, Sequence): + raise TypeError("Metadata field 'input_pwm_mining_retain_bin_ids' must be a list of integers") + for item in vals: + if not isinstance(item, int): + raise TypeError("Metadata field 'input_pwm_mining_retain_bin_ids' must contain only integers") if "min_count_by_regulator" in meta: vals = meta["min_count_by_regulator"] diff --git a/src/dnadesign/densegen/src/core/pipeline.py b/src/dnadesign/densegen/src/core/pipeline.py index dea38e94..df88519c 
100644 --- a/src/dnadesign/densegen/src/core/pipeline.py +++ b/src/dnadesign/densegen/src/core/pipeline.py @@ -29,6 +29,7 @@ import numpy as np import pandas as pd +from rich.console import Console from ..adapters.optimizer import DenseArraysAdapter, OptimizerAdapter from ..adapters.outputs import OutputRecord, SinkBase, build_sinks, load_records_from_config, resolve_bio_alphabet @@ -165,6 +166,16 @@ def _sampling_attr(sampling, name: str, default=None): return default +def _mining_attr(mining, name: str, default=None): + if mining is None: + return default + if hasattr(mining, name): + return getattr(mining, name) + if isinstance(mining, dict): + return mining.get(name, default) + return default + + def _resolve_pvalue_bins_meta(sampling) -> list[float] | None: if sampling is None: return None @@ -201,6 +212,15 @@ def _extract_pwm_sampling_config(source_cfg) -> dict | None: length_range = _sampling_attr(sampling, "length_range") if length_range is not None: length_range = list(length_range) + mining = _sampling_attr(sampling, "mining") + mining_batch_size = _mining_attr(mining, "batch_size") + mining_max_batches = _mining_attr(mining, "max_batches") + mining_max_seconds = _mining_attr(mining, "max_seconds") + mining_retain_bin_ids = _mining_attr(mining, "retain_bin_ids") + legacy_bin_ids = _sampling_attr(sampling, "pvalue_bin_ids") + if mining_retain_bin_ids is None: + mining_retain_bin_ids = legacy_bin_ids + mining_log_every_batches = _mining_attr(mining, "log_every_batches") return { "strategy": _sampling_attr(sampling, "strategy"), "scoring_backend": _sampling_attr(sampling, "scoring_backend"), @@ -215,11 +235,21 @@ def _extract_pwm_sampling_config(source_cfg) -> dict | None: "score_percentile": _sampling_attr(sampling, "score_percentile"), "pvalue_threshold": _sampling_attr(sampling, "pvalue_threshold"), "pvalue_bins": _resolve_pvalue_bins_meta(sampling), + "pvalue_bin_ids": legacy_bin_ids, "selection_policy": _sampling_attr(sampling, "selection_policy"), 
"bgfile": _sampling_attr(sampling, "bgfile"), "keep_all_candidates_debug": _sampling_attr(sampling, "keep_all_candidates_debug"), "length_policy": _sampling_attr(sampling, "length_policy"), "length_range": length_range, + "mining": { + "batch_size": mining_batch_size, + "max_batches": mining_max_batches, + "max_seconds": mining_max_seconds, + "retain_bin_ids": mining_retain_bin_ids, + "log_every_batches": mining_log_every_batches, + } + if mining is not None + else None, } @@ -476,7 +506,15 @@ def _input_metadata(source_cfg, cfg_path: Path) -> dict: meta["input_pwm_score_percentile"] = getattr(sampling, "score_percentile", None) meta["input_pwm_pvalue_threshold"] = getattr(sampling, "pvalue_threshold", None) meta["input_pwm_pvalue_bins"] = _resolve_pvalue_bins_meta(sampling) - meta["input_pwm_pvalue_bin_ids"] = getattr(sampling, "pvalue_bin_ids", None) + mining_cfg = getattr(sampling, "mining", None) + retained_bins = _mining_attr(mining_cfg, "retain_bin_ids") + legacy_bin_ids = getattr(sampling, "pvalue_bin_ids", None) + meta["input_pwm_pvalue_bin_ids"] = legacy_bin_ids if legacy_bin_ids is not None else retained_bins + meta["input_pwm_mining_batch_size"] = _mining_attr(mining_cfg, "batch_size") + meta["input_pwm_mining_max_batches"] = _mining_attr(mining_cfg, "max_batches") + meta["input_pwm_mining_max_seconds"] = _mining_attr(mining_cfg, "max_seconds") + meta["input_pwm_mining_retain_bin_ids"] = retained_bins + meta["input_pwm_mining_log_every_batches"] = _mining_attr(mining_cfg, "log_every_batches") meta["input_pwm_selection_policy"] = getattr(sampling, "selection_policy", None) meta["input_pwm_bgfile"] = getattr(sampling, "bgfile", None) meta["input_pwm_keep_all_candidates_debug"] = getattr(sampling, "keep_all_candidates_debug", None) @@ -1296,6 +1334,12 @@ def _process_plan_for_source( log_cfg = global_cfg.logging print_visual = bool(log_cfg.print_visual) + progress_style = str(getattr(log_cfg, "progress_style", "stream")) + progress_every = 
int(getattr(log_cfg, "progress_every", 1)) + progress_refresh_seconds = float(getattr(log_cfg, "progress_refresh_seconds", 1.0)) + screen_console = Console() if progress_style == "screen" else None + last_screen_refresh = 0.0 + latest_failure_totals: str | None = None policy_gc_fill = str(fill_mode) policy_sampling = pool_strategy @@ -1376,6 +1420,11 @@ def _process_plan_for_source( selection_policy = _sampling_attr(input_sampling_cfg, "selection_policy") length_policy = _sampling_attr(input_sampling_cfg, "length_policy") length_range = _sampling_attr(input_sampling_cfg, "length_range") + mining_cfg = _sampling_attr(input_sampling_cfg, "mining") + mining_batch_size = _mining_attr(mining_cfg, "batch_size") + mining_max_batches = _mining_attr(mining_cfg, "max_batches") + mining_max_seconds = _mining_attr(mining_cfg, "max_seconds") + mining_retain_bins = _mining_attr(mining_cfg, "retain_bin_ids") if length_range is not None: length_range = list(length_range) score_label = "-" @@ -1389,9 +1438,13 @@ def _process_plan_for_source( bins_label = "-" if scoring_backend == "fimo": bins_label = "canonical" if _sampling_attr(input_sampling_cfg, "pvalue_bins") is None else "custom" - bin_ids = _sampling_attr(input_sampling_cfg, "pvalue_bin_ids") + bin_ids = ( + mining_retain_bins + if mining_retain_bins is not None + else _sampling_attr(input_sampling_cfg, "pvalue_bin_ids") + ) if bin_ids: - bins_label = f"{bins_label} pick={sorted(list(bin_ids))}" + bins_label = f"{bins_label} retain={sorted(list(bin_ids))}" length_label = str(length_policy) if length_policy == "range" and length_range: length_label = f"{length_policy}({length_range[0]}..{length_range[1]})" @@ -1404,9 +1457,19 @@ def _process_plan_for_source( cap_label = f"{cap_label}; max_seconds={max_seconds}" if cap_label != "-" else f"{max_seconds}s" counts_label = _summarize_tf_counts(meta_df["tf"].tolist()) selection_label = selection_policy if scoring_backend == "fimo" else "-" + mining_label = "-" + if scoring_backend 
== "fimo" and mining_cfg is not None: + parts = [] + if mining_batch_size is not None: + parts.append(f"batch={mining_batch_size}") + if mining_max_batches is not None: + parts.append(f"max_batches={mining_max_batches}") + if mining_max_seconds is not None: + parts.append(f"max_seconds={mining_max_seconds}s") + mining_label = ", ".join(parts) if parts else "enabled" log.info( "PWM input sampling for %s: motifs=%d | sites=%s | strategy=%s | backend=%s | score=%s | " - "selection=%s | bins=%s | oversample=%s | max_candidates=%s | length=%s", + "selection=%s | bins=%s | mining=%s | oversample=%s | max_candidates=%s | length=%s", source_label, len(input_meta.get("input_pwm_ids") or []), counts_label or "-", @@ -1415,6 +1478,7 @@ def _process_plan_for_source( score_label, selection_label, bins_label, + mining_label, oversample, cap_label, length_label, @@ -2301,37 +2365,75 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): pct = 100.0 * (global_generated / max(1, quota)) bar = _format_progress_bar(global_generated, quota, width=24) cr = getattr(sol, "compression_ratio", float("nan")) - if print_visual: - log.info( - "╭─ %s/%s %s %d/%d (%.2f%%) — local %d/%d — CR=%.3f\n" - "%s\nsequence %s\n" - "╰────────────────────────────────────────────────────────", - source_label, - plan_name, - bar, - global_generated, - quota, - pct, - local_generated, - max_per_subsample, - cr, - derived["visual"], - final_seq, - ) + should_log = progress_every > 0 and global_generated % max(1, progress_every) == 0 + if progress_style == "screen": + if should_log and screen_console is not None: + now = time.monotonic() + if (now - last_screen_refresh) >= progress_refresh_seconds: + screen_console.clear() + seq_preview = final_seq if len(final_seq) <= 120 else f"{final_seq[:117]}..." 
+ screen_console.print( + f"[bold]{source_label}/{plan_name}[/] {bar} {global_generated}/{quota} ({pct:.2f}%)" + ) + screen_console.print( + f"local {local_generated}/{max_per_subsample} | CR={cr:.3f} | " + f"resamples={total_resamples} dup_out={duplicate_records} " + f"dup_sol={duplicate_solutions} fails={failed_solutions} stalls={stall_events}" + ) + if latest_failure_totals: + screen_console.print(f"failures: {latest_failure_totals}") + if tf_usage_counts: + screen_console.print( + f"TF leaderboard: {_summarize_leaderboard(tf_usage_counts, top=5)}" + ) + if usage_counts: + screen_console.print(f"TFBS leaderboard: {_summarize_leaderboard(usage_counts, top=5)}") + diversity_label = _summarize_diversity( + usage_counts, + tf_usage_counts, + library_tfs=library_tfs, + library_tfbs=library_tfbs, + ) + screen_console.print(f"Diversity: {diversity_label}") + if print_visual: + screen_console.print(derived["visual"]) + screen_console.print(f"sequence {seq_preview}") + last_screen_refresh = now + elif progress_style == "summary": + pass else: - log.info( - "[%s/%s] %s %d/%d (%.2f%%) (local %d/%d) CR=%.3f | seq %s", - source_label, - plan_name, - bar, - global_generated, - quota, - pct, - local_generated, - max_per_subsample, - cr, - final_seq, - ) + if should_log: + if print_visual: + log.info( + "╭─ %s/%s %s %d/%d (%.2f%%) — local %d/%d — CR=%.3f\n" + "%s\nsequence %s\n" + "╰────────────────────────────────────────────────────────", + source_label, + plan_name, + bar, + global_generated, + quota, + pct, + local_generated, + max_per_subsample, + cr, + derived["visual"], + final_seq, + ) + else: + log.info( + "[%s/%s] %s %d/%d (%.2f%%) (local %d/%d) CR=%.3f | seq %s", + source_label, + plan_name, + bar, + global_generated, + quota, + pct, + local_generated, + max_per_subsample, + cr, + final_seq, + ) if leaderboard_every > 0 and global_generated % max(1, leaderboard_every) == 0: failure_totals = _summarize_failure_totals( @@ -2339,56 +2441,58 @@ def 
_make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): input_name=source_label, plan_name=plan_name, ) - log.info( - "[%s/%s] Progress %s %d/%d (%.2f%%) | resamples=%d dup_out=%d " - "dup_sol=%d fails=%d stalls=%d | %s", - source_label, - plan_name, - bar, - global_generated, - quota, - pct, - total_resamples, - duplicate_records, - duplicate_solutions, - failed_solutions, - stall_events, - failure_totals, - ) - log.info( - "[%s/%s] Leaderboard (TF): %s", - source_label, - plan_name, - _summarize_leaderboard(tf_usage_counts, top=5), - ) - log.info( - "[%s/%s] Leaderboard (TFBS): %s", - source_label, - plan_name, - _summarize_leaderboard(usage_counts, top=5), - ) - log.info( - "[%s/%s] Failed TFBS: %s", - source_label, - plan_name, - _summarize_failure_leaderboard( - failure_counts, - input_name=source_label, - plan_name=plan_name, - top=5, - ), - ) - log.info( - "[%s/%s] Diversity: %s", - source_label, - plan_name, - _summarize_diversity( - usage_counts, - tf_usage_counts, - library_tfs=library_tfs, - library_tfbs=library_tfbs, - ), - ) + latest_failure_totals = failure_totals + if progress_style != "screen": + log.info( + "[%s/%s] Progress %s %d/%d (%.2f%%) | resamples=%d dup_out=%d " + "dup_sol=%d fails=%d stalls=%d | %s", + source_label, + plan_name, + bar, + global_generated, + quota, + pct, + total_resamples, + duplicate_records, + duplicate_solutions, + failed_solutions, + stall_events, + failure_totals, + ) + log.info( + "[%s/%s] Leaderboard (TF): %s", + source_label, + plan_name, + _summarize_leaderboard(tf_usage_counts, top=5), + ) + log.info( + "[%s/%s] Leaderboard (TFBS): %s", + source_label, + plan_name, + _summarize_leaderboard(usage_counts, top=5), + ) + log.info( + "[%s/%s] Failed TFBS: %s", + source_label, + plan_name, + _summarize_failure_leaderboard( + failure_counts, + input_name=source_label, + plan_name=plan_name, + top=5, + ), + ) + log.info( + "[%s/%s] Diversity: %s", + source_label, + plan_name, + _summarize_diversity( + 
usage_counts, + tf_usage_counts, + library_tfs=library_tfs, + library_tfbs=library_tfbs, + ), + ) log.info( "[%s/%s] Example: %s", source_label, @@ -2702,6 +2806,11 @@ def _accumulate_stats(key: tuple[str, str], stats: dict) -> None: # Round-robin scheduler round_robin = bool(cfg.runtime.round_robin) + if round_robin and str(cfg.generation.sampling.pool_strategy) == "iterative_subsample": + log.warning( + "round_robin=true with pool_strategy=iterative_subsample will rebuild libraries more frequently; " + "expect higher runtime for multi-plan runs." + ) inputs = cfg.inputs checkpoint_every = int(cfg.runtime.checkpoint_every) state_counts: dict[tuple[str, str], int] = {} diff --git a/src/dnadesign/densegen/tests/test_cli_summarize_library.py b/src/dnadesign/densegen/tests/test_cli_summarize_library.py index 49618288..5445905a 100644 --- a/src/dnadesign/densegen/tests/test_cli_summarize_library.py +++ b/src/dnadesign/densegen/tests/test_cli_summarize_library.py @@ -65,6 +65,11 @@ def _base_meta(library_hash: str, library_index: int) -> dict: "input_pwm_pvalue_threshold": None, "input_pwm_pvalue_bins": None, "input_pwm_pvalue_bin_ids": None, + "input_pwm_mining_batch_size": None, + "input_pwm_mining_max_batches": None, + "input_pwm_mining_max_seconds": None, + "input_pwm_mining_retain_bin_ids": None, + "input_pwm_mining_log_every_batches": None, "input_pwm_selection_policy": None, "input_pwm_bgfile": None, "input_pwm_keep_all_candidates_debug": None, diff --git a/src/dnadesign/densegen/tests/test_outputs_parquet.py b/src/dnadesign/densegen/tests/test_outputs_parquet.py index 606bd03f..83d5c7da 100644 --- a/src/dnadesign/densegen/tests/test_outputs_parquet.py +++ b/src/dnadesign/densegen/tests/test_outputs_parquet.py @@ -60,6 +60,11 @@ def _dummy_meta() -> dict: "input_pwm_pvalue_threshold": None, "input_pwm_pvalue_bins": None, "input_pwm_pvalue_bin_ids": None, + "input_pwm_mining_batch_size": None, + "input_pwm_mining_max_batches": None, + 
"input_pwm_mining_max_seconds": None, + "input_pwm_mining_retain_bin_ids": None, + "input_pwm_mining_log_every_batches": None, "input_pwm_selection_policy": None, "input_pwm_bgfile": None, "input_pwm_keep_all_candidates_debug": None, diff --git a/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py b/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py index 95c2aa8b..6195073f 100644 --- a/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py +++ b/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py @@ -41,7 +41,7 @@ def test_write_minimal_meme_motif(tmp_path: Path) -> None: def test_write_candidates_fasta(tmp_path: Path) -> None: - records = build_candidate_records("My Motif", ["ACG", "TTT"]) + records = build_candidate_records("My Motif", ["ACG", "TTT"], start_index=5) out = tmp_path / "candidates.fasta" write_candidates_fasta(records, out) lines = out.read_text().splitlines() @@ -49,8 +49,8 @@ def test_write_candidates_fasta(tmp_path: Path) -> None: assert lines[1] == "ACG" assert lines[2].startswith(">") assert lines[3] == "TTT" - assert records[0][0].endswith("|cand0") - assert records[1][0].endswith("|cand1") + assert records[0][0].endswith("|cand5") + assert records[1][0].endswith("|cand6") def test_parse_fimo_tsv_and_best_hits() -> None: diff --git a/src/dnadesign/densegen/tests/test_pwm_sampling_mining.py b/src/dnadesign/densegen/tests/test_pwm_sampling_mining.py new file mode 100644 index 00000000..9895fdcb --- /dev/null +++ b/src/dnadesign/densegen/tests/test_pwm_sampling_mining.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from pathlib import Path + +import numpy as np + +from dnadesign.densegen.src.adapters.sources import pwm_fimo +from dnadesign.densegen.src.adapters.sources.pwm_sampling import PWMMotif, sample_pwm_sites + + +def _parse_fasta(path: Path) -> list[str]: + ids: list[str] = [] + with path.open() as handle: + for line in handle: + if line.startswith(">"): + ids.append(line.strip().lstrip(">")) + return ids + + +def 
test_pwm_sampling_fimo_mining_retain_bins(monkeypatch) -> None: + motif = PWMMotif( + motif_id="M1", + matrix=[ + {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + ], + background={"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + ) + + def fake_run_fimo(*, meme_motif_path, fasta_path, **_kwargs): # type: ignore[override] + ids = _parse_fasta(Path(fasta_path)) + rows = [] + for idx, rec_id in enumerate(ids): + pval = 1e-6 if idx % 2 == 0 else 1e-2 + rows.append( + { + "sequence_name": rec_id, + "start": 1, + "stop": 3, + "strand": "+", + "score": 5.0, + "p_value": pval, + "matched_sequence": "AAA", + } + ) + return rows, None + + monkeypatch.setattr(pwm_fimo, "run_fimo", fake_run_fimo) + + rng = np.random.default_rng(0) + selected, meta = sample_pwm_sites( + rng, + motif, + strategy="stochastic", + n_sites=2, + oversample_factor=2, + max_candidates=None, + max_seconds=None, + score_threshold=None, + score_percentile=None, + scoring_backend="fimo", + pvalue_threshold=1e-1, + pvalue_bins=[1e-5, 1e-3, 1.0], + selection_policy="random_uniform", + mining={ + "batch_size": 2, + "max_batches": 2, + "retain_bin_ids": [0], + "log_every_batches": 1, + }, + include_matched_sequence=True, + return_metadata=True, + ) + + assert len(selected) == 2 + for seq in selected: + info = meta[seq] + assert info["fimo_bin_id"] == 0 + assert info["fimo_matched_sequence"] == "AAA" diff --git a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml index e5794dd6..cd0bcb45 100644 --- a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +++ b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml @@ -21,11 +21,16 @@ densegen: sampling: strategy: stochastic n_sites: 80 - oversample_factor: 12 - max_candidates: 50000 # bounded candidate generation + oversample_factor: 200 + max_candidates: 20000 # bounded 
candidate generation (cap across mining batches)
         scoring_backend: fimo
         pvalue_threshold: 1e-4
         selection_policy: stratified
+        mining:
+          batch_size: 5000
+          max_batches: 4
+          retain_bin_ids: [0, 1, 2, 3]
+          log_every_batches: 1
         length_policy: range
         length_range: [22, 28]
@@ -92,7 +97,10 @@ densegen:
     log_dir: outputs/logs
     level: INFO
     suppress_solver_stderr: true
-    print_visual: false
+    print_visual: true
+    progress_style: screen
+    progress_every: 1
+    progress_refresh_seconds: 1.0
 
   plots:
     source: parquet

From 14279f0ece5f23a9bbf3c71454708056d805c1b7 Mon Sep 17 00:00:00 2001
From: Eric South
Date: Tue, 20 Jan 2026 11:04:27 -0500
Subject: [PATCH 06/40] densegen: cache input sampling and improve run UX

---
 .../densegen/docs/demo/demo_basic.md          |   4 +
 .../densegen/docs/guide/generation.md         |   3 +
 src/dnadesign/densegen/docs/reference/cli.md  |  10 +
 src/dnadesign/densegen/src/cli.py             |  21 ++
 src/dnadesign/densegen/src/core/pipeline.py   |  17 +-
 .../densegen/tests/test_source_cache.py       | 212 ++++++++++++++++++
 6 files changed, 264 insertions(+), 3 deletions(-)
 create mode 100644 src/dnadesign/densegen/tests/test_source_cache.py

diff --git a/src/dnadesign/densegen/docs/demo/demo_basic.md b/src/dnadesign/densegen/docs/demo/demo_basic.md
index d322242e..15c4377e 100644
--- a/src/dnadesign/densegen/docs/demo/demo_basic.md
+++ b/src/dnadesign/densegen/docs/demo/demo_basic.md
@@ -95,6 +95,10 @@ Example output:
 ✨ Run staged: /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml
 ```
 
+If you re-run the demo in the same run root and DenseGen’s schema has changed, you may see a
+Parquet schema mismatch. Either delete `outputs/dense_arrays.parquet` and
+`outputs/_densegen_ids.sqlite`, or stage a fresh workspace.
+
 ## 3) Validate config
 
 ```bash
diff --git a/src/dnadesign/densegen/docs/guide/generation.md b/src/dnadesign/densegen/docs/guide/generation.md
index e3f29a8f..ab7f6cba 100644
--- a/src/dnadesign/densegen/docs/guide/generation.md
+++ b/src/dnadesign/densegen/docs/guide/generation.md
@@ -122,6 +122,9 @@ Round‑robin is **distinct from Stage‑B sampling** (`generation.sampling`): l
 uses the same policy per plan, but round‑robin can trigger more frequent library rebuilds when
 `pool_strategy: iterative_subsample` is used. Expect extra compute if many plans are active.
 
+Input PWM sampling is performed **once per run** and cached across round‑robin passes. If you
+need a fresh PWM sample, start a new run (or stage a new workspace).
+
 ---
 
 ### Regulator constraints
diff --git a/src/dnadesign/densegen/docs/reference/cli.md b/src/dnadesign/densegen/docs/reference/cli.md
index 2c139a4b..bc1a9b69 100644
--- a/src/dnadesign/densegen/docs/reference/cli.md
+++ b/src/dnadesign/densegen/docs/reference/cli.md
@@ -74,6 +74,8 @@ Options:
 - `--log-file PATH` - override the log file path. Otherwise DenseGen writes to
   `logging.log_dir/.log` inside the workspace. The override path must still resolve inside `densegen.run.root`.
+Notes:
+- If you enable `scoring_backend: fimo`, run via `pixi run dense ...` (or ensure `fimo` is on PATH).
 
@@ -114,6 +116,8 @@ Options:
 - `--by-library/--no-by-library` - group library summaries per build attempt.
 - `--top-per-tf` - limit TFBS rows per TF when summarizing.
 - `--show-library-hash/--short-library-hash` - toggle full vs short library hashes.
+Tip:
+- For large runs, prefer `--no-by-library` or lower `--top`/`--top-per-tf` to keep output readable.
 
 ---
 
@@ -143,6 +147,12 @@ Demo run (small, Parquet-only config):
 uv run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --no-plot
 ```
 
+FIMO-backed sampling (pixi):
+
+```bash
+pixi run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --no-plot
+```
+
 ---
 
 @e-south
diff --git a/src/dnadesign/densegen/src/cli.py b/src/dnadesign/densegen/src/cli.py
index 9fbd1195..04a58201 100644
--- a/src/dnadesign/densegen/src/cli.py
+++ b/src/dnadesign/densegen/src/cli.py
@@ -247,6 +247,23 @@ def _render_missing_input_hint(cfg_path: Path, loaded, exc: Exception) -> None:
         console.print(f" - {hint}")
 
 
+def _render_output_schema_hint(exc: Exception) -> bool:
+    msg = str(exc)
+    if "Existing Parquet schema does not match the current DenseGen schema" in msg:
+        console.print(f"[bold red]Output schema mismatch:[/] {msg}")
+        console.print("[bold]Next steps[/]:")
+        console.print(" - Remove outputs/dense_arrays.parquet and outputs/_densegen_ids.sqlite, or")
+        console.print(" - Stage a fresh workspace with `dense stage --copy-inputs` and re-run.")
+        return True
+    if "Output sinks are out of sync before run" in msg:
+        console.print(f"[bold red]Output sink mismatch:[/] {msg}")
+        console.print("[bold]Next steps[/]:")
+        console.print(" - Remove stale outputs so sinks align, or")
+        console.print(" - Run with a single output target to rebuild from scratch.")
+        return True
+    return False
+
+
 def _warn_pwm_sampling_configs(loaded, cfg_path: Path) -> None:
     warnings: list[str] = []
     for inp in loaded.root.densegen.inputs:
@@ -1189,6 +1206,10 @@ def run(
     except FileNotFoundError as exc:
         _render_missing_input_hint(cfg_path, loaded, exc)
         raise typer.Exit(code=1)
+    except RuntimeError as exc:
+        if _render_output_schema_hint(exc):
+            raise typer.Exit(code=1)
+        raise
 
     console.print(":tada: [bold green]Run complete[/].")
     console.print("[bold]Next steps[/]:")
diff --git a/src/dnadesign/densegen/src/core/pipeline.py b/src/dnadesign/densegen/src/core/pipeline.py
index df88519c..4b997aa7 100644 --- a/src/dnadesign/densegen/src/core/pipeline.py +++ b/src/dnadesign/densegen/src/core/pipeline.py @@ -1283,6 +1283,7 @@ def _process_plan_for_source( checkpoint_every: int = 0, write_state: Callable[[], None] | None = None, site_failure_counts: dict[tuple[str, str, str, str, str | None], dict[str, int]] | None = None, + source_cache: dict[str, tuple[list, pd.DataFrame | None]] | None = None, ) -> tuple[int, dict]: source_label = source_cfg.name plan_name = plan_item.name @@ -1366,9 +1367,16 @@ def _process_plan_for_source( outputs_root = run_root_path / "outputs" existing_library_builds = _load_existing_library_index(outputs_root) - # Load source - src_obj = deps.source_factory(source_cfg, cfg_path) - data_entries, meta_df = src_obj.load_data(rng=np_rng, outputs_root=outputs_root) + # Load source (cache PWM sampling results across round-robin passes). + cache_key = source_label + cached = source_cache.get(cache_key) if source_cache is not None else None + if cached is None: + src_obj = deps.source_factory(source_cfg, cfg_path) + data_entries, meta_df = src_obj.load_data(rng=np_rng, outputs_root=outputs_root) + if source_cache is not None: + source_cache[cache_key] = (data_entries, meta_df) + else: + data_entries, meta_df = cached input_meta = _input_metadata(source_cfg, cfg_path) input_tf_tfbs_pair_count: int | None = None if meta_df is not None and isinstance(meta_df, pd.DataFrame): @@ -2705,6 +2713,7 @@ def run_pipeline(loaded: LoadedConfig, *, deps: PipelineDeps | None = None) -> R plan_order: list[tuple[str, str]] = [] plan_leaderboards: dict[tuple[str, str], dict] = {} inputs_manifest_entries: dict[str, dict] = {} + source_cache: dict[str, tuple[list, pd.DataFrame | None]] = {} outputs_root = run_outputs_root(run_root) outputs_root.mkdir(parents=True, exist_ok=True) ensure_run_meta_dir(run_root) @@ -2889,6 +2898,7 @@ def _write_state() -> None: checkpoint_every=checkpoint_every, write_state=_write_state, 
site_failure_counts=site_failure_counts, + source_cache=source_cache, ) per_plan[(s.name, item.name)] = per_plan.get((s.name, item.name), 0) + produced total += produced @@ -2936,6 +2946,7 @@ def _write_state() -> None: checkpoint_every=checkpoint_every, write_state=_write_state, site_failure_counts=site_failure_counts, + source_cache=source_cache, ) produced_counts[key] = current + produced leaderboard_latest = stats.get("leaderboard_latest") diff --git a/src/dnadesign/densegen/tests/test_source_cache.py b/src/dnadesign/densegen/tests/test_source_cache.py new file mode 100644 index 00000000..9c83e950 --- /dev/null +++ b/src/dnadesign/densegen/tests/test_source_cache.py @@ -0,0 +1,212 @@ +from __future__ import annotations + +import random +from pathlib import Path + +import numpy as np +import yaml + +from dnadesign.densegen.src.adapters.optimizer import OptimizerRun +from dnadesign.densegen.src.adapters.outputs.base import SinkBase +from dnadesign.densegen.src.config import load_config +from dnadesign.densegen.src.core.pipeline import PipelineDeps, _process_plan_for_source + + +class _DummySink(SinkBase): + def __init__(self) -> None: + self.records = [] + + def add(self, record): + self.records.append(record) + return True + + def flush(self) -> None: + return None + + +class _DummyOpt: + def forbid(self, _sol) -> None: + return None + + +class _DummySol: + def __init__(self, sequence: str, library: list[str]) -> None: + self.sequence = sequence + self.library = library + self._indices = [0] + self.compression_ratio = 1.0 + + def offset_indices_in_order(self): + return [(0, idx) for idx in self._indices] + + +class _DummyAdapter: + def probe_solver(self, backend: str, *, test_length: int = 10) -> None: + return None + + def build( + self, + *, + library, + sequence_length, + solver, + strategy, + solver_options, + fixed_elements, + strands="double", + regulator_by_index=None, + required_regulators=None, + min_count_by_regulator=None, + 
min_required_regulators=None, + ): + opt = _DummyOpt() + seqs = ["AAA", "CCC"] + + def _gen(): + for seq in seqs: + yield _DummySol(sequence=seq, library=library) + + return OptimizerRun(optimizer=opt, generator=_gen()) + + +class _DummySource: + def __init__(self, entries: list[str]) -> None: + self.entries = entries + self.calls = 0 + + def load_data(self, *, rng, outputs_root): + self.calls += 1 + return self.entries, None + + +def test_source_cache_reuses_loaded_inputs(tmp_path: Path) -> None: + run_dir = tmp_path / "run" + run_dir.mkdir() + (run_dir / "outputs" / "parquet").mkdir(parents=True) + (run_dir / "logs").mkdir() + + seq_path = run_dir / "seqs.csv" + seq_path.write_text("sequence\nAAA\nCCC\nGGG\nTTT\n") + + cfg = { + "densegen": { + "schema_version": "2.2", + "run": {"id": "demo", "root": "."}, + "inputs": [ + { + "name": "demo", + "type": "sequence_library", + "path": str(seq_path), + "format": "csv", + "sequence_column": "sequence", + } + ], + "output": { + "targets": ["parquet"], + "schema": {"bio_type": "dna", "alphabet": "dna_4"}, + "parquet": {"path": "outputs/dense_arrays.parquet"}, + }, + "generation": { + "sequence_length": 3, + "quota": 2, + "sampling": { + "pool_strategy": "subsample", + "library_size": 2, + "subsample_over_length_budget_by": 0, + "library_sampling_strategy": "tf_balanced", + "cover_all_regulators": False, + "unique_binding_sites": True, + "max_sites_per_regulator": None, + "relax_on_exhaustion": False, + "allow_incomplete_coverage": False, + "iterative_max_libraries": 2, + "iterative_min_new_solutions": 0, + }, + "plan": [{"name": "default", "quota": 2}], + }, + "solver": {"backend": "CBC", "strategy": "iterate", "options": []}, + "runtime": { + "round_robin": True, + "arrays_generated_before_resample": 1, + "min_count_per_tf": 0, + "max_duplicate_solutions": 5, + "stall_seconds_before_resample": 10, + "stall_warning_every_seconds": 10, + "max_resample_attempts": 1, + "max_total_resamples": 1, + "max_seconds_per_plan": 0, 
+ "max_failed_solutions": 0, + "random_seed": 1, + }, + "postprocess": {"gap_fill": {"mode": "off", "end": "5prime", "gc_min": 0.4, "gc_max": 0.6}}, + "logging": {"log_dir": "logs", "level": "INFO"}, + } + } + + cfg_path = run_dir / "config.yaml" + cfg_path.write_text(yaml.safe_dump(cfg)) + loaded = load_config(cfg_path) + + dummy_source = _DummySource(entries=["AAA", "CCC", "GGG", "TTT"]) + sink = _DummySink() + deps = PipelineDeps( + source_factory=lambda _cfg, _path: dummy_source, + sink_factory=lambda _cfg, _path: [sink], + optimizer=_DummyAdapter(), + gap_fill=lambda *args, **kwargs: "", + ) + + plan_item = loaded.root.densegen.generation.resolve_plan()[0] + source_cache: dict[str, tuple[list, None]] = {} + + _process_plan_for_source( + loaded.root.densegen.inputs[0], + plan_item, + loaded.root.densegen, + [sink], + chosen_solver="CBC", + deps=deps, + rng=random.Random(1), + np_rng=np.random.default_rng(1), + cfg_path=loaded.path, + run_id=loaded.root.densegen.run.id, + run_root=str(run_dir), + run_config_path="config.yaml", + run_config_sha256="sha", + random_seed=1, + dense_arrays_version=None, + dense_arrays_version_source="test", + output_bio_type="dna", + output_alphabet="dna_4", + one_subsample_only=True, + already_generated=0, + inputs_manifest={}, + source_cache=source_cache, + ) + + _process_plan_for_source( + loaded.root.densegen.inputs[0], + plan_item, + loaded.root.densegen, + [sink], + chosen_solver="CBC", + deps=deps, + rng=random.Random(1), + np_rng=np.random.default_rng(1), + cfg_path=loaded.path, + run_id=loaded.root.densegen.run.id, + run_root=str(run_dir), + run_config_path="config.yaml", + run_config_sha256="sha", + random_seed=1, + dense_arrays_version=None, + dense_arrays_version_source="test", + output_bio_type="dna", + output_alphabet="dna_4", + one_subsample_only=True, + already_generated=0, + inputs_manifest={}, + source_cache=source_cache, + ) + + assert dummy_source.calls == 1 From 176c4d1caa1d62b1f2c66361dc22e63ce2a32acf Mon Sep 17 
00:00:00 2001
From: Eric South
Date: Tue, 20 Jan 2026 13:06:47 -0500
Subject: [PATCH 07/40] densegen: tighten FIMO mining config and preflight checks

---
 .../densegen/src/adapters/outputs/parquet.py  |   2 +-
 .../src/adapters/sources/pwm_artifact.py      |   2 -
 .../src/adapters/sources/pwm_artifact_set.py  |   2 -
 .../densegen/src/adapters/sources/pwm_fimo.py |   9 +-
 .../src/adapters/sources/pwm_jaspar.py        |   2 -
 .../src/adapters/sources/pwm_matrix_csv.py    |   2 -
 .../densegen/src/adapters/sources/pwm_meme.py |   2 -
 .../src/adapters/sources/pwm_meme_set.py      |   2 -
 .../src/adapters/sources/pwm_sampling.py      | 110 ++-
 src/dnadesign/densegen/src/cli.py             | 703 ++++++++++++++----
 src/dnadesign/densegen/src/config/__init__.py |  57 +-
 src/dnadesign/densegen/src/core/metadata.py   |   2 +-
 .../densegen/src/core/metadata_schema.py      |  16 +-
 src/dnadesign/densegen/src/core/pipeline.py   | 567 ++++++++------
 src/dnadesign/densegen/src/core/reporting.py  |  49 +-
 .../densegen/src/integrations/meme_suite.py   |  10 +
 .../densegen/tests/test_cli_config_option.py  |   4 +-
 .../densegen/tests/test_cli_describe.py       |   2 +-
 .../tests/test_cli_summarize_library.py       |   4 +-
 .../densegen/tests/test_config_strict.py      |  23 +
 .../densegen/tests/test_outputs_parquet.py    |   2 +-
 .../densegen/tests/test_pwm_fimo_utils.py     |   5 +-
 .../tests/test_pwm_sampling_mining.py         |  29 +
 .../workspaces/demo_meme_two_tf/config.yaml   |   2 +-
 24 files changed, 1107 insertions(+), 501 deletions(-)

diff --git a/src/dnadesign/densegen/src/adapters/outputs/parquet.py b/src/dnadesign/densegen/src/adapters/outputs/parquet.py
index a45456f2..0751907b 100644
--- a/src/dnadesign/densegen/src/adapters/outputs/parquet.py
+++ b/src/dnadesign/densegen/src/adapters/outputs/parquet.py
@@ -36,7 +36,6 @@ def _meta_arrow_type(name: str, pa):
         "input_pwm_pvalue_bins",
     }
     list_int = {
-        "input_pwm_pvalue_bin_ids",
         "input_pwm_mining_retain_bin_ids",
     }
     int_fields = {
@@ -48,6 +47,7 @@ def _meta_arrow_type(name: str, pa):
         "input_pwm_oversample_factor",
         "input_pwm_mining_batch_size",
"input_pwm_mining_max_batches", + "input_pwm_mining_max_candidates", "input_pwm_mining_log_every_batches", "input_row_count", "input_tf_count", diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py index e193617d..1339aa68 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py @@ -176,7 +176,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") - pvalue_bin_ids = sampling.get("pvalue_bin_ids") mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) @@ -205,7 +204,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend=scoring_backend, pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, - pvalue_bin_ids=pvalue_bin_ids, mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py index 6a87a1f0..9a9353af 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py @@ -72,7 +72,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend = str(sampling_cfg.get("scoring_backend", "densegen")).lower() pvalue_threshold = sampling_cfg.get("pvalue_threshold") pvalue_bins = sampling_cfg.get("pvalue_bins") - pvalue_bin_ids = sampling_cfg.get("pvalue_bin_ids") mining = sampling_cfg.get("mining") bgfile = sampling_cfg.get("bgfile") selection_policy = str(sampling_cfg.get("selection_policy", "random_uniform")) @@ -100,7 +99,6 @@ def load_data(self, 
*, rng=None, outputs_root: Path | None = None): scoring_backend=scoring_backend, pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, - pvalue_bin_ids=pvalue_bin_ids, mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py b/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py index 1cb2fc4b..353e6900 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_fimo.py @@ -19,7 +19,7 @@ from pathlib import Path from typing import Iterable, Sequence -from ...integrations.meme_suite import resolve_executable +from ...integrations.meme_suite import require_executable from .pwm_sampling import PWMMotif, normalize_background _HEADER_RE = re.compile(r"[\s\-]+") @@ -158,12 +158,7 @@ def run_fimo( include_matched_sequence: bool = False, return_tsv: bool = False, ) -> tuple[list[dict], str | None]: - exe = resolve_executable("fimo", tool_path=None) - if exe is None: - raise FileNotFoundError( - "FIMO executable not found. Install MEME Suite and ensure `fimo` is on PATH, " - "or set MEME_BIN to the MEME bin directory (pixi users: `pixi run dense ...`)." 
- ) + exe = require_executable("fimo", tool_path=None) cmd = [str(exe), "--text"] if not include_matched_sequence: cmd.append("--skip-matched-sequence") diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py index 4ce3594f..bb08ba6d 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py @@ -116,7 +116,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") - pvalue_bin_ids = sampling.get("pvalue_bin_ids") mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) @@ -148,7 +147,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend=scoring_backend, pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, - pvalue_bin_ids=pvalue_bin_ids, mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py index 049eecfd..7e313dad 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py @@ -80,7 +80,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") - pvalue_bin_ids = sampling.get("pvalue_bin_ids") mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) @@ -109,7 +108,6 @@ def load_data(self, *, rng=None, outputs_root: Path | 
None = None): scoring_backend=scoring_backend, pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, - pvalue_bin_ids=pvalue_bin_ids, mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py index dc3facb0..bce0a6fe 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py @@ -94,7 +94,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") - pvalue_bin_ids = sampling.get("pvalue_bin_ids") mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) @@ -127,7 +126,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend=scoring_backend, pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, - pvalue_bin_ids=pvalue_bin_ids, mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py index cafece29..c081095b 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py @@ -88,7 +88,6 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend = str(sampling.get("scoring_backend", "densegen")).lower() pvalue_threshold = sampling.get("pvalue_threshold") pvalue_bins = sampling.get("pvalue_bins") - pvalue_bin_ids = sampling.get("pvalue_bin_ids") mining = sampling.get("mining") bgfile = sampling.get("bgfile") selection_policy = str(sampling.get("selection_policy", "random_uniform")) @@ -121,7 +120,6 @@ def 
load_data(self, *, rng=None, outputs_root: Path | None = None): scoring_backend=scoring_backend, pvalue_threshold=pvalue_threshold, pvalue_bins=pvalue_bins, - pvalue_bin_ids=pvalue_bin_ids, mining=mining, bgfile=bgfile_path, selection_policy=selection_policy, diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py b/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py index 5b84db1a..630b5291 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_sampling.py @@ -249,7 +249,10 @@ def select_by_score( "increase oversample_factor", ] if context.get("cap_applied"): - suggestions.append("increase max_candidates (cap was hit)") + if context.get("mining_max_candidates") is not None: + suggestions.append("increase mining.max_candidates (cap was hit)") + else: + suggestions.append("increase max_candidates (cap was hit)") if context.get("time_limited"): suggestions.append("increase max_seconds (time limit was hit)") if context.get("width") is not None and int(context.get("width")) <= 6: @@ -378,19 +381,21 @@ def _select_fimo_candidates( msg_lines.append(f"Observed candidate lengths={context.get('length_observed')}.") if context.get("pvalue_bins_label") is not None: msg_lines.append(f"P-value bins={context.get('pvalue_bins_label')}.") - if context.get("pvalue_bin_ids") is not None: - msg_lines.append(f"Retained bins={context.get('pvalue_bin_ids')}.") + if context.get("retain_bin_ids") is not None: + msg_lines.append(f"Retained bins={context.get('retain_bin_ids')}.") suggestions = [ "reduce n_sites", "relax pvalue_threshold (e.g., 1e-4 → 1e-3)", "increase oversample_factor", ] - if context.get("pvalue_bin_ids") is not None: + if context.get("retain_bin_ids") is not None: suggestions.append("broaden mining.retain_bin_ids (or remove bin filtering)") if context.get("cap_applied"): suggestions.append("increase max_candidates (cap was hit)") if context.get("time_limited"): 
suggestions.append("increase max_seconds (time limit was hit)") + if context.get("mining_max_candidates") is not None and context.get("mining_candidates_limited"): + suggestions.append("increase mining.max_candidates") if context.get("mining_max_batches") is not None and context.get("mining_batches_limited"): suggestions.append("increase mining.max_batches") if context.get("mining_max_seconds") is not None and context.get("mining_time_limited"): @@ -434,7 +439,6 @@ def sample_pwm_sites( scoring_backend: str = "densegen", pvalue_threshold: Optional[float] = None, pvalue_bins: Optional[Sequence[float]] = None, - pvalue_bin_ids: Optional[Sequence[int]] = None, mining: Optional[object] = None, bgfile: Optional[str | Path] = None, selection_policy: str = "random_uniform", @@ -462,8 +466,6 @@ def sample_pwm_sites( raise ValueError("PWM sampling requires exactly one of score_threshold or score_percentile") if pvalue_bins is not None: raise ValueError("pvalue_bins is only valid when scoring_backend='fimo'") - if pvalue_bin_ids is not None: - raise ValueError("pvalue_bin_ids is only valid when scoring_backend='fimo'") if mining is not None: raise ValueError("mining is only valid when scoring_backend='fimo'") if include_matched_sequence: @@ -474,12 +476,13 @@ def sample_pwm_sites( pvalue_threshold = float(pvalue_threshold) if not (0.0 < pvalue_threshold <= 1.0): raise ValueError("pwm.sampling.pvalue_threshold must be between 0 and 1") + if max_candidates is not None or max_seconds is not None: + raise ValueError( + "max_candidates/max_seconds are only supported for densegen scoring; " + "use mining.max_candidates or mining.max_seconds for fimo." 
+ ) if selection_policy not in {"random_uniform", "top_n", "stratified"}: raise ValueError(f"Unsupported pwm selection_policy: {selection_policy}") - if mining is not None: - retain_bins = _mining_attr(mining, "retain_bin_ids") - if retain_bins is not None and pvalue_bin_ids is not None: - raise ValueError("Provide retain_bin_ids in mining or pvalue_bin_ids, not both.") if score_threshold is not None or score_percentile is not None: log.warning( "PWM sampling scoring_backend=fimo ignores score_threshold/score_percentile for motif %s.", @@ -524,16 +527,25 @@ def sample_pwm_sites( if length_policy == "range" and length_range is not None and len(length_range) == 2: length_label = f"{length_policy}({length_range[0]}..{length_range[1]})" - def _cap_label(cap_applied: bool, time_limited: bool) -> str: + def _cap_label( + cap_applied: bool, + time_limited: bool, + *, + mining_max_candidates: Optional[int] = None, + ) -> str: cap_label = "" - if cap_applied and max_candidates is not None: - cap_label = f" (capped by max_candidates={max_candidates})" + if cap_applied: + if mining_max_candidates is not None: + cap_label = f" (capped by mining.max_candidates={mining_max_candidates})" + elif max_candidates is not None: + cap_label = f" (capped by max_candidates={max_candidates})" if time_limited and max_seconds is not None: cap_label = f"{cap_label}; max_seconds={max_seconds}" if cap_label else f" (max_seconds={max_seconds})" return cap_label def _context(length_obs: str, cap_applied: bool, requested: int, generated: int, time_limited: bool) -> dict: mining_cfg = mining + mining_max_candidates = _mining_attr(mining_cfg, "max_candidates") return { "motif_id": motif.motif_id, "width": width, @@ -548,13 +560,14 @@ def _context(length_obs: str, cap_applied: bool, requested: int, generated: int, "requested_candidates": requested, "generated_candidates": generated, "cap_applied": cap_applied, - "cap_label": _cap_label(cap_applied, time_limited), + "cap_label": 
_cap_label(cap_applied, time_limited, mining_max_candidates=mining_max_candidates), "time_limited": time_limited, "mining_batch_size": _mining_attr(mining_cfg, "batch_size"), "mining_max_batches": _mining_attr(mining_cfg, "max_batches"), "mining_max_seconds": _mining_attr(mining_cfg, "max_seconds"), "mining_log_every_batches": _mining_attr(mining_cfg, "log_every_batches"), "mining_retain_bin_ids": _mining_attr(mining_cfg, "retain_bin_ids"), + "mining_max_candidates": mining_max_candidates, } def _select( @@ -620,8 +633,6 @@ def _score_with_fimo( raise ValueError("pvalue_threshold required for fimo backend") resolved_bins = _resolve_pvalue_edges(pvalue_bins) retain_bins = _mining_attr(mining, "retain_bin_ids") - if retain_bins is None and pvalue_bin_ids is not None: - retain_bins = list(pvalue_bin_ids) allowed_bins: Optional[set[int]] = None if retain_bins is not None: allowed_bins = {int(idx) for idx in retain_bins} @@ -631,8 +642,20 @@ def _score_with_fimo( keep_weak = keep_low mining_batch_size = int(_mining_attr(mining, "batch_size", n_candidates)) mining_max_batches = _mining_attr(mining, "max_batches") + mining_max_candidates = _mining_attr(mining, "max_candidates") mining_max_seconds = _mining_attr(mining, "max_seconds") mining_log_every = int(_mining_attr(mining, "log_every_batches", 1)) + log.info( + "FIMO mining config for %s: target=%d batch=%d " + "max_batches=%s max_candidates=%s max_seconds=%s retain_bins=%s", + motif.motif_id, + n_candidates, + mining_batch_size, + str(mining_max_batches) if mining_max_batches is not None else "-", + str(mining_max_candidates) if mining_max_candidates is not None else "-", + str(mining_max_seconds) if mining_max_seconds is not None else "-", + str(sorted(allowed_bins)) if allowed_bins is not None else "all", + ) debug_path: Optional[Path] = None debug_dir = debug_output_dir if keep_all_candidates_debug: @@ -693,6 +716,7 @@ def _generate_batch(count: int) -> tuple[list[str], list[int], bool]: time_limited = False 
mining_time_limited = False mining_batches_limited = False + mining_candidates_limited = False batches = 0 tsv_lines: list[str] = [] provided_sequences = sequences @@ -759,6 +783,9 @@ def _generate_batch(count: int) -> tuple[list[str], list[int], bool]: if mining_max_batches is not None and batches >= int(mining_max_batches): mining_batches_limited = True break + if mining_max_candidates is not None and generated_total >= int(mining_max_candidates): + mining_candidates_limited = True + break if mining_max_seconds is not None and (time.monotonic() - mining_start) >= float( mining_max_seconds ): @@ -827,11 +854,12 @@ def _generate_batch(count: int) -> tuple[list[str], list[int], bool]: bins_label = _format_pvalue_bins(resolved_bins, total_bin_counts, only_bins=retain_bins) accepted_label = _format_pvalue_bins(resolved_bins, accepted_bin_counts, only_bins=retain_bins) log.info( - "FIMO mining %s batch %d/%s: generated=%d accepted=%d bins=%s accepted_bins=%s", + "FIMO mining %s batch %d/%s: generated=%d/%d accepted=%d bins=%s accepted_bins=%s", motif.motif_id, batches, str(mining_max_batches) if mining_max_batches is not None else "-", generated_total, + n_candidates, len(candidates), bins_label, accepted_label, @@ -855,12 +883,14 @@ def _generate_batch(count: int) -> tuple[list[str], list[int], bool]: context = _context(length_obs, cap_applied, requested, generated_total, time_limited) context["pvalue_bins_label"] = bins_label - context["pvalue_bin_ids"] = sorted(allowed_bins) if allowed_bins is not None else None + context["retain_bin_ids"] = sorted(allowed_bins) if allowed_bins is not None else None context["mining_batch_size"] = mining_batch_size context["mining_max_batches"] = mining_max_batches + context["mining_max_candidates"] = mining_max_candidates context["mining_max_seconds"] = mining_max_seconds context["mining_time_limited"] = mining_time_limited context["mining_batches_limited"] = mining_batches_limited + context["mining_candidates_limited"] = 
mining_candidates_limited picked = _select_fimo_candidates( candidates, n_sites=n_sites, @@ -929,19 +959,35 @@ def _generate_batch(count: int) -> tuple[list[str], list[int], bool]: requested_candidates = max(1, n_sites * oversample_factor) n_candidates = requested_candidates cap_applied = False - if max_candidates is not None: - cap_val = int(max_candidates) - if cap_val <= 0: - raise ValueError("max_candidates must be > 0 when set") - if requested_candidates > cap_val: - n_candidates = cap_val - cap_applied = True - log.warning( - "PWM sampling capped candidate generation for motif %s: requested=%d max_candidates=%d", - motif.motif_id, - requested_candidates, - cap_val, - ) + mining_max_candidates = _mining_attr(mining, "max_candidates") + if scoring_backend == "densegen": + if max_candidates is not None: + cap_val = int(max_candidates) + if cap_val <= 0: + raise ValueError("max_candidates must be > 0 when set") + if requested_candidates > cap_val: + n_candidates = cap_val + cap_applied = True + log.warning( + "PWM sampling capped candidate generation for motif %s: requested=%d max_candidates=%d", + motif.motif_id, + requested_candidates, + cap_val, + ) + else: + if mining_max_candidates is not None: + mining_cap = int(mining_max_candidates) + if mining_cap < n_sites: + raise ValueError("pwm.sampling.mining.max_candidates must be >= n_sites") + if mining_cap != requested_candidates: + cap_applied = mining_cap < requested_candidates + n_candidates = mining_cap + log.info( + "PWM mining candidate target for motif %s: requested=%d mining.max_candidates=%d", + motif.motif_id, + requested_candidates, + mining_cap, + ) n_candidates = max(1, n_candidates) if scoring_backend == "densegen": candidates: List[Tuple[str, str]] = [] diff --git a/src/dnadesign/densegen/src/cli.py b/src/dnadesign/densegen/src/cli.py index 04a58201..807b26d9 100644 --- a/src/dnadesign/densegen/src/cli.py +++ b/src/dnadesign/densegen/src/cli.py @@ -6,14 +6,18 @@ Typer/Rich CLI entrypoint for 
DenseGen. Commands: - - validate : Validate YAML config (schema + sanity). - - plan : Show resolved per-constraint quota plan. - - stage : Scaffold a new workspace with config.yaml + subfolders. - - run : Execute generation pipeline; optionally auto-plot. - - plot : Generate plots from outputs using config YAML. - - ls-plots : List available plot names and descriptions. - - summarize : Print an outputs/meta/run_manifest.json summary table. - - report : Generate audit-grade report tables for a run. + - validate-config : Validate YAML config (schema + sanity). + - inspect inputs : Show resolved inputs + PWM sampling. + - inspect plan : Show resolved per-constraint quota plan. + - inspect config : Describe resolved config (inputs/outputs/solver). + - inspect run : Summarize run manifest or list workspaces. + - workspace init : Scaffold a new workspace with config.yaml + subfolders. + - stage-a build-pool : Build Stage-A TFBS pools from inputs. + - stage-b build-libraries : Build Stage-B libraries from pools/inputs. + - run : Execute generation pipeline; optionally auto-plot. + - plot : Generate plots from outputs using config YAML. + - ls-plots : List available plot names and descriptions. + - report : Generate audit-grade report tables for a run. 
Run: python -m dnadesign.densegen.src.cli --help @@ -27,15 +31,20 @@ import contextlib import io +import json +import logging import os import platform +import random import re import shutil import sys import tempfile +from datetime import datetime, timezone from pathlib import Path from typing import Iterator, Optional +import numpy as np import pandas as pd import typer import yaml @@ -50,17 +59,27 @@ resolve_relative_path, resolve_run_root, resolve_run_scoped_path, + schema_version_at_least, +) +from .core.pipeline import ( + _load_existing_library_index, + _load_failure_counts_from_attempts, + build_library_for_plan, + default_deps, + resolve_plan, + run_pipeline, ) -from .core.pipeline import resolve_plan, run_pipeline from .core.reporting import collect_report_data, write_report from .core.run_manifest import load_run_manifest from .core.run_paths import run_manifest_path, run_state_path from .core.run_state import load_run_state +from .integrations.meme_suite import require_executable from .utils.logging_utils import install_native_stderr_filters, setup_logging rich_traceback(show_locals=False) console = Console() _PYARROW_SYSCTL_PATTERN = re.compile(r"sysctlbyname failed for 'hw\.") +log = logging.getLogger(__name__) @contextlib.contextmanager @@ -102,6 +121,36 @@ def _densegen_root_from(file_path: Path) -> Path: DEFAULT_WORKSPACES_ROOT = DENSEGEN_ROOT / "workspaces" +def _input_uses_fimo(input_cfg) -> bool: + sampling = getattr(input_cfg, "sampling", None) + backend = str(getattr(sampling, "scoring_backend", "densegen")).lower() if sampling is not None else "" + if backend == "fimo": + return True + overrides = getattr(input_cfg, "overrides_by_motif_id", None) + if isinstance(overrides, dict): + for override in overrides.values(): + try: + override_backend = str(override.get("scoring_backend", "")).lower() + except Exception: + continue + if override_backend == "fimo": + return True + return False + + +def _ensure_fimo_available(cfg, *, strict: bool = 
True) -> None: + if not any(_input_uses_fimo(inp) for inp in cfg.inputs): + return + try: + require_executable("fimo", tool_path=None) + except FileNotFoundError as exc: + msg = f"FIMO is required for this config but was not found. {exc}" + if strict: + console.print(f"[bold red]{msg}[/]") + raise typer.Exit(code=1) + log.warning(msg) + + def _default_config_path() -> Path: # Prefer a realistic, self-contained MEME demo config inside the package tree. return DENSEGEN_ROOT / "workspaces" / "demo_meme_two_tf" / "config.yaml" @@ -180,6 +229,141 @@ def _short_hash(val: str, *, n: int = 8) -> str: return val[:n] +def _print_inputs_summary(loaded) -> None: + cfg = loaded.root.densegen + inputs = Table("name", "type", "source") + for inp in cfg.inputs: + if hasattr(inp, "path"): + src = str(resolve_relative_path(loaded.path, inp.path)) + elif hasattr(inp, "paths"): + resolved = [str(resolve_relative_path(loaded.path, p)) for p in getattr(inp, "paths") or []] + src = f"{len(resolved)} files" + if resolved: + src = f"{len(resolved)} files ({resolved[0]})" + elif hasattr(inp, "dataset"): + src = f"{inp.dataset} (root={resolve_relative_path(loaded.path, inp.root)})" + else: + src = "-" + inputs.add_row(inp.name, inp.type, src) + console.print(inputs) + + pwm_inputs = [ + inp + for inp in cfg.inputs + if getattr(inp, "type", "") + in { + "pwm_meme", + "pwm_meme_set", + "pwm_jaspar", + "pwm_matrix_csv", + "pwm_artifact", + "pwm_artifact_set", + } + ] + if not pwm_inputs: + return + pwm_table = Table( + "name", + "motifs", + "n_sites", + "strategy", + "backend", + "score", + "selection", + "bins", + "mining", + "bgfile", + "oversample", + "max_candidates", + "max_seconds", + "length", + ) + for inp in pwm_inputs: + sampling = getattr(inp, "sampling", None) + if sampling is None: + continue + if inp.type == "pwm_matrix_csv": + motif_label = str(getattr(inp, "motif_id", "-")) + elif inp.type in {"pwm_meme", "pwm_meme_set", "pwm_jaspar"}: + motif_ids = getattr(inp, "motif_ids", 
None) or [] + motif_label = ", ".join(motif_ids) if motif_ids else "all" + if inp.type == "pwm_meme_set": + file_count = len(getattr(inp, "paths", []) or []) + motif_label = f"{motif_label} ({file_count} files)" + elif inp.type == "pwm_artifact_set": + motif_label = f"{len(getattr(inp, 'paths', []) or [])} artifacts" + else: + motif_label = "from artifact" + backend = getattr(sampling, "scoring_backend", "densegen") + score_label = "-" + if backend == "fimo" and sampling.pvalue_threshold is not None: + comparator = ">=" if sampling.strategy == "background" else "<=" + score_label = f"pvalue{comparator}{sampling.pvalue_threshold}" + elif sampling.score_threshold is not None: + score_label = f"threshold={sampling.score_threshold}" + elif sampling.score_percentile is not None: + score_label = f"percentile={sampling.score_percentile}" + selection_label = "-" if backend != "fimo" else (getattr(sampling, "selection_policy", None) or "-") + bins_label = "-" + if backend == "fimo": + bins_label = "canonical" + if getattr(sampling, "pvalue_bins", None) is not None: + bins_label = "custom" + mining_cfg = getattr(sampling, "mining", None) + bin_ids = getattr(mining_cfg, "retain_bin_ids", None) + if bin_ids: + bins_label = f"{bins_label} retain={bin_ids}" + mining_label = "-" + mining_cfg = getattr(sampling, "mining", None) + if backend == "fimo" and mining_cfg is not None: + parts = [f"batch={mining_cfg.batch_size}"] + if mining_cfg.max_batches is not None: + parts.append(f"max_batches={mining_cfg.max_batches}") + if getattr(mining_cfg, "max_candidates", None) is not None: + parts.append(f"max_candidates={mining_cfg.max_candidates}") + if mining_cfg.max_seconds is not None: + parts.append(f"max_seconds={mining_cfg.max_seconds}s") + if mining_cfg.retain_bin_ids: + parts.append(f"retain={mining_cfg.retain_bin_ids}") + mining_label = ", ".join(parts) + bgfile_label = getattr(sampling, "bgfile", None) or "-" + length_label = str(sampling.length_policy) + if sampling.length_policy 
== "range" and sampling.length_range is not None: + length_label = f"range({sampling.length_range[0]}..{sampling.length_range[1]})" + pwm_table.add_row( + inp.name, + motif_label, + str(sampling.n_sites), + str(sampling.strategy), + str(backend), + score_label, + str(selection_label), + str(bins_label), + str(mining_label), + str(bgfile_label), + str(sampling.oversample_factor), + str(sampling.max_candidates) if sampling.max_candidates is not None else "-", + str(sampling.max_seconds) if sampling.max_seconds is not None else "-", + length_label, + ) + console.print("[bold]Input-stage PWM sampling[/]") + console.print(pwm_table) + console.print( + " -> Produces the realized TFBS pool (input_tfbs_count), captured in inputs_manifest.json after runs." + ) + + +def _pool_manifest_path(out_dir: Path) -> Path: + return out_dir / "pool_manifest.json" + + +def _load_pool_manifest(out_dir: Path) -> dict: + manifest_path = _pool_manifest_path(out_dir) + if not manifest_path.exists(): + raise FileNotFoundError(f"Pool manifest not found: {manifest_path}") + return json.loads(manifest_path.read_text()) + + def _list_dir_entries(path: Path, *, limit: int = 10) -> list[str]: if not path.exists() or not path.is_dir(): return [] @@ -229,7 +413,9 @@ def _render_missing_input_hint(cfg_path: Path, loaded, exc: Exception) -> None: hints = [] if (cfg_path.parent / "inputs").exists(): - hints.append("If this is a staged run dir, use `dense stage --copy-inputs` or copy files into run/inputs.") + hints.append( + "If this is a staged run dir, use `dense workspace init --copy-inputs` or copy files into run/inputs." 
+ ) missing_str = " ".join(str(p) for p in missing) demo_paths = ( "cruncher/workspaces/demo_basics_two_tf", @@ -253,7 +439,7 @@ def _render_output_schema_hint(exc: Exception) -> bool: console.print(f"[bold red]Output schema mismatch:[/] {msg}") console.print("[bold]Next steps[/]:") console.print(" - Remove outputs/dense_arrays.parquet and outputs/_densegen_ids.sqlite, or") - console.print(" - Stage a fresh workspace with `dense stage --copy-inputs` and re-run.") + console.print(" - Stage a fresh workspace with `dense workspace init --copy-inputs` and re-run.") return True if "Output sinks are out of sync before run" in msg: console.print(f"[bold red]Output sink mismatch:[/] {msg}") @@ -425,6 +611,15 @@ def _list_workspaces_table(workspaces_root: Path, *, limit: int, show_all: bool) no_args_is_help=True, help="DenseGen — Dense Array Generator (Typer/Rich CLI)", ) +inspect_app = typer.Typer(add_completion=False, no_args_is_help=True, help="Inspect configs, inputs, and runs.") +stage_a_app = typer.Typer(add_completion=False, no_args_is_help=True, help="Stage A helpers (input TFBS pools).") +stage_b_app = typer.Typer(add_completion=False, no_args_is_help=True, help="Stage B helpers (library sampling).") +workspace_app = typer.Typer(add_completion=False, no_args_is_help=True, help="Workspace scaffolding.") + +app.add_typer(inspect_app, name="inspect") +app.add_typer(stage_a_app, name="stage-a") +app.add_typer(stage_b_app, name="stage-b") +app.add_typer(workspace_app, name="workspace") @app.callback() @@ -440,8 +635,8 @@ def _root( ctx.obj = {"config_path": config} -@app.command(help="Validate the config YAML (schema + sanity).") -def validate( +@app.command("validate-config", help="Validate the config YAML (schema + sanity).") +def validate_config( ctx: typer.Context, probe_solver: bool = typer.Option(False, help="Also probe the solver backend."), config: Optional[Path] = typer.Option(None, "--config", "-c", help="Path to config YAML."), @@ -450,6 +645,7 @@ def 
validate( loaded = _load_config_or_exit(cfg_path) _warn_pwm_sampling_configs(loaded, cfg_path) _warn_full_pool_strategy(loaded) + _ensure_fimo_available(loaded.root.densegen, strict=True) if probe_solver: from .adapters.optimizer import DenseArraysAdapter from .core.pipeline import select_solver_strict @@ -473,8 +669,8 @@ def ls_plots(): console.print(table) -@app.command(help="Stage a new workspace with config.yaml and standard subfolders.") -def stage( +@workspace_app.command("init", help="Stage a new workspace with config.yaml and standard subfolders.") +def workspace_init( run_id: str = typer.Option(..., "--id", "-i", help="Run identifier (directory name)."), root: Path = typer.Option(DEFAULT_WORKSPACES_ROOT, "--root", help="Workspaces root directory."), template: Optional[Path] = typer.Option(None, "--template", help="Template config YAML to copy."), @@ -560,8 +756,8 @@ def stage( console.print(f":sparkles: [bold green]Workspace staged[/]: {config_path}") -@app.command(help="Summarize a run manifest.") -def summarize( +@inspect_app.command("run", help="Summarize a run manifest or list workspaces.") +def inspect_run( ctx: typer.Context, run: Optional[Path] = typer.Option(None, "--run", "-r", help="Run directory (defaults to config run root)."), root: Optional[Path] = typer.Option(None, "--root", help="Workspaces root directory (lists workspaces)."), @@ -602,7 +798,7 @@ def summarize( if not cfg_path.exists(): console.print( f"[bold red]Config not found for --library:[/] {cfg_path}. " - "Provide --config or run summarize without --library." + "Provide --config or run inspect run without --library." 
) raise typer.Exit(code=1) loaded = _load_config_or_exit(cfg_path) @@ -872,18 +1068,40 @@ def _render_tfbs_tables(lib_hash: str) -> None: @app.command(help="Generate audit-grade report summary for a run.") def report( ctx: typer.Context, + run: Optional[Path] = typer.Option(None, "--run", "-r", help="Run directory (defaults to config run root)."), config: Optional[Path] = typer.Option(None, "--config", "-c", help="Path to config YAML."), out: str = typer.Option("outputs", "--out", help="Output directory (relative to run root)."), + format: str = typer.Option( + "all", + "--format", + "-f", + help="Report format: json, md, html, or all (comma-separated allowed).", + ), ): - cfg_path = _resolve_config_path(ctx, config) + if run is not None and config is not None: + console.print("[bold red]Choose either --run or --config, not both.[/]") + raise typer.Exit(code=1) + if run is not None: + cfg_path = Path(run) / "config.yaml" + if not cfg_path.exists(): + console.print(f"[bold red]Config not found under run:[/] {cfg_path}") + raise typer.Exit(code=1) + else: + cfg_path = _resolve_config_path(ctx, config) loaded = _load_config_or_exit(cfg_path) + raw_formats = {f.strip().lower() for f in format.split(",") if f.strip()} + if not raw_formats: + raw_formats = {"all"} + allowed_formats = {"json", "md", "html", "all"} + unknown = sorted(raw_formats - allowed_formats) + if unknown: + console.print(f"[bold red]Unknown report format(s):[/] {', '.join(unknown)}") + console.print("Allowed: json, md, html, all.") + raise typer.Exit(code=1) + formats_used = {"json", "md", "html"} if "all" in raw_formats else raw_formats try: with _suppress_pyarrow_sysctl_warnings(): - write_report( - loaded.root, - cfg_path, - out_dir=out, - ) + write_report(loaded.root, cfg_path, out_dir=out, formats=raw_formats) except FileNotFoundError as exc: console.print(f"[bold red]Report failed:[/] {exc}") run_root = _run_root_for(loaded) @@ -896,11 +1114,18 @@ def report( run_root = _run_root_for(loaded) 
out_dir = resolve_run_scoped_path(cfg_path, run_root, out, label="report.out") console.print(f":sparkles: [bold green]Report written[/]: {out_dir}") - console.print("[bold]Outputs[/]: report.json, report.md") - - -@app.command(help="Show the resolved per-constraint quota plan.") -def plan( + outputs = [] + if "json" in formats_used: + outputs.append("report.json") + if "md" in formats_used: + outputs.append("report.md") + if "html" in formats_used: + outputs.append("report.html") + console.print(f"[bold]Outputs[/]: {', '.join(outputs) if outputs else '-'}") + + +@inspect_app.command("plan", help="Show the resolved per-constraint quota plan.") +def inspect_plan( ctx: typer.Context, config: Optional[Path] = typer.Option(None, "--config", "-c", help="Path to config YAML."), ): @@ -915,8 +1140,8 @@ def plan( console.print(table) -@app.command(help="Describe resolved config, inputs, outputs, and solver details.") -def describe( +@inspect_app.command("config", help="Describe resolved config, inputs, outputs, and solver details.") +def inspect_config( ctx: typer.Context, show_constraints: bool = typer.Option(False, help="Print full fixed elements per plan item."), probe_solver: bool = typer.Option(False, help="Probe the solver backend before reporting."), @@ -926,6 +1151,7 @@ def describe( loaded = _load_config_or_exit(cfg_path) root = loaded.root cfg = root.densegen + _ensure_fimo_available(cfg, strict=True) run_root = _run_root_for(loaded) if probe_solver: @@ -937,126 +1163,7 @@ def describe( console.print(f"[bold]Config[/]: {cfg_path}") console.print(f"[bold]Run[/]: id={cfg.run.id} root={run_root}") - inputs = Table("name", "type", "source") - for inp in cfg.inputs: - if hasattr(inp, "path"): - src = str(resolve_relative_path(loaded.path, inp.path)) - elif hasattr(inp, "paths"): - resolved = [str(resolve_relative_path(loaded.path, p)) for p in getattr(inp, "paths") or []] - src = f"{len(resolved)} files" - if resolved: - src = f"{len(resolved)} files ({resolved[0]})" - 
elif hasattr(inp, "dataset"): - src = f"{inp.dataset} (root={resolve_relative_path(loaded.path, inp.root)})" - else: - src = "-" - inputs.add_row(inp.name, inp.type, src) - console.print(inputs) - - # Alignment (8): make two-stage sampling explicit in CLI describe output. - pwm_inputs = [ - inp - for inp in cfg.inputs - if getattr(inp, "type", "") - in { - "pwm_meme", - "pwm_meme_set", - "pwm_jaspar", - "pwm_matrix_csv", - "pwm_artifact", - "pwm_artifact_set", - } - ] - if pwm_inputs: - pwm_table = Table( - "name", - "motifs", - "n_sites", - "strategy", - "backend", - "score", - "selection", - "bins", - "mining", - "bgfile", - "oversample", - "max_candidates", - "max_seconds", - "length", - ) - for inp in pwm_inputs: - sampling = getattr(inp, "sampling", None) - if sampling is None: - continue - if inp.type == "pwm_matrix_csv": - motif_label = str(getattr(inp, "motif_id", "-")) - elif inp.type in {"pwm_meme", "pwm_meme_set", "pwm_jaspar"}: - motif_ids = getattr(inp, "motif_ids", None) or [] - motif_label = ", ".join(motif_ids) if motif_ids else "all" - if inp.type == "pwm_meme_set": - file_count = len(getattr(inp, "paths", []) or []) - motif_label = f"{motif_label} ({file_count} files)" - elif inp.type == "pwm_artifact_set": - motif_label = f"{len(getattr(inp, 'paths', []) or [])} artifacts" - else: - motif_label = "from artifact" - backend = getattr(sampling, "scoring_backend", "densegen") - score_label = "-" - if backend == "fimo" and sampling.pvalue_threshold is not None: - comparator = ">=" if sampling.strategy == "background" else "<=" - score_label = f"pvalue{comparator}{sampling.pvalue_threshold}" - elif sampling.score_threshold is not None: - score_label = f"threshold={sampling.score_threshold}" - elif sampling.score_percentile is not None: - score_label = f"percentile={sampling.score_percentile}" - selection_label = "-" if backend != "fimo" else (getattr(sampling, "selection_policy", None) or "-") - bins_label = "-" - if backend == "fimo": - bins_label = 
"canonical" - if getattr(sampling, "pvalue_bins", None) is not None: - bins_label = "custom" - mining_cfg = getattr(sampling, "mining", None) - bin_ids = getattr(mining_cfg, "retain_bin_ids", None) - if bin_ids is None: - bin_ids = getattr(sampling, "pvalue_bin_ids", None) - if bin_ids: - bins_label = f"{bins_label} retain={bin_ids}" - mining_label = "-" - mining_cfg = getattr(sampling, "mining", None) - if backend == "fimo" and mining_cfg is not None: - parts = [f"batch={mining_cfg.batch_size}"] - if mining_cfg.max_batches is not None: - parts.append(f"max_batches={mining_cfg.max_batches}") - if mining_cfg.max_seconds is not None: - parts.append(f"max_seconds={mining_cfg.max_seconds}s") - if mining_cfg.retain_bin_ids: - parts.append(f"retain={mining_cfg.retain_bin_ids}") - mining_label = ", ".join(parts) - bgfile_label = getattr(sampling, "bgfile", None) or "-" - length_label = str(sampling.length_policy) - if sampling.length_policy == "range" and sampling.length_range is not None: - length_label = f"range({sampling.length_range[0]}..{sampling.length_range[1]})" - pwm_table.add_row( - inp.name, - motif_label, - str(sampling.n_sites), - str(sampling.strategy), - str(backend), - score_label, - str(selection_label), - str(bins_label), - str(mining_label), - str(bgfile_label), - str(sampling.oversample_factor), - str(sampling.max_candidates) if sampling.max_candidates is not None else "-", - str(sampling.max_seconds) if sampling.max_seconds is not None else "-", - length_label, - ) - console.print("[bold]Input-stage PWM sampling[/]") - console.print(pwm_table) - console.print( - " -> Produces the realized TFBS pool (input_tfbs_count), captured in inputs_manifest.json after runs." 
-    )
+    _print_inputs_summary(loaded)
 
     plan_table = Table(
         "name",
@@ -1171,6 +1278,298 @@ def describe(
     console.print("[bold]Plots[/]: none")
 
 
+@inspect_app.command("inputs", help="Show resolved inputs and PWM sampling summary.")
+def inspect_inputs(
+    ctx: typer.Context,
+    config: Optional[Path] = typer.Option(None, "--config", "-c", help="Path to config YAML."),
+):
+    cfg_path = _resolve_config_path(ctx, config)
+    loaded = _load_config_or_exit(cfg_path)
+    console.print(f"[bold]Config[/]: {cfg_path}")
+    _ensure_fimo_available(loaded.root.densegen, strict=False)
+    _print_inputs_summary(loaded)
+
+
+@stage_a_app.command("build-pool", help="Build Stage-A TFBS pools from inputs.")
+def stage_a_build_pool(
+    ctx: typer.Context,
+    out: str = typer.Option("outputs/pools", "--out", help="Output directory (relative to run root)."),
+    input_name: Optional[list[str]] = typer.Option(
+        None,
+        "--input",
+        "-i",
+        help="Input name(s) to build (defaults to all inputs).",
+    ),
+    overwrite: bool = typer.Option(False, help="Overwrite existing pool files."),
+    config: Optional[Path] = typer.Option(None, "--config", "-c", help="Path to config YAML."),
+):
+    cfg_path = _resolve_config_path(ctx, config)
+    loaded = _load_config_or_exit(cfg_path)
+    cfg = loaded.root.densegen
+    _ensure_fimo_available(cfg, strict=True)
+    run_root = _run_root_for(loaded)
+    out_dir = resolve_run_scoped_path(cfg_path, run_root, out, label="stage-a.out")
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    selected = {name for name in (input_name or [])}
+    if selected:
+        available = {inp.name for inp in cfg.inputs}
+        missing = sorted(selected - available)
+        if missing:
+            raise typer.BadParameter(f"Unknown input name(s): {', '.join(missing)}")
+
+    rng = np.random.default_rng(int(cfg.runtime.random_seed))
+    deps = default_deps()
+    outputs_root = run_root / "outputs"
+    outputs_root.mkdir(parents=True, exist_ok=True)
+
+    rows = []
+    manifest_inputs: list[dict] = []
+    for inp in cfg.inputs:
+        if selected and inp.name not in selected:
+            continue
+        src = deps.source_factory(inp, cfg_path)
+        data_entries, meta_df = src.load_data(rng=rng, outputs_root=outputs_root)
+        if meta_df is None:
+            df = pd.DataFrame({"sequence": [str(s) for s in data_entries]})
+        else:
+            df = meta_df.copy()
+        df.insert(0, "input_name", inp.name)
+        filename = f"{_sanitize_filename(inp.name)}__pool.parquet"
+        dest = out_dir / filename
+        if dest.exists() and not overwrite:
+            console.print(f"[bold red]Pool already exists:[/] {dest}")
+            raise typer.Exit(code=1)
+        df.to_parquet(dest, index=False)
+        if "fimo_bin_id" in df.columns:
+            bin_counts = df["fimo_bin_id"].value_counts().sort_index()
+            bin_table = Table("bin_id", "pvalue_range", "count")
+            for bin_id, count in bin_counts.items():
+                low = None
+                high = None
+                if "fimo_bin_low" in df.columns:
+                    low_vals = df.loc[df["fimo_bin_id"] == bin_id, "fimo_bin_low"]
+                    if not low_vals.empty:
+                        low = float(low_vals.iloc[0])
+                if "fimo_bin_high" in df.columns:
+                    high_vals = df.loc[df["fimo_bin_id"] == bin_id, "fimo_bin_high"]
+                    if not high_vals.empty:
+                        high = float(high_vals.iloc[0])
+                if low is not None and high is not None:
+                    range_label = f"({low:g}, {high:g}]"
+                else:
+                    range_label = "-"
+                bin_table.add_row(str(bin_id), range_label, str(int(count)))
+            console.print(f"[bold]FIMO p-value bins for {inp.name}[/]")
+            console.print(bin_table)
+        manifest_inputs.append(
+            {
+                "name": inp.name,
+                "type": inp.type,
+                "pool_path": dest.name,
+                "rows": int(len(df)),
+                "columns": list(df.columns),
+            }
+        )
+        rows.append((inp.name, inp.type, str(len(df)), dest.name))
+
+    if not rows:
+        console.print("[yellow]No pools built (no matching inputs).[/]")
+        raise typer.Exit(code=1)
+
+    manifest = {
+        "schema_version": "1.0",
+        "created_at": datetime.now(timezone.utc).isoformat(),
+        "run_id": cfg.run.id,
+        "run_root": str(run_root),
+        "config_path": str(cfg_path),
+        "inputs": manifest_inputs,
+    }
+    manifest_path = _pool_manifest_path(out_dir)
+    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True))
+
+    table = Table("input", "type", "rows", "pool_file")
+    for row in rows:
+        table.add_row(*row)
+    console.print(table)
+    console.print(f":sparkles: [bold green]Pool manifest written[/]: {manifest_path}")
+
+
+@stage_b_app.command("build-libraries", help="Build Stage-B libraries from pools or inputs.")
+def stage_b_build_libraries(
+    ctx: typer.Context,
+    out: str = typer.Option("outputs/libraries", "--out", help="Output directory (relative to run root)."),
+    pool: Optional[Path] = typer.Option(
+        None,
+        "--pool",
+        help="Optional pool directory from `stage-a build-pool` (defaults to reading inputs).",
+    ),
+    input_name: Optional[list[str]] = typer.Option(
+        None,
+        "--input",
+        "-i",
+        help="Input name(s) to build (defaults to all inputs).",
+    ),
+    plan: Optional[list[str]] = typer.Option(
+        None,
+        "--plan",
+        "-p",
+        help="Plan item name(s) to build (defaults to all plans).",
+    ),
+    overwrite: bool = typer.Option(False, help="Overwrite existing library_builds.parquet."),
+    config: Optional[Path] = typer.Option(None, "--config", "-c", help="Path to config YAML."),
+):
+    cfg_path = _resolve_config_path(ctx, config)
+    loaded = _load_config_or_exit(cfg_path)
+    cfg = loaded.root.densegen
+    if pool is None:
+        _ensure_fimo_available(cfg, strict=True)
+    run_root = _run_root_for(loaded)
+    out_dir = resolve_run_scoped_path(cfg_path, run_root, out, label="stage-b.out")
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_path = out_dir / "library_builds.parquet"
+    if out_path.exists() and not overwrite:
+        console.print(f"[bold red]library_builds.parquet already exists:[/] {out_path}")
+        raise typer.Exit(code=1)
+
+    selected_inputs = {name for name in (input_name or [])}
+    if selected_inputs:
+        available = {inp.name for inp in cfg.inputs}
+        missing = sorted(selected_inputs - available)
+        if missing:
+            raise typer.BadParameter(f"Unknown input name(s): {', '.join(missing)}")
+
+    selected_plans = {name for name in (plan or [])}
+    resolved_plan = resolve_plan(loaded)
+    if selected_plans:
+        available_plans = {p.name for p in resolved_plan}
+        missing = sorted(selected_plans - available_plans)
+        if missing:
+            raise typer.BadParameter(f"Unknown plan name(s): {', '.join(missing)}")
+
+    deps = default_deps()
+    seed = int(cfg.runtime.random_seed)
+    rng = random.Random(seed)
+    np_rng = np.random.default_rng(seed)
+    sampling_cfg = cfg.generation.sampling
+    schema_is_22 = schema_version_at_least(cfg.schema_version, major=2, minor=2)
+    outputs_root = run_root / "outputs"
+    failure_counts = _load_failure_counts_from_attempts(outputs_root)
+    libraries_built = _load_existing_library_index(outputs_root) if outputs_root.exists() else 0
+
+    pool_manifest = None
+    pool_dir = None
+    if pool is not None:
+        pool_dir = resolve_relative_path(cfg_path, pool)
+        if not pool_dir.exists() or not pool_dir.is_dir():
+            raise typer.BadParameter(f"Pool directory not found: {pool_dir}")
+        pool_manifest = _load_pool_manifest(pool_dir)
+
+    rows = []
+    table = Table("input", "plan", "library_index", "library_hash", "size", "achieved/target", "pool", "sampling")
+    for inp in cfg.inputs:
+        if selected_inputs and inp.name not in selected_inputs:
+            continue
+        if pool_manifest is not None and pool_dir is not None:
+            entry = next((e for e in pool_manifest.get("inputs", []) if e.get("name") == inp.name), None)
+            if entry is None:
+                raise typer.BadParameter(f"Pool manifest missing input: {inp.name}")
+            pool_path = pool_dir / str(entry.get("pool_path") or "")
+            if not pool_path.exists():
+                raise typer.BadParameter(f"Pool file not found for input {inp.name}: {pool_path}")
+            df = pd.read_parquet(pool_path)
+            if "tf" in df.columns and "tfbs" in df.columns:
+                meta_df = df
+                data_entries = df["tfbs"].tolist()
+            elif "sequence" in df.columns:
+                meta_df = None
+                data_entries = df["sequence"].tolist()
+            else:
+                raise typer.BadParameter(
+                    f"Pool file for {inp.name} must contain tf/tfbs or sequence columns: {pool_path}"
+                )
+        else:
+            src = deps.source_factory(inp, cfg_path)
+            data_entries, meta_df = src.load_data(rng=np_rng, outputs_root=outputs_root)
+
+        for plan_item in resolved_plan:
+            if selected_plans and plan_item.name not in selected_plans:
+                continue
+            library, _parts, reg_labels, info = build_library_for_plan(
+                source_label=inp.name,
+                plan_item=plan_item,
+                data_entries=data_entries,
+                meta_df=meta_df,
+                sampling_cfg=sampling_cfg,
+                seq_len=int(cfg.generation.sequence_length),
+                min_count_per_tf=int(cfg.runtime.min_count_per_tf),
+                usage_counts={},
+                failure_counts=failure_counts if failure_counts else None,
+                rng=rng,
+                np_rng=np_rng,
+                schema_is_22=schema_is_22,
+                library_index_start=libraries_built,
+            )
+            libraries_built = int(info.get("library_index", libraries_built))
+            library_hash = str(info.get("library_hash") or "")
+            target_len = int(info.get("target_length") or 0)
+            achieved_len = int(info.get("achieved_length") or 0)
+            pool_strategy = str(info.get("pool_strategy") or sampling_cfg.pool_strategy)
+            sampling_strategy = str(info.get("library_sampling_strategy") or sampling_cfg.library_sampling_strategy)
+            row = {
+                "created_at": datetime.now(timezone.utc).isoformat(),
+                "input_name": inp.name,
+                "input_type": inp.type,
+                "plan_name": plan_item.name,
+                "library_index": int(info.get("library_index") or 0),
+                "library_hash": library_hash,
+                "library_tfbs": list(library),
+                "library_tfs": list(reg_labels) if reg_labels else [],
+                "library_site_ids": list(info.get("site_id_by_index") or []),
+                "library_sources": list(info.get("source_by_index") or []),
+                "pool_strategy": pool_strategy,
+                "library_sampling_strategy": sampling_strategy,
+                "library_size": int(info.get("library_size") or len(library)),
+                "target_length": target_len,
+                "achieved_length": achieved_len,
+                "relaxed_cap": bool(info.get("relaxed_cap") or False),
+                "final_cap": info.get("final_cap"),
+                "iterative_max_libraries": int(info.get("iterative_max_libraries") or 0),
+                "iterative_min_new_solutions": int(info.get("iterative_min_new_solutions") or 0),
+                "required_regulators_selected": info.get("required_regulators_selected"),
+            }
+            rows.append(row)
+            table.add_row(
+                inp.name,
+                plan_item.name,
+                str(row["library_index"]),
+                _short_hash(library_hash),
+                str(len(library)),
+                f"{achieved_len}/{target_len}",
+                pool_strategy,
+                sampling_strategy,
+            )
+
+    if not rows:
+        console.print("[yellow]No libraries built (no matching inputs/plans).[/]")
+        raise typer.Exit(code=1)
+
+    df_out = pd.DataFrame(rows)
+    df_out.to_parquet(out_path, index=False)
+    manifest = {
+        "schema_version": "1.0",
+        "created_at": datetime.now(timezone.utc).isoformat(),
+        "run_id": cfg.run.id,
+        "run_root": str(run_root),
+        "config_path": str(cfg_path),
+        "library_builds_path": str(out_path),
+    }
+    manifest_path = out_dir / "library_manifest.json"
+    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True))
+    console.print(table)
+    console.print(f":sparkles: [bold green]Library builds written[/]: {out_path}")
+
+
 @app.command(help="Run generation for the job. Optionally auto-run plots declared in YAML.")
 def run(
     ctx: typer.Context,
@@ -1213,7 +1612,7 @@ def run(
 
     console.print(":tada: [bold green]Run complete[/].")
     console.print("[bold]Next steps[/]:")
-    console.print(f"  - dense summarize --library -c {cfg_path}")
+    console.print(f"  - dense inspect run --library -c {cfg_path}")
     console.print(f"  - dense report -c {cfg_path}")
 
     # Auto-plot if configured
diff --git a/src/dnadesign/densegen/src/config/__init__.py b/src/dnadesign/densegen/src/config/__init__.py
index fb882183..e8f7b920 100644
--- a/src/dnadesign/densegen/src/config/__init__.py
+++ b/src/dnadesign/densegen/src/config/__init__.py
@@ -13,7 +13,6 @@
 from __future__ import annotations
 
 import os
-import warnings
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -159,7 +158,8 @@ class PWMMiningConfig(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     batch_size: int = 100000
     max_batches: Optional[int] = None
-    max_seconds: Optional[float] = None
+    max_candidates: Optional[int] = None
+    max_seconds: Optional[float] = 60.0
     retain_bin_ids: Optional[List[int]] = None
     log_every_batches: int = 1
@@ -177,6 +177,13 @@ def _max_batches_ok(cls, v: Optional[int]):
             raise ValueError("pwm.sampling.mining.max_batches must be > 0 when set")
         return v
 
+    @field_validator("max_candidates")
+    @classmethod
+    def _max_candidates_ok(cls, v: Optional[int]):
+        if v is not None and v <= 0:
+            raise ValueError("pwm.sampling.mining.max_candidates must be > 0 when set")
+        return v
+
     @field_validator("max_seconds")
     @classmethod
     def _max_seconds_ok(cls, v: Optional[float]):
@@ -220,7 +227,6 @@ class PWMSamplingConfig(BaseModel):
     scoring_backend: Literal["densegen", "fimo"] = "densegen"
     pvalue_threshold: Optional[float] = None
     pvalue_bins: Optional[List[float]] = None
-    pvalue_bin_ids: Optional[List[int]] = None
     mining: Optional[PWMMiningConfig] = None
     bgfile: Optional[str] = None
     selection_policy: Literal["random_uniform", "top_n", "stratified"] = "random_uniform"
@@ -312,20 +318,6 @@ def _pvalue_bins_ok(cls, v: Optional[List[float]]):
             raise ValueError("pwm.sampling.pvalue_bins must end with 1.0")
         return bins
 
-    @field_validator("pvalue_bin_ids")
-    @classmethod
-    def _pvalue_bin_ids_ok(cls, v: Optional[List[int]]):
-        if v is None:
-            return v
-        if not v:
-            raise ValueError("pwm.sampling.pvalue_bin_ids must be non-empty when set")
-        ids = [int(x) for x in v]
-        if any(idx < 0 for idx in ids):
-            raise ValueError("pwm.sampling.pvalue_bin_ids values must be >= 0")
-        if len(set(ids)) != len(ids):
-            raise ValueError("pwm.sampling.pvalue_bin_ids must be unique")
-        return ids
-
     @model_validator(mode="after")
     def _score_mode(self):
         has_thresh = self.score_threshold is not None
@@ -337,8 +329,6 @@ def _score_mode(self):
                 raise ValueError("pwm.sampling.pvalue_threshold is only valid when scoring_backend='fimo'")
             if self.pvalue_bins is not None:
                 raise ValueError("pwm.sampling.pvalue_bins is only valid when scoring_backend='fimo'")
-            if self.pvalue_bin_ids is not None:
-                raise ValueError("pwm.sampling.pvalue_bin_ids is only valid when scoring_backend='fimo'")
             if self.mining is not None:
                 raise ValueError("pwm.sampling.mining is only valid when scoring_backend='fimo'")
             if self.include_matched_sequence:
@@ -348,25 +338,28 @@ def _score_mode(self):
                 raise ValueError("pwm.sampling.pvalue_threshold is required when scoring_backend='fimo'")
             if not (0.0 < float(self.pvalue_threshold) <= 1.0):
                 raise ValueError("pwm.sampling.pvalue_threshold must be between 0 and 1")
-            if self.pvalue_bin_ids is not None and self.mining is not None:
+            if "max_candidates" in self.model_fields_set and self.max_candidates is not None:
                 raise ValueError(
-                    "pwm.sampling.pvalue_bin_ids is deprecated; use pwm.sampling.mining.retain_bin_ids instead."
+                    "pwm.sampling.max_candidates is not used with scoring_backend='fimo'. "
+                    "Use pwm.sampling.mining.max_candidates instead."
                 )
-            if self.pvalue_bin_ids is not None and self.mining is None:
-                warnings.warn(
-                    "pwm.sampling.pvalue_bin_ids is deprecated; use pwm.sampling.mining.retain_bin_ids.",
-                    stacklevel=2,
+            if "max_seconds" in self.model_fields_set and self.max_seconds is not None:
+                raise ValueError(
+                    "pwm.sampling.max_seconds is not used with scoring_backend='fimo'. "
+                    "Use pwm.sampling.mining.max_seconds instead."
                 )
-                self.mining = PWMMiningConfig(retain_bin_ids=list(self.pvalue_bin_ids))
-            bin_ids = None
+            if "max_candidates" not in self.model_fields_set:
+                self.max_candidates = None
+            if "max_seconds" not in self.model_fields_set:
+                self.max_seconds = None
+            if self.mining is None:
+                self.mining = PWMMiningConfig()
+            if self.pvalue_bins is None:
+                self.pvalue_bins = list(CANONICAL_PVALUE_BINS)
             if self.mining is not None and self.mining.retain_bin_ids is not None:
-                bin_ids = list(self.mining.retain_bin_ids)
-            elif self.pvalue_bin_ids is not None:
-                bin_ids = list(self.pvalue_bin_ids)
-            if bin_ids is not None:
                 bins = list(self.pvalue_bins) if self.pvalue_bins is not None else list(CANONICAL_PVALUE_BINS)
                 max_idx = len(bins) - 1
-                if any(idx > max_idx for idx in bin_ids):
+                if any(idx > max_idx for idx in self.mining.retain_bin_ids):
                     raise ValueError("pwm.sampling.mining.retain_bin_ids contains an index outside the available bins")
         if self.strategy == "consensus" and int(self.n_sites) != 1:
             raise ValueError("pwm.sampling.strategy=consensus requires n_sites=1")
diff --git a/src/dnadesign/densegen/src/core/metadata.py b/src/dnadesign/densegen/src/core/metadata.py
index 861de3ed..728fe697 100644
--- a/src/dnadesign/densegen/src/core/metadata.py
+++ b/src/dnadesign/densegen/src/core/metadata.py
@@ -146,9 +146,9 @@ def build_metadata(
         "input_pwm_score_percentile": input_meta.get("input_pwm_score_percentile"),
         "input_pwm_pvalue_threshold": input_meta.get("input_pwm_pvalue_threshold"),
         "input_pwm_pvalue_bins": input_meta.get("input_pwm_pvalue_bins"),
-        "input_pwm_pvalue_bin_ids": input_meta.get("input_pwm_pvalue_bin_ids"),
         "input_pwm_mining_batch_size": input_meta.get("input_pwm_mining_batch_size"),
         "input_pwm_mining_max_batches": input_meta.get("input_pwm_mining_max_batches"),
+        "input_pwm_mining_max_candidates": input_meta.get("input_pwm_mining_max_candidates"),
         "input_pwm_mining_max_seconds": input_meta.get("input_pwm_mining_max_seconds"),
         "input_pwm_mining_retain_bin_ids": input_meta.get("input_pwm_mining_retain_bin_ids"),
         "input_pwm_mining_log_every_batches": input_meta.get("input_pwm_mining_log_every_batches"),
diff --git a/src/dnadesign/densegen/src/core/metadata_schema.py b/src/dnadesign/densegen/src/core/metadata_schema.py
index ca0c2736..ea568c9d 100644
--- a/src/dnadesign/densegen/src/core/metadata_schema.py
+++ b/src/dnadesign/densegen/src/core/metadata_schema.py
@@ -98,14 +98,9 @@ class MetaField:
     MetaField("input_pwm_score_percentile", (numbers.Real,), "PWM score percentile.", allow_none=True),
     MetaField("input_pwm_pvalue_threshold", (numbers.Real,), "PWM p-value threshold (FIMO).", allow_none=True),
     MetaField("input_pwm_pvalue_bins", (list,), "PWM p-value bins (FIMO).", allow_none=True),
-    MetaField(
-        "input_pwm_pvalue_bin_ids",
-        (list,),
-        "Deprecated: selected p-value bin indices (use input_pwm_mining_retain_bin_ids).",
-        allow_none=True,
-    ),
     MetaField("input_pwm_mining_batch_size", (int,), "PWM mining batch size (FIMO).", allow_none=True),
     MetaField("input_pwm_mining_max_batches", (int,), "PWM mining max batches (FIMO).", allow_none=True),
+    MetaField("input_pwm_mining_max_candidates", (int,), "PWM mining max candidates (FIMO).", allow_none=True),
     MetaField("input_pwm_mining_max_seconds", (numbers.Real,), "PWM mining max seconds (FIMO).", allow_none=True),
     MetaField(
         "input_pwm_mining_retain_bin_ids",
@@ -236,15 +231,6 @@ def _validate_list_fields(meta: Mapping[str, Any]) -> None:
             if not isinstance(item, numbers.Real):
                 raise TypeError("Metadata field 'input_pwm_pvalue_bins' must contain only numbers")
 
-    if "input_pwm_pvalue_bin_ids" in meta:
-        vals = meta["input_pwm_pvalue_bin_ids"]
-        if vals is not None:
-            if isinstance(vals, (str, bytes)) or not isinstance(vals, Sequence):
-                raise TypeError("Metadata field 'input_pwm_pvalue_bin_ids' must be a list of integers")
-            for item in vals:
-                if not isinstance(item, int):
-                    raise TypeError("Metadata field 'input_pwm_pvalue_bin_ids' must contain only integers")
-
     if "input_pwm_mining_retain_bin_ids" in meta:
         vals = meta["input_pwm_mining_retain_bin_ids"]
         if vals is not None:
diff --git a/src/dnadesign/densegen/src/core/pipeline.py b/src/dnadesign/densegen/src/core/pipeline.py
index 4b997aa7..07a2012e 100644
--- a/src/dnadesign/densegen/src/core/pipeline.py
+++ b/src/dnadesign/densegen/src/core/pipeline.py
@@ -198,28 +198,39 @@ def _extract_pwm_sampling_config(source_cfg) -> dict | None:
     requested = None
     generated = None
     capped = False
+    backend = str(_sampling_attr(sampling, "scoring_backend") or "densegen").lower()
     if isinstance(n_sites, int) and isinstance(oversample, int):
         requested = int(n_sites) * int(oversample)
         generated = requested
-        if max_candidates is not None:
-            try:
-                cap_val = int(max_candidates)
-            except Exception:
-                cap_val = None
-            if cap_val is not None:
-                generated = min(requested, cap_val)
-                capped = generated < requested
+        if backend == "fimo":
+            mining_cfg = _sampling_attr(sampling, "mining")
+            mining_max_candidates = _mining_attr(mining_cfg, "max_candidates")
+            if mining_max_candidates is not None:
+                try:
+                    cap_val = int(mining_max_candidates)
+                except Exception:
+                    cap_val = None
+                if cap_val is not None:
+                    generated = min(requested, cap_val)
+                    capped = generated < requested
+        else:
+            if max_candidates is not None:
+                try:
+                    cap_val = int(max_candidates)
+                except Exception:
+                    cap_val = None
+                if cap_val is not None:
+                    generated = min(requested, cap_val)
+                    capped = generated < requested
     length_range = _sampling_attr(sampling, "length_range")
     if length_range is not None:
         length_range = list(length_range)
     mining = _sampling_attr(sampling, "mining")
     mining_batch_size = _mining_attr(mining, "batch_size")
     mining_max_batches = _mining_attr(mining, "max_batches")
+    mining_max_candidates = _mining_attr(mining, "max_candidates")
     mining_max_seconds = _mining_attr(mining, "max_seconds")
     mining_retain_bin_ids = _mining_attr(mining, "retain_bin_ids")
-    legacy_bin_ids = _sampling_attr(sampling, "pvalue_bin_ids")
-    if mining_retain_bin_ids is None:
-        mining_retain_bin_ids = legacy_bin_ids
     mining_log_every_batches = _mining_attr(mining, "log_every_batches")
     return {
         "strategy": _sampling_attr(sampling, "strategy"),
@@ -235,7 +246,6 @@ def _extract_pwm_sampling_config(source_cfg) -> dict | None:
         "score_percentile": _sampling_attr(sampling, "score_percentile"),
         "pvalue_threshold": _sampling_attr(sampling, "pvalue_threshold"),
         "pvalue_bins": _resolve_pvalue_bins_meta(sampling),
-        "pvalue_bin_ids": legacy_bin_ids,
         "selection_policy": _sampling_attr(sampling, "selection_policy"),
         "bgfile": _sampling_attr(sampling, "bgfile"),
         "keep_all_candidates_debug": _sampling_attr(sampling, "keep_all_candidates_debug"),
@@ -244,6 +254,7 @@ def _extract_pwm_sampling_config(source_cfg) -> dict | None:
         "mining": {
             "batch_size": mining_batch_size,
             "max_batches": mining_max_batches,
+            "max_candidates": mining_max_candidates,
             "max_seconds": mining_max_seconds,
             "retain_bin_ids": mining_retain_bin_ids,
             "log_every_batches": mining_log_every_batches,
@@ -508,10 +519,9 @@ def _input_metadata(source_cfg, cfg_path: Path) -> dict:
         meta["input_pwm_pvalue_bins"] = _resolve_pvalue_bins_meta(sampling)
         mining_cfg = getattr(sampling, "mining", None)
         retained_bins = _mining_attr(mining_cfg, "retain_bin_ids")
-        legacy_bin_ids = getattr(sampling, "pvalue_bin_ids", None)
-        meta["input_pwm_pvalue_bin_ids"] = legacy_bin_ids if legacy_bin_ids is not None else retained_bins
         meta["input_pwm_mining_batch_size"] = _mining_attr(mining_cfg, "batch_size")
         meta["input_pwm_mining_max_batches"] = _mining_attr(mining_cfg, "max_batches")
+        meta["input_pwm_mining_max_candidates"] = _mining_attr(mining_cfg, "max_candidates")
         meta["input_pwm_mining_max_seconds"] = _mining_attr(mining_cfg, "max_seconds")
         meta["input_pwm_mining_retain_bin_ids"] = retained_bins
         meta["input_pwm_mining_log_every_batches"] = _mining_attr(mining_cfg, "log_every_batches")
@@ -521,7 +531,6 @@ def _input_metadata(source_cfg, cfg_path: Path) -> dict:
         meta["input_pwm_include_matched_sequence"] = getattr(sampling, "include_matched_sequence", None)
         meta["input_pwm_n_sites"] = getattr(sampling, "n_sites", None)
         meta["input_pwm_oversample_factor"] = getattr(sampling, "oversample_factor", None)
-        meta["input_pwm_max_candidates"] = getattr(sampling, "max_candidates", None)
     else:
         meta["input_mode"] = source_type
         meta["input_pwm_ids"] = []
@@ -919,6 +928,264 @@ def _hash_library(
     return digest
 
 
+def build_library_for_plan(
+    *,
+    source_label: str,
+    plan_item: ResolvedPlanItem,
+    data_entries: list,
+    meta_df: pd.DataFrame | None,
+    sampling_cfg: object,
+    seq_len: int,
+    min_count_per_tf: int,
+    usage_counts: dict[tuple[str, str], int],
+    failure_counts: dict[tuple[str, str, str, str, str | None], dict[str, int]] | None,
+    rng: random.Random,
+    np_rng: np.random.Generator,
+    schema_is_22: bool,
+    library_index_start: int,
+) -> tuple[list[str], list[str], list[str], dict]:
+    pool_strategy = str(getattr(sampling_cfg, "pool_strategy", "subsample"))
+    library_size = int(getattr(sampling_cfg, "library_size", 0))
+    subsample_over = int(getattr(sampling_cfg, "subsample_over_length_budget_by", 0))
+    library_sampling_strategy = str(getattr(sampling_cfg, "library_sampling_strategy", "tf_balanced"))
+    cover_all_tfs = bool(getattr(sampling_cfg, "cover_all_regulators", True))
+    unique_binding_sites = bool(getattr(sampling_cfg, "unique_binding_sites", True))
+    max_sites_per_tf = getattr(sampling_cfg, "max_sites_per_regulator", None)
+    relax_on_exhaustion = bool(getattr(sampling_cfg, "relax_on_exhaustion", False))
+    allow_incomplete_coverage = bool(getattr(sampling_cfg, "allow_incomplete_coverage", False))
+    iterative_max_libraries = int(getattr(sampling_cfg, "iterative_max_libraries", 0))
+    iterative_min_new_solutions = int(getattr(sampling_cfg, "iterative_min_new_solutions", 0))
+
+    fixed_elements = plan_item.fixed_elements
+    required_regulators = list(dict.fromkeys(plan_item.required_regulators or []))
+    min_required_regulators = plan_item.min_required_regulators
+    plan_min_count_by_regulator = dict(plan_item.min_count_by_regulator or {})
+    k_required = int(min_required_regulators) if min_required_regulators is not None else None
+    k_of_required = bool(required_regulators) and k_required is not None
+    if k_of_required and k_required > len(required_regulators):
+        raise ValueError(
+            "min_required_regulators cannot exceed required_regulators size "
+            f"({k_required} > {len(required_regulators)})."
+        )
+    side_left, side_right = _extract_side_biases(fixed_elements)
+    required_bias_motifs = list(dict.fromkeys([*side_left, *side_right]))
+
+    libraries_built = int(library_index_start)
+
+    def _finalize(
+        library: list[str],
+        parts: list[str],
+        reg_labels: list[str],
+        info: dict,
+        *,
+        site_id_by_index: list[str | None] | None,
+        source_by_index: list[str | None] | None,
+    ) -> tuple[list[str], list[str], list[str], dict]:
+        nonlocal libraries_built
+        libraries_built += 1
+        info["library_index"] = libraries_built
+        info["library_hash"] = _hash_library(library, reg_labels, site_id_by_index, source_by_index)
+        info["site_id_by_index"] = site_id_by_index
+        info["source_by_index"] = source_by_index
+        return library, parts, reg_labels, info
+
+    if meta_df is not None and isinstance(meta_df, pd.DataFrame):
+        available_tfs = set(meta_df["tf"].tolist())
+        missing = [t for t in required_regulators if t not in available_tfs]
+        if missing:
+            preview = ", ".join(missing[:10])
+            raise ValueError(f"Required regulators not found in input: {preview}")
+        if plan_min_count_by_regulator:
+            missing_counts = [t for t in plan_min_count_by_regulator if t not in available_tfs]
+            if missing_counts:
+                preview = ", ".join(missing_counts[:10])
+                raise ValueError(f"min_count_by_regulator TFs not found in input: {preview}")
+        if min_required_regulators is not None:
+            if not required_regulators and min_required_regulators > len(available_tfs):
+                raise ValueError(
+                    f"min_required_regulators={min_required_regulators} exceeds available regulators "
+                    f"({len(available_tfs)})."
+                )
+
+        if pool_strategy == "full":
+            lib_df = meta_df.copy()
+            if unique_binding_sites:
+                lib_df = lib_df.drop_duplicates(["tf", "tfbs"])
+            if required_bias_motifs:
+                missing_bias = [m for m in required_bias_motifs if m not in set(lib_df["tfbs"])]
+                if missing_bias:
+                    preview = ", ".join(missing_bias[:10])
+                    raise ValueError(f"Required side-bias motifs not found in input: {preview}")
+            lib_df = lib_df.reset_index(drop=True)
+            library = lib_df["tfbs"].tolist()
+            reg_labels = lib_df["tf"].tolist()
+            parts = [f"{tf}:{tfbs}" for tf, tfbs in zip(reg_labels, lib_df["tfbs"].tolist())]
+            site_id_by_index = lib_df["site_id"].tolist() if "site_id" in lib_df.columns else None
+            source_by_index = lib_df["source"].tolist() if "source" in lib_df.columns else None
+            info = {
+                "target_length": seq_len + subsample_over,
+                "achieved_length": sum(len(s) for s in library),
+                "relaxed_cap": False,
+                "final_cap": None,
+                "pool_strategy": pool_strategy,
+                "library_size": len(library),
+                "iterative_max_libraries": iterative_max_libraries,
+                "iterative_min_new_solutions": iterative_min_new_solutions,
+            }
+            return _finalize(
+                library,
+                parts,
+                reg_labels,
+                info,
+                site_id_by_index=site_id_by_index,
+                source_by_index=source_by_index,
+            )
+
+        sampler = TFSampler(meta_df, np_rng)
+        required_regulators_selected = required_regulators
+        if k_of_required:
+            candidates = sorted(required_regulators)
+            if k_required is not None and k_required < len(candidates):
+                chosen = np_rng.choice(len(candidates), size=k_required, replace=False)
+                required_regulators_selected = sorted([candidates[int(i)] for i in chosen])
+            else:
+                required_regulators_selected = candidates
+        required_tfs_for_library = list(
+            dict.fromkeys([*required_regulators_selected, *plan_min_count_by_regulator.keys()])
+        )
+        if min_required_regulators is not None and not required_regulators:
+            if pool_strategy in {"subsample", "iterative_subsample"}:
+                if library_size < int(min_required_regulators):
+                    raise ValueError(
+                        "library_size is too small to satisfy min_required_regulators when "
+                        f"required_regulators is empty. library_size={library_size} "
+                        f"min_required_regulators={min_required_regulators}. "
+                        "Increase library_size or lower min_required_regulators."
+                    )
+        if pool_strategy in {"subsample", "iterative_subsample"}:
+            required_slots = len(required_bias_motifs) + len(required_tfs_for_library)
+            if library_size < required_slots:
+                raise ValueError(
+                    "library_size is too small for required motifs. "
+                    f"library_size={library_size} but required_tfbs={len(required_bias_motifs)} "
+                    f"+ required_tfs={len(required_tfs_for_library)} "
+                    f"(min_required_regulators={min_required_regulators}). "
+                    "Increase library_size or relax required constraints."
+                )
+        if schema_is_22 and pool_strategy in {"subsample", "iterative_subsample"}:
+            failure_counts_by_tfbs: dict[tuple[str, str], int] | None = None
+            if library_sampling_strategy == "coverage_weighted" and getattr(sampling_cfg, "avoid_failed_motifs", False):
+                failure_counts_by_tfbs = _aggregate_failure_counts_for_sampling(
+                    failure_counts,
+                    input_name=source_label,
+                    plan_name=plan_item.name,
+                )
+            library, parts, reg_labels, info = sampler.generate_binding_site_library(
+                library_size,
+                sequence_length=seq_len,
+                budget_overhead=subsample_over,
+                required_tfbs=required_bias_motifs,
+                required_tfs=required_tfs_for_library,
+                cover_all_tfs=cover_all_tfs,
+                unique_binding_sites=unique_binding_sites,
+                max_sites_per_tf=max_sites_per_tf,
+                relax_on_exhaustion=relax_on_exhaustion,
+                allow_incomplete_coverage=allow_incomplete_coverage,
+                sampling_strategy=library_sampling_strategy,
+                usage_counts=usage_counts if library_sampling_strategy == "coverage_weighted" else None,
+                coverage_boost_alpha=float(getattr(sampling_cfg, "coverage_boost_alpha", 0.15)),
+                coverage_boost_power=float(getattr(sampling_cfg, "coverage_boost_power", 1.0)),
+                failure_counts=failure_counts_by_tfbs,
+                avoid_failed_motifs=bool(getattr(sampling_cfg, "avoid_failed_motifs", False)),
+                failure_penalty_alpha=float(getattr(sampling_cfg, "failure_penalty_alpha", 0.5)),
+                failure_penalty_power=float(getattr(sampling_cfg, "failure_penalty_power", 1.0)),
+            )
+        else:
+            library, parts, reg_labels, info = sampler.generate_binding_site_subsample(
+                seq_len,
+                subsample_over,
+                required_tfbs=required_bias_motifs,
+                required_tfs=required_tfs_for_library,
+                cover_all_tfs=cover_all_tfs,
+                unique_binding_sites=unique_binding_sites,
+                max_sites_per_tf=max_sites_per_tf,
+                relax_on_exhaustion=relax_on_exhaustion,
+                allow_incomplete_coverage=allow_incomplete_coverage,
+            )
+        info.update(
+            {
+                "pool_strategy": pool_strategy,
+                "library_size": library_size,
+                "library_sampling_strategy": library_sampling_strategy,
+                "coverage_boost_alpha": float(getattr(sampling_cfg, "coverage_boost_alpha", 0.15)),
+                "coverage_boost_power": float(getattr(sampling_cfg, "coverage_boost_power", 1.0)),
+                "iterative_max_libraries": iterative_max_libraries,
+                "iterative_min_new_solutions": iterative_min_new_solutions,
+                "required_regulators_selected": required_regulators_selected if k_of_required else None,
+            }
+        )
+        site_id_by_index = info.get("site_id_by_index")
+        source_by_index = info.get("source_by_index")
+        return _finalize(
+            library,
+            parts,
+            reg_labels,
+            info,
+            site_id_by_index=site_id_by_index,
+            source_by_index=source_by_index,
+        )
+
+    if required_regulators or plan_min_count_by_regulator or min_required_regulators is not None:
+        preview = ", ".join(required_regulators[:10]) if required_regulators else "n/a"
+        raise ValueError(
+            "Regulator constraints are set (required/min_count/min_required) "
+            "but the input does not provide regulators. "
+            f"required_regulators={preview}."
+        )
+    all_sequences = [s for s in data_entries]
+    if not all_sequences:
+        raise ValueError(f"No sequences found for source {source_label}")
+    pool = list(dict.fromkeys(all_sequences)) if unique_binding_sites else list(all_sequences)
+    if pool_strategy == "full":
+        if required_bias_motifs:
+            missing = [m for m in required_bias_motifs if m not in pool]
+            if missing:
+                preview = ", ".join(missing[:10])
+                raise ValueError(f"Required side-bias motifs not found in sequences input: {preview}")
+        library = pool
+    else:
+        if library_size > len(pool):
+            raise ValueError(f"library_size={library_size} exceeds available unique sequences ({len(pool)}).")
+        take = min(max(1, int(library_size)), len(pool))
+        if required_bias_motifs:
+            missing = [m for m in required_bias_motifs if m not in pool]
+            if missing:
+                preview = ", ".join(missing[:10])
+                raise ValueError(f"Required side-bias motifs not found in sequences input: {preview}")
+            if take < len(required_bias_motifs):
+                raise ValueError(
+                    f"library_size={take} is smaller than required side_biases ({len(required_bias_motifs)})."
+                )
+            required_set = set(required_bias_motifs)
+            remaining = [s for s in pool if s not in required_set]
+            library = list(required_bias_motifs) + rng.sample(remaining, take - len(required_bias_motifs))
+        else:
+            library = rng.sample(pool, take)
+    tf_parts: list[str] = []
+    reg_labels: list[str] = []
+    info = {
+        "target_length": seq_len + subsample_over,
+        "achieved_length": sum(len(s) for s in library),
+        "relaxed_cap": False,
+        "final_cap": None,
+        "pool_strategy": pool_strategy,
+        "library_size": len(library) if pool_strategy == "full" else library_size,
+        "iterative_max_libraries": iterative_max_libraries,
+        "iterative_min_new_solutions": iterative_min_new_solutions,
+    }
+    return _finalize(library, tf_parts, reg_labels, info, site_id_by_index=None, source_by_index=None)
+
+
 def _compute_sampling_fraction(
     library: list[str],
     *,
@@ -1294,14 +1561,7 @@ def _process_plan_for_source(
     sampling_cfg = gen.sampling
     pool_strategy = str(sampling_cfg.pool_strategy)
-    library_size = int(sampling_cfg.library_size)
-    subsample_over = int(sampling_cfg.subsample_over_length_budget_by)
     library_sampling_strategy = str(sampling_cfg.library_sampling_strategy)
-    cover_all_tfs = bool(sampling_cfg.cover_all_regulators)
-    unique_binding_sites = bool(sampling_cfg.unique_binding_sites)
-    max_sites_per_tf = sampling_cfg.max_sites_per_regulator
-    relax_on_exhaustion = bool(sampling_cfg.relax_on_exhaustion)
-    allow_incomplete_coverage = bool(sampling_cfg.allow_incomplete_coverage)
     iterative_max_libraries = int(sampling_cfg.iterative_max_libraries)
     iterative_min_new_solutions = int(sampling_cfg.iterative_min_new_solutions)
     schema_is_22 = schema_version_at_least(global_cfg.schema_version, major=2, minor=2)
@@ -1431,6 +1691,7 @@ def _process_plan_for_source(
     mining_cfg = _sampling_attr(input_sampling_cfg, "mining")
     mining_batch_size = _mining_attr(mining_cfg, "batch_size")
     mining_max_batches = _mining_attr(mining_cfg, "max_batches")
+    mining_max_candidates = _mining_attr(mining_cfg, "max_candidates")
     mining_max_seconds = _mining_attr(mining_cfg, "max_seconds")
     mining_retain_bins = _mining_attr(mining_cfg, "retain_bin_ids")
     if length_range is not None:
@@ -1446,11 +1707,7 @@ def _process_plan_for_source(
     bins_label = "-"
     if scoring_backend == "fimo":
         bins_label = "canonical" if _sampling_attr(input_sampling_cfg, "pvalue_bins") is None else "custom"
-        bin_ids = (
-            mining_retain_bins
-            if mining_retain_bins is not None
-            else _sampling_attr(input_sampling_cfg, "pvalue_bin_ids")
-        )
+        bin_ids = mining_retain_bins
        if bin_ids:
            bins_label = f"{bins_label} retain={sorted(list(bin_ids))}"
     length_label = str(length_policy)
@@ -1459,10 +1716,20 @@ def _process_plan_for_source(
     cap_label = "-"
     if isinstance(n_sites, int) and isinstance(oversample, int):
         requested = n_sites * oversample
-        if max_candidates is not None:
-            cap_label = f"{max_candidates} (requested={requested})"
-        if max_seconds is not None:
-            cap_label = f"{cap_label}; max_seconds={max_seconds}" if cap_label != "-" else f"{max_seconds}s"
+        if scoring_backend == "fimo":
+            if mining_max_candidates is not None:
+                cap_label = f"{mining_max_candidates} (requested={requested})"
+            if mining_max_seconds is not None:
+                cap_label = (
+                    f"{cap_label}; max_seconds={mining_max_seconds}s"
+                    if cap_label != "-"
+                    else f"{mining_max_seconds}s"
+                )
+        else:
+            if max_candidates is not None:
+                cap_label = f"{max_candidates} (requested={requested})"
+            if max_seconds is not None:
+                cap_label = f"{cap_label}; max_seconds={max_seconds}" if cap_label != "-" else f"{max_seconds}s"
     counts_label = _summarize_tf_counts(meta_df["tf"].tolist())
     selection_label = selection_policy if scoring_backend == "fimo" else "-"
     mining_label = "-"
@@ -1472,6 +1739,8 @@ def _process_plan_for_source(
         parts.append(f"batch={mining_batch_size}")
         if mining_max_batches is not None:
             parts.append(f"max_batches={mining_max_batches}")
+        if mining_max_candidates is not None:
+
parts.append(f"max_candidates={mining_max_candidates}") if mining_max_seconds is not None: parts.append(f"max_seconds={mining_max_seconds}s") mining_label = ", ".join(parts) if parts else "enabled" @@ -1524,8 +1793,6 @@ def _process_plan_for_source( f"({k_required} > {len(required_regulators)})." ) metadata_min_counts = {tf: max(min_count_per_tf, int(val)) for tf, val in plan_min_count_by_regulator.items()} - side_left, side_right = _extract_side_biases(fixed_elements) - required_bias_motifs = list(dict.fromkeys([*side_left, *side_right])) fixed_elements_dump = _fixed_elements_dump(fixed_elements) fixed_elements_max_len = _max_fixed_element_len(fixed_elements_dump) @@ -1537,207 +1804,22 @@ def _process_plan_for_source( if pool_strategy != "iterative_subsample" and not one_subsample_only: max_per_subsample = quota - - def _build_library() -> tuple[list[str], list[str], list[str], dict]: - nonlocal libraries_built - if meta_df is not None and isinstance(meta_df, pd.DataFrame): - available_tfs = set(meta_df["tf"].tolist()) - missing = [t for t in required_regulators if t not in available_tfs] - if missing: - preview = ", ".join(missing[:10]) - raise ValueError(f"Required regulators not found in input: {preview}") - if plan_min_count_by_regulator: - missing_counts = [t for t in plan_min_count_by_regulator if t not in available_tfs] - if missing_counts: - preview = ", ".join(missing_counts[:10]) - raise ValueError(f"min_count_by_regulator TFs not found in input: {preview}") - if min_required_regulators is not None: - if not required_regulators and min_required_regulators > len(available_tfs): - raise ValueError( - f"min_required_regulators={min_required_regulators} exceeds available regulators " - f"({len(available_tfs)})." 
- ) - - if pool_strategy == "full": - lib_df = meta_df.copy() - if unique_binding_sites: - lib_df = lib_df.drop_duplicates(["tf", "tfbs"]) - if required_bias_motifs: - missing_bias = [m for m in required_bias_motifs if m not in set(lib_df["tfbs"])] - if missing_bias: - preview = ", ".join(missing_bias[:10]) - raise ValueError(f"Required side-bias motifs not found in input: {preview}") - lib_df = lib_df.reset_index(drop=True) - library = lib_df["tfbs"].tolist() - reg_labels = lib_df["tf"].tolist() - parts = [f"{tf}:{tfbs}" for tf, tfbs in zip(reg_labels, lib_df["tfbs"].tolist())] - site_id_by_index = lib_df["site_id"].tolist() if "site_id" in lib_df.columns else None - source_by_index = lib_df["source"].tolist() if "source" in lib_df.columns else None - info = { - "target_length": seq_len + subsample_over, - "achieved_length": sum(len(s) for s in library), - "relaxed_cap": False, - "final_cap": None, - "pool_strategy": pool_strategy, - "library_size": len(library), - "iterative_max_libraries": iterative_max_libraries, - "iterative_min_new_solutions": iterative_min_new_solutions, - } - libraries_built += 1 - info["library_index"] = libraries_built - info["library_hash"] = _hash_library(library, reg_labels, site_id_by_index, source_by_index) - info["site_id_by_index"] = site_id_by_index - info["source_by_index"] = source_by_index - return library, parts, reg_labels, info - - sampler = TFSampler(meta_df, np_rng) - required_regulators_selected = required_regulators - if k_of_required: - candidates = sorted(required_regulators) - if k_required is not None and k_required < len(candidates): - chosen = np_rng.choice(len(candidates), size=k_required, replace=False) - required_regulators_selected = sorted([candidates[int(i)] for i in chosen]) - else: - required_regulators_selected = candidates - required_tfs_for_library = list( - dict.fromkeys([*required_regulators_selected, *plan_min_count_by_regulator.keys()]) - ) - if min_required_regulators is not None and not 
required_regulators: - if pool_strategy in {"subsample", "iterative_subsample"}: - if library_size < int(min_required_regulators): - raise ValueError( - "library_size is too small to satisfy min_required_regulators when " - f"required_regulators is empty. library_size={library_size} " - f"min_required_regulators={min_required_regulators}. " - "Increase library_size or lower min_required_regulators." - ) - if pool_strategy in {"subsample", "iterative_subsample"}: - required_slots = len(required_bias_motifs) + len(required_tfs_for_library) - if library_size < required_slots: - raise ValueError( - "library_size is too small for required motifs. " - f"library_size={library_size} but required_tfbs={len(required_bias_motifs)} " - f"+ required_tfs={len(required_tfs_for_library)} " - f"(min_required_regulators={min_required_regulators}). " - "Increase library_size or relax required constraints." - ) - # Alignment (1,4): count-based library sizing with explicit sampling strategy under schema>=2.2. 
- if schema_is_22 and pool_strategy in {"subsample", "iterative_subsample"}: - failure_counts_by_tfbs: dict[tuple[str, str], int] | None = None - if library_sampling_strategy == "coverage_weighted" and sampling_cfg.avoid_failed_motifs: - failure_counts_by_tfbs = _aggregate_failure_counts_for_sampling( - failure_counts, - input_name=source_label, - plan_name=plan_name, - ) - library, parts, reg_labels, info = sampler.generate_binding_site_library( - library_size, - sequence_length=seq_len, - budget_overhead=subsample_over, - required_tfbs=required_bias_motifs, - required_tfs=required_tfs_for_library, - cover_all_tfs=cover_all_tfs, - unique_binding_sites=unique_binding_sites, - max_sites_per_tf=max_sites_per_tf, - relax_on_exhaustion=relax_on_exhaustion, - allow_incomplete_coverage=allow_incomplete_coverage, - sampling_strategy=library_sampling_strategy, - usage_counts=usage_counts if library_sampling_strategy == "coverage_weighted" else None, - coverage_boost_alpha=float(sampling_cfg.coverage_boost_alpha), - coverage_boost_power=float(sampling_cfg.coverage_boost_power), - failure_counts=failure_counts_by_tfbs, - avoid_failed_motifs=bool(sampling_cfg.avoid_failed_motifs), - failure_penalty_alpha=float(sampling_cfg.failure_penalty_alpha), - failure_penalty_power=float(sampling_cfg.failure_penalty_power), - ) - else: - library, parts, reg_labels, info = sampler.generate_binding_site_subsample( - seq_len, - subsample_over, - required_tfbs=required_bias_motifs, - required_tfs=required_tfs_for_library, - cover_all_tfs=cover_all_tfs, - unique_binding_sites=unique_binding_sites, - max_sites_per_tf=max_sites_per_tf, - relax_on_exhaustion=relax_on_exhaustion, - allow_incomplete_coverage=allow_incomplete_coverage, - ) - info.update( - { - "pool_strategy": pool_strategy, - "library_size": library_size, - "library_sampling_strategy": library_sampling_strategy, - "coverage_boost_alpha": float(sampling_cfg.coverage_boost_alpha), - "coverage_boost_power": 
float(sampling_cfg.coverage_boost_power), - "iterative_max_libraries": iterative_max_libraries, - "iterative_min_new_solutions": iterative_min_new_solutions, - "required_regulators_selected": required_regulators_selected if k_of_required else None, - } - ) - libraries_built += 1 - info["library_index"] = libraries_built - site_id_by_index = info.get("site_id_by_index") - source_by_index = info.get("source_by_index") - info["library_hash"] = _hash_library(library, reg_labels, site_id_by_index, source_by_index) - return library, parts, reg_labels, info - - # Sequence library (no regulators) - if required_regulators or plan_min_count_by_regulator or min_required_regulators is not None: - preview = ", ".join(required_regulators[:10]) if required_regulators else "n/a" - raise ValueError( - "Regulator constraints are set (required/min_count/min_required) " - "but the input does not provide regulators. " - f"required_regulators={preview}." - ) - all_sequences = [s for s in data_entries] - if not all_sequences: - raise ValueError(f"No sequences found for source {source_label}") - pool = list(dict.fromkeys(all_sequences)) if unique_binding_sites else list(all_sequences) - if pool_strategy == "full": - if required_bias_motifs: - missing = [m for m in required_bias_motifs if m not in pool] - if missing: - preview = ", ".join(missing[:10]) - raise ValueError(f"Required side-bias motifs not found in sequences input: {preview}") - library = pool - else: - if library_size > len(pool): - raise ValueError(f"library_size={library_size} exceeds available unique sequences ({len(pool)}).") - take = min(max(1, int(library_size)), len(pool)) - if required_bias_motifs: - missing = [m for m in required_bias_motifs if m not in pool] - if missing: - preview = ", ".join(missing[:10]) - raise ValueError(f"Required side-bias motifs not found in sequences input: {preview}") - if take < len(required_bias_motifs): - raise ValueError( - f"library_size={take} is smaller than required side_biases 
({len(required_bias_motifs)})." - ) - required_set = set(required_bias_motifs) - remaining = [s for s in pool if s not in required_set] - library = list(required_bias_motifs) + rng.sample(remaining, take - len(required_bias_motifs)) - else: - library = rng.sample(pool, take) - tf_parts: list[str] = [] - reg_labels: list[str] = [] - info = { - "target_length": seq_len + subsample_over, - "achieved_length": sum(len(s) for s in library), - "relaxed_cap": False, - "final_cap": None, - "pool_strategy": pool_strategy, - "library_size": len(library) if pool_strategy == "full" else library_size, - "iterative_max_libraries": iterative_max_libraries, - "iterative_min_new_solutions": iterative_min_new_solutions, - } - libraries_built += 1 - info["library_index"] = libraries_built - info["library_hash"] = _hash_library(library, reg_labels, None, None) - info["site_id_by_index"] = None - info["source_by_index"] = None - return library, tf_parts, reg_labels, info - - library_for_opt, tfbs_parts, regulator_labels, sampling_info = _build_library() + library_for_opt, tfbs_parts, regulator_labels, sampling_info = build_library_for_plan( + source_label=source_label, + plan_item=plan_item, + data_entries=data_entries, + meta_df=meta_df, + sampling_cfg=sampling_cfg, + seq_len=seq_len, + min_count_per_tf=min_count_per_tf, + usage_counts=usage_counts, + failure_counts=failure_counts if failure_counts else None, + rng=rng, + np_rng=np_rng, + schema_is_22=schema_is_22, + library_index_start=libraries_built, + ) + libraries_built = int(sampling_info.get("library_index", libraries_built)) site_id_by_index = sampling_info.get("site_id_by_index") source_by_index = sampling_info.get("source_by_index") sampling_library_index = sampling_info.get("library_index", 0) @@ -2583,7 +2665,22 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): ) # New library - library_for_opt, tfbs_parts, regulator_labels, sampling_info = _build_library() + library_for_opt, tfbs_parts, 
regulator_labels, sampling_info = build_library_for_plan( + source_label=source_label, + plan_item=plan_item, + data_entries=data_entries, + meta_df=meta_df, + sampling_cfg=sampling_cfg, + seq_len=seq_len, + min_count_per_tf=min_count_per_tf, + usage_counts=usage_counts, + failure_counts=failure_counts if failure_counts else None, + rng=rng, + np_rng=np_rng, + schema_is_22=schema_is_22, + library_index_start=libraries_built, + ) + libraries_built = int(sampling_info.get("library_index", libraries_built)) site_id_by_index = sampling_info.get("site_id_by_index") source_by_index = sampling_info.get("source_by_index") sampling_library_index = sampling_info.get("library_index", sampling_library_index) diff --git a/src/dnadesign/densegen/src/core/reporting.py b/src/dnadesign/densegen/src/core/reporting.py index 1a35fa8b..c4049d75 100644 --- a/src/dnadesign/densegen/src/core/reporting.py +++ b/src/dnadesign/densegen/src/core/reporting.py @@ -565,20 +565,29 @@ def write_report( *, out_dir: str | Path = "outputs", include_combinatorics: bool = False, + formats: set[str] | None = None, ) -> ReportBundle: run_root = resolve_run_root(cfg_path, root_cfg.densegen.run.root) out_path = resolve_run_scoped_path(cfg_path, run_root, str(out_dir), label="report.out") out_path.mkdir(parents=True, exist_ok=True) bundle = collect_report_data(root_cfg, cfg_path, include_combinatorics=include_combinatorics) - report_path = out_path / "report.json" - report_path.write_text(json.dumps(bundle.run_report, indent=2, sort_keys=True)) - report_md = out_path / "report.md" - _write_report_md(report_md, bundle) + formats = {f.lower() for f in (formats or {"json", "md"})} + if "all" in formats: + formats = {"json", "md", "html"} + if "json" in formats: + report_path = out_path / "report.json" + report_path.write_text(json.dumps(bundle.run_report, indent=2, sort_keys=True)) + if "md" in formats: + report_md = out_path / "report.md" + _write_report_md(report_md, bundle) + if "html" in formats: + 
report_html = out_path / "report.html"
+        _write_report_html(report_html, bundle)
     return bundle
 
 
-def _write_report_md(path: Path, bundle: ReportBundle) -> None:
+def _render_report_md(bundle: ReportBundle) -> str:
     report = bundle.run_report
     lines = [
         "# DenseGen Report",
@@ -622,4 +631,32 @@ def _write_report_md(path: Path, bundle: ReportBundle) -> None:
             label = f"{tf}:{tfbs}" if tf else tfbs
             reason_suffix = f" (top reason: {reason})" if reason else ""
             lines.append(f"- {label} — failures={failures}{reason_suffix}")
-    path.write_text("\n".join(lines) + "\n")
+    return "\n".join(lines) + "\n"
+
+
+def _write_report_md(path: Path, bundle: ReportBundle) -> None:
+    path.write_text(_render_report_md(bundle))
+
+
+def _write_report_html(path: Path, bundle: ReportBundle) -> None:
+    md = _render_report_md(bundle)
+    body = md.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+    html = "\n".join(
+        [
+            "<!DOCTYPE html>",
+            "<html>",
+            "<head>",
+            '<meta charset="utf-8">',
+            "<title>DenseGen Report</title>",
+            "<style>pre { white-space: pre-wrap; }</style>",
+            "</head>",
+            "<body>",
+            "<pre>",
+            body,
+            "</pre>",
+            "</body>",
+            "</html>",
+        ]
+    )
+    path.write_text(html)
diff --git a/src/dnadesign/densegen/src/integrations/meme_suite.py b/src/dnadesign/densegen/src/integrations/meme_suite.py
index 9abdb34c..bbc1579f 100644
--- a/src/dnadesign/densegen/src/integrations/meme_suite.py
+++ b/src/dnadesign/densegen/src/integrations/meme_suite.py
@@ -39,3 +39,13 @@ def resolve_executable(tool: str, *, tool_path: Path | None = None) -> Path | No
         return candidate
     found = shutil.which(tool)
     return Path(found) if found else None
+
+
+def require_executable(tool: str, *, tool_path: Path | None = None) -> Path:
+    exe = resolve_executable(tool, tool_path=tool_path)
+    if exe is None:
+        raise FileNotFoundError(
+            f"{tool} executable not found. Install MEME Suite and ensure `{tool}` is on PATH, "
+            "or set MEME_BIN to the MEME bin directory (pixi users: `pixi run dense ...`)."
+        )
+    return exe
diff --git a/src/dnadesign/densegen/tests/test_cli_config_option.py b/src/dnadesign/densegen/tests/test_cli_config_option.py
index 2f231cfe..44be5935 100644
--- a/src/dnadesign/densegen/tests/test_cli_config_option.py
+++ b/src/dnadesign/densegen/tests/test_cli_config_option.py
@@ -53,7 +53,7 @@ def test_validate_accepts_config_after_command(tmp_path: Path) -> None:
     cfg_path = tmp_path / "config.yaml"
     _write_min_config(cfg_path)
     runner = CliRunner()
-    result = runner.invoke(app, ["validate", "-c", str(cfg_path)])
+    result = runner.invoke(app, ["validate-config", "-c", str(cfg_path)])
     assert result.exit_code == 0, result.output
     assert "Config is valid" in result.output
 
@@ -62,6 +62,6 @@ def test_validate_reports_invalid_config(tmp_path: Path) -> None:
     cfg_path = tmp_path / "config.yaml"
     cfg_path.write_text("densegen:\n  inputs: []\n")
     runner = CliRunner()
-    result = runner.invoke(app, ["validate", "-c", str(cfg_path)])
+    result = runner.invoke(app, ["validate-config", "-c", str(cfg_path)])
     assert result.exit_code != 0, result.output
     assert "Config error" in result.output
diff --git 
a/src/dnadesign/densegen/tests/test_cli_describe.py b/src/dnadesign/densegen/tests/test_cli_describe.py index 6b2c9d1f..e0971164 100644 --- a/src/dnadesign/densegen/tests/test_cli_describe.py +++ b/src/dnadesign/densegen/tests/test_cli_describe.py @@ -53,7 +53,7 @@ def test_describe_outputs_summary(tmp_path: Path) -> None: cfg_path = tmp_path / "config.yaml" _write_min_config(cfg_path) runner = CliRunner() - result = runner.invoke(app, ["describe", "-c", str(cfg_path)]) + result = runner.invoke(app, ["inspect", "config", "-c", str(cfg_path)]) assert result.exit_code == 0, result.output assert "Config" in result.output assert "Gap fill" in result.output diff --git a/src/dnadesign/densegen/tests/test_cli_summarize_library.py b/src/dnadesign/densegen/tests/test_cli_summarize_library.py index 5445905a..670f0772 100644 --- a/src/dnadesign/densegen/tests/test_cli_summarize_library.py +++ b/src/dnadesign/densegen/tests/test_cli_summarize_library.py @@ -64,9 +64,9 @@ def _base_meta(library_hash: str, library_index: int) -> dict: "input_pwm_score_percentile": None, "input_pwm_pvalue_threshold": None, "input_pwm_pvalue_bins": None, - "input_pwm_pvalue_bin_ids": None, "input_pwm_mining_batch_size": None, "input_pwm_mining_max_batches": None, + "input_pwm_mining_max_candidates": None, "input_pwm_mining_max_seconds": None, "input_pwm_mining_retain_bin_ids": None, "input_pwm_mining_log_every_batches": None, @@ -234,7 +234,7 @@ def test_summarize_library_grouping(tmp_path: Path) -> None: manifest.write_json(run_manifest_path(run_root)) runner = CliRunner() - result = runner.invoke(app, ["summarize", "--run", str(run_root), "--library"]) + result = runner.invoke(app, ["inspect", "run", "--run", str(run_root), "--library"]) assert result.exit_code == 0, result.output assert "Library build summary" in result.output assert "abc123" in result.output diff --git a/src/dnadesign/densegen/tests/test_config_strict.py b/src/dnadesign/densegen/tests/test_config_strict.py index 
fe234a8e..1e437d9f 100644 --- a/src/dnadesign/densegen/tests/test_config_strict.py +++ b/src/dnadesign/densegen/tests/test_config_strict.py @@ -120,6 +120,29 @@ def test_promoter_constraint_motif_validation(tmp_path: Path) -> None: load_config(cfg_path) +def test_fimo_rejects_max_candidates(tmp_path: Path) -> None: + cfg = copy.deepcopy(MIN_CONFIG) + cfg["densegen"]["inputs"] = [ + { + "name": "motifs", + "type": "pwm_meme", + "path": "inputs.meme", + "sampling": { + "strategy": "stochastic", + "n_sites": 2, + "oversample_factor": 2, + "scoring_backend": "fimo", + "pvalue_threshold": 1e-4, + "max_candidates": 100, + "mining": {"batch_size": 10}, + }, + } + ] + cfg_path = _write(cfg, tmp_path / "cfg.yaml") + with pytest.raises(ConfigError, match="max_candidates is not used"): + load_config(cfg_path) + + def test_promoter_constraint_range_non_negative(tmp_path: Path) -> None: cfg = copy.deepcopy(MIN_CONFIG) cfg["densegen"]["generation"]["plan"] = [ diff --git a/src/dnadesign/densegen/tests/test_outputs_parquet.py b/src/dnadesign/densegen/tests/test_outputs_parquet.py index 83d5c7da..d3e53caf 100644 --- a/src/dnadesign/densegen/tests/test_outputs_parquet.py +++ b/src/dnadesign/densegen/tests/test_outputs_parquet.py @@ -59,9 +59,9 @@ def _dummy_meta() -> dict: "input_pwm_score_percentile": None, "input_pwm_pvalue_threshold": None, "input_pwm_pvalue_bins": None, - "input_pwm_pvalue_bin_ids": None, "input_pwm_mining_batch_size": None, "input_pwm_mining_max_batches": None, + "input_pwm_mining_max_candidates": None, "input_pwm_mining_max_seconds": None, "input_pwm_mining_retain_bin_ids": None, "input_pwm_mining_log_every_batches": None, diff --git a/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py b/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py index 6195073f..8b778a0d 100644 --- a/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py +++ b/src/dnadesign/densegen/tests/test_pwm_fimo_utils.py @@ -70,7 +70,10 @@ def test_parse_fimo_tsv_and_best_hits() -> None: assert 
best["cand1"].pvalue == pytest.approx(0.5) -@pytest.mark.skipif(resolve_executable("fimo", tool_path=None) is None, reason="fimo executable not available") +@pytest.mark.skipif( + resolve_executable("fimo", tool_path=None) is None, + reason="fimo executable not available (run tests via `pixi run pytest` or set MEME_BIN).", +) def test_run_fimo_smoke(tmp_path: Path) -> None: motif = PWMMotif( motif_id="M1", diff --git a/src/dnadesign/densegen/tests/test_pwm_sampling_mining.py b/src/dnadesign/densegen/tests/test_pwm_sampling_mining.py index 9895fdcb..4b653038 100644 --- a/src/dnadesign/densegen/tests/test_pwm_sampling_mining.py +++ b/src/dnadesign/densegen/tests/test_pwm_sampling_mining.py @@ -3,6 +3,7 @@ from pathlib import Path import numpy as np +import pytest from dnadesign.densegen.src.adapters.sources import pwm_fimo from dnadesign.densegen.src.adapters.sources.pwm_sampling import PWMMotif, sample_pwm_sites @@ -78,3 +79,31 @@ def fake_run_fimo(*, meme_motif_path, fasta_path, **_kwargs): # type: ignore[ov info = meta[seq] assert info["fimo_bin_id"] == 0 assert info["fimo_matched_sequence"] == "AAA" + + +def test_pwm_sampling_fimo_mining_max_candidates_guard() -> None: + motif = PWMMotif( + motif_id="M2", + matrix=[ + {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + ], + background={"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + ) + rng = np.random.default_rng(0) + with pytest.raises(ValueError, match="mining.max_candidates must be >= n_sites"): + sample_pwm_sites( + rng, + motif, + strategy="stochastic", + n_sites=5, + oversample_factor=1, + max_candidates=None, + max_seconds=None, + score_threshold=None, + score_percentile=None, + scoring_backend="fimo", + pvalue_threshold=1e-2, + mining={"batch_size": 2, "max_candidates": 2}, + selection_policy="random_uniform", + ) diff --git a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml index 
cd0bcb45..3fed86a2 100644 --- a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +++ b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml @@ -22,12 +22,12 @@ densegen: strategy: stochastic n_sites: 80 oversample_factor: 200 - max_candidates: 20000 # bounded candidate generation (cap across mining batches) scoring_backend: fimo pvalue_threshold: 1e-4 selection_policy: stratified mining: batch_size: 5000 + max_candidates: 20000 max_batches: 4 retain_bin_ids: [0, 1, 2, 3] log_every_batches: 1 From 4d5eed71a0e2ba8c64a780d3febf65ed3da83c19 Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 13:07:02 -0500 Subject: [PATCH 08/40] densegen docs: clarify FIMO mining workflow --- src/dnadesign/densegen/README.md | 6 +- .../densegen/docs/demo/demo_basic.md | 68 ++++++--- .../densegen/docs/dev/improvements.md | 2 +- .../densegen/docs/guide/generation.md | 29 +++- src/dnadesign/densegen/docs/guide/inputs.md | 35 +++-- .../densegen/docs/guide/outputs-metadata.md | 2 +- .../densegen/docs/guide/workspace.md | 4 +- src/dnadesign/densegen/docs/reference/cli.md | 144 ++++++++++++------ .../densegen/docs/reference/config.md | 16 +- .../densegen/docs/reference/outputs.md | 17 +++ .../docs/workflows/cruncher_pwm_pipeline.md | 4 +- src/dnadesign/densegen/workspaces/README.md | 2 +- 12 files changed, 227 insertions(+), 102 deletions(-) diff --git a/src/dnadesign/densegen/README.md b/src/dnadesign/densegen/README.md index be196836..228eed89 100644 --- a/src/dnadesign/densegen/README.md +++ b/src/dnadesign/densegen/README.md @@ -21,10 +21,10 @@ FIMO-backed PWM sampling is supported when MEME Suite is available (`fimo` on PA Stratified FIMO sampling uses canonical p‑value bins by default; see the guide for mining workflows. 
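The README context above leans on FIMO p-value bins; the sequence-level conversion behind them, `p_seq = 1 − (1 − p_win)^n_windows` (stated in the cruncher docs earlier in this series), can be sketched as follows. This is an illustrative helper only — the function name is invented and it is not code from these patches:

```python
def sequence_pvalue(p_win: float, n_windows: int) -> float:
    """Convert a best-window p-value to a sequence-level p-value.

    Illustrative only: treats the n scanned windows as independent trials,
    per the formula in the cruncher docs.
    """
    if not (0.0 <= p_win <= 1.0) or n_windows < 1:
        raise ValueError("p_win must be in [0, 1] and n_windows >= 1")
    # P(at least one window beats the threshold) = 1 - P(no window does)
    return 1.0 - (1.0 - p_win) ** n_windows
```

For small `p_win` this is approximately `n_windows * p_win` (a Bonferroni-style bound), so longer sequences inflate the sequence-level p unless the per-window threshold is tightened.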
```bash -uv run dense validate -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense describe -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +pixi run dense validate-config -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +uv run dense inspect inputs -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml pixi run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --no-plot -uv run dense summarize -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --library --top-per-tf 5 +uv run dense inspect run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --library --top-per-tf 5 uv run dense plot -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --only tf_usage,tf_coverage ``` diff --git a/src/dnadesign/densegen/docs/demo/demo_basic.md b/src/dnadesign/densegen/docs/demo/demo_basic.md index 15c4377e..91bd3ad3 100644 --- a/src/dnadesign/densegen/docs/demo/demo_basic.md +++ b/src/dnadesign/densegen/docs/demo/demo_basic.md @@ -11,12 +11,13 @@ and uses the dense-arrays CBC backend. All paths are explicit; missing files fai - [2) Stage a workspace](#2-stage-a-workspace) - copy inputs and rewrite paths. - [3) Validate config](#3-validate-config) - schema and sanity checks. - [4) Plan constraints](#4-plan-constraints) - see resolved quotas and constraint buckets. -- [5) Describe the resolved run](#5-describe-the-resolved-run) - verify inputs, outputs, solver. -- [6) Run generation](#6-run-generation) - produce sequences and metadata. -- [7) Summarize the run](#7-summarize-the-run) - review run-level counts. -- [8) Audit report](#8-audit-report) - build offered-vs-used tables. -- [9) Inspect outputs](#9-inspect-outputs) - list Parquet artifacts. -- [10) Plot analysis](#10-plot-analysis) - render tf_usage and tf_coverage. +- [5) Inspect the resolved run config](#5-inspect-the-resolved-run-config) - verify inputs, outputs, solver. 
+- [6) (Optional) Stage‑A + Stage‑B previews](#6-optional-stagea--stageb-previews) - preview pools and libraries. +- [7) Run generation](#7-run-generation) - produce sequences and metadata. +- [8) Inspect run summary](#8-inspect-run-summary) - review run-level counts. +- [9) Audit report](#9-audit-report) - build offered-vs-used tables. +- [10) Inspect outputs](#10-inspect-outputs) - list Parquet artifacts. +- [11) Plot analysis](#11-plot-analysis) - render tf_usage and tf_coverage. - [Appendix (optional)](#appendix-optional) - PWM sampling + USR output. ## 0) Prereqs @@ -29,7 +30,7 @@ uv sync --locked This demo uses **FIMO** (MEME Suite) to adjudicate strong motif matches. Ensure `fimo` is on PATH or set `MEME_BIN` to the MEME bin directory. If you use pixi, run commands via -`pixi run dense ...` so MEME tools are available (recommended for the run step). +`pixi run dense ...` so MEME tools are available (recommended for validation + run steps). All commands below assume you are at the repo root. We will write the demo run to a scratch directory; set a run root: @@ -55,7 +56,13 @@ src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/cpxR.txt These are MEME files parsed with Cruncher’s MEME parser (DenseGen reuses the same parsing logic for DRY). The demo uses LexA + CpxR motifs and exercises PWM sampling bounds. Sampling uses FIMO p-values to define “strong” matches and `selection_policy: stratified` to balance -across canonical p‑value bins (see the input-stage sampling table in `dense describe`). +across canonical p‑value bins (see the input-stage sampling table in `dense inspect inputs`). 
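As an aside on `selection_policy: stratified` mentioned above: stratified selection can be pictured as round-robin draws across the p-value bins until the quota is filled, so no single bin dominates. A minimal sketch under that assumption — names and behavior here are illustrative, not DenseGen's implementation:

```python
import random


def stratified_select(bins: dict[int, list[str]], k: int, seed: int = 0) -> list[str]:
    """Draw up to k sites round-robin across p-value bins.

    Illustrative only: `bins` maps bin id -> candidate sites, with bin 0
    holding the smallest (strongest) p-values.
    """
    rng = random.Random(seed)
    # Copy and shuffle each bin so draws within a bin are random.
    pools = {b: list(sites) for b, sites in bins.items() if sites}
    for pool in pools.values():
        rng.shuffle(pool)
    selected: list[str] = []
    while pools and len(selected) < k:
        for b in sorted(pools):  # visit strongest bin first each pass
            if len(selected) >= k:
                break
            selected.append(pools[b].pop())
            if not pools[b]:
                del pools[b]
    return selected
```

If one bin is exhausted, later passes simply keep drawing from the remaining bins, which matches the intent of balancing across bins rather than enforcing exact per-bin quotas.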
+ +Inspect the resolved inputs + Stage‑A sampling table: + +```bash +pixi run dense inspect inputs -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +``` ### 1b) (Optional) Rebuild inputs from Cruncher @@ -84,7 +91,7 @@ Stage a self-contained workspace from the demo template (this copies inputs and paths): ```bash -uv run dense stage --id demo_press --root "$RUN_ROOT" \ +uv run dense workspace init --id demo_press --root "$RUN_ROOT" \ --template src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml \ --copy-inputs ``` @@ -102,7 +109,7 @@ Parquet schema mismatch. Either delete `outputs/dense_arrays.parquet` + ## 3) Validate config ```bash -uv run dense validate -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml +pixi run dense validate-config -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml ``` Example output: @@ -114,7 +121,7 @@ Example output: ## 4) Plan constraints ```bash -uv run dense plan -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml +uv run dense inspect plan -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml ``` Example output: @@ -127,12 +134,12 @@ Example output: └──────┴───────┴──────────────────────────┘ ``` -## 5) Describe the resolved run +## 5) Inspect the resolved run config This step shows the resolved inputs, outputs, solver selection, and the two-stage sampling knobs. ```bash -uv run dense describe -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml +uv run dense inspect config -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml ``` Example output (abridged): @@ -156,7 +163,22 @@ Solver-stage library sampling ... ``` -## 6) Run generation +## 6) (Optional) Stage‑A + Stage‑B previews + +Stage‑A: materialize the TFBS pool (FIMO mining + stratified selection). 
This is useful when +you want to inspect mining yields per p‑value bin before running the solver: + +```bash +pixi run dense stage-a build-pool -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml +``` + +Stage‑B: build a solver library from the pool without running the solver: + +```bash +pixi run dense stage-b build-libraries -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml +``` + +## 7) Run generation ```bash pixi run dense run -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml --no-plot @@ -172,7 +194,7 @@ Example output (abridged): 2026-01-15 14:02:02 | INFO | dnadesign.densegen.src.utils.logging_utils | Logging initialized (level=INFO) Quota plan: meme_demo=50 2026-01-15 14:02:02 | INFO | dnadesign.densegen.src.adapters.optimizer.dense_arrays | Solver selected: CBC -2026-01-15 14:02:05 | INFO | dnadesign.densegen.src.adapters.sources.pwm_sampling | FIMO yield for motif lexA: hits=960 accepted=120 selected=80 bins=(0e+00,1e-10]:0 ... selected_bins=(0e+00,1e-10]:0 ... +2026-01-15 14:02:05 | INFO | dnadesign.densegen.src.adapters.sources.pwm_sampling | FIMO yield for motif lexA: hits=120 accepted=120 selected=80 bins=(0e+00,1e-10]:40 (1e-10,1e-08]:35 ... selected_bins=(0e+00,1e-10]:26 ... 2026-01-15 14:02:06 | INFO | dnadesign.densegen.src.core.pipeline | [demo/demo] 2/50 (4.00%) (local 2/2) CR=1.050 | seq ATTGACAGTAAACCTGCGGGAAATATAATTTACTCCGTATTTGCACATGGTTATCCACAG 2026-01-15 14:02:05 | INFO | dnadesign.densegen.src.core.pipeline | Inputs manifest written: /private/tmp/densegen-demo-20260115-1405/demo_press/outputs/meta/inputs_manifest.json 🎉 Run complete. @@ -181,13 +203,13 @@ Quota plan: meme_demo=50 On macOS you may see Arrow sysctl warnings after generation; they are emitted by pyarrow and do not indicate a DenseGen failure. -## 7) Summarize the run +## 8) Inspect run summary DenseGen writes `outputs/meta/run_manifest.json` and `outputs/meta/inputs_manifest.json`. 
Summarize the run manifest: ```bash -uv run dense summarize --run /private/tmp/densegen-demo-20260115-1405/demo_press +uv run dense inspect run --run /private/tmp/densegen-demo-20260115-1405/demo_press ``` Example output: @@ -205,7 +227,7 @@ Use `--verbose` for constraint-failure breakdowns and duplicate-solution counts. Use `--library` to print offered-vs-used summaries for quick debugging: ```bash -uv run dense summarize --run /private/tmp/densegen-demo-20260115-1405/demo_press --library --top-per-tf 5 +uv run dense inspect run --run /private/tmp/densegen-demo-20260115-1405/demo_press --library --top-per-tf 5 ``` This library summary is the quickest way to audit which TFBS were offered vs @@ -214,17 +236,17 @@ used in the solver stage (Stage‑B sampling). If any solutions are rejected, DenseGen writes them to `outputs/attempts.parquet` in the run root. -## 8) Audit report +## 9) Audit report Generate an audit-grade summary of the run: ```bash -uv run dense report -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml +uv run dense report -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml --format all ``` -This writes `outputs/report.json` and `outputs/report.md`. +This writes `outputs/report.json`, `outputs/report.md`, and `outputs/report.html`. -## 9) Inspect outputs +## 10) Inspect outputs List the generated Parquet artifacts: @@ -251,7 +273,7 @@ Example output: attempts.parquet ``` -## 10) Plot analysis +## 11) Plot analysis First, list the available plots: diff --git a/src/dnadesign/densegen/docs/dev/improvements.md b/src/dnadesign/densegen/docs/dev/improvements.md index bacf195a..a6f86d33 100644 --- a/src/dnadesign/densegen/docs/dev/improvements.md +++ b/src/dnadesign/densegen/docs/dev/improvements.md @@ -121,7 +121,7 @@ Implement in phases aligned with impact and merge risk. - solver backend/strategy/options summary - optional histograms (compression_ratio, gc_total) -10. 
Add a CLI command like dense summarize (or extend workspace listing) to read and pretty-print the manifest. +10. Add a CLI command like dense inspect run (or extend workspace listing) to read and pretty-print the manifest. ### Phase 4 - Performance / resilience (optional but worthwhile) diff --git a/src/dnadesign/densegen/docs/guide/generation.md b/src/dnadesign/densegen/docs/guide/generation.md index ab7f6cba..041c458c 100644 --- a/src/dnadesign/densegen/docs/guide/generation.md +++ b/src/dnadesign/densegen/docs/guide/generation.md @@ -103,7 +103,7 @@ Key fields: Notes: - `pool_strategy: full` uses a single library (no resampling) and ignores `library_size`, `subsample_over_length_budget_by`, - and related sampling caps/strategies (DenseGen warns in `dense validate`/`dense plan`). + and related sampling caps/strategies (DenseGen warns in `dense validate-config`/`dense inspect plan`). - Under schema `2.2+`, `subsample` can resample reactively on stalls/duplicate guards. - `iterative_subsample` resamples proactively after `arrays_generated_before_resample` or when a library under-produces. @@ -111,6 +111,21 @@ Notes: - `coverage_weighted` dynamically boosts underused TFBS based on the run’s usage counts. - `avoid_failed_motifs: true` down-weights TFBS that repeatedly appear in failed solve attempts (tracked in attempts.parquet). +### Stage‑A vs Stage‑B sampling (mental model) + +**Stage‑A (input sampling)** lives under `densegen.inputs[].sampling` and defines how TFBS pools +are generated from PWMs (e.g., DenseGen log‑odds vs FIMO p‑values, thresholds, mining limits, +length policy). Stage‑A produces the realized TFBS pool (`input_tfbs_count`), which is cached +once per run and reused across round‑robin passes. + +**Stage‑B (library sampling)** lives under `densegen.generation.sampling` and selects a **solver +library** from the Stage‑A pool (or from a binding‑site table / sequence library). 
This is where
+`pool_strategy`, `library_size`, and sampling strategies (tf‑balanced, uniform over pairs,
+coverage‑weighted) apply. Stage‑B is the only stage where resampling happens.
+
+Use `dense stage-a build-pool` to materialize pools and `dense stage-b build-libraries` to preview
+solver libraries without running the solver.
+
 ### Run scheduling (round‑robin)
 
 `runtime.round_robin` controls **scheduling**, not sampling. When enabled, DenseGen interleaves plan
@@ -125,6 +140,18 @@ uses the same policy per plan, but round‑robin can trigger more frequent libra
 Input PWM sampling is performed **once per run** and cached across round‑robin passes. If you need
 a fresh PWM sample, start a new run (or stage a new workspace).
 
+### Runtime policy knobs (resampling + stop conditions)
+
+Key `runtime.*` controls:
+- `arrays_generated_before_resample` — number of successful arrays to emit before forcing a new
+  library (for iterative subsampling).
+- `stall_seconds_before_resample` — idle time with no new solutions before resampling.
+- `stall_warning_every_seconds` — how often to log stall warnings.
+- `max_resample_attempts` / `max_total_resamples` — caps on resample retries.
+- `max_seconds_per_plan` — time budget per plan item (0 = no limit).
+- `max_failed_solutions` / `max_duplicate_solutions` — guardrails to stop when failure/duplicate
+  counts are too high.
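Taken together, the knobs above might be set like this. This is a sketch: the key names come from this guide, while the nesting under `densegen.runtime` and the specific values are illustrative, not recommended defaults:

```yaml
densegen:
  runtime:
    arrays_generated_before_resample: 5
    stall_seconds_before_resample: 30
    stall_warning_every_seconds: 10
    max_resample_attempts: 3
    max_total_resamples: 20
    max_seconds_per_plan: 0        # 0 = no time limit
    max_failed_solutions: 100
    max_duplicate_solutions: 100
```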
+ --- ### Regulator constraints diff --git a/src/dnadesign/densegen/docs/guide/inputs.md b/src/dnadesign/densegen/docs/guide/inputs.md index 63911198..0d73925d 100644 --- a/src/dnadesign/densegen/docs/guide/inputs.md +++ b/src/dnadesign/densegen/docs/guide/inputs.md @@ -99,15 +99,15 @@ Required sampling fields: - `score_threshold` or `score_percentile` (exactly one; densegen backend only) - `pvalue_threshold` (float in (0, 1]; fimo backend only) - `oversample_factor`: oversampling multiplier for candidate generation -- `max_candidates` (optional): cap on candidate generation; helps bound long motifs -- `max_seconds` (optional): time limit for candidate generation per batch (best-effort cap) +- `max_candidates` (optional): cap on candidate generation; helps bound long motifs (**densegen** backend only) +- `max_seconds` (optional): time limit for candidate generation per batch (best-effort cap; **densegen** backend only) - `selection_policy`: `random_uniform | top_n | stratified` (default: `random_uniform`; fimo only) - `pvalue_bins` (optional): list of p‑value bin edges (strictly increasing; must end with `1.0`) -- `pvalue_bin_ids` (deprecated; use `mining.retain_bin_ids`) -- `mining` (optional; fimo only): batch/time controls for mining with FIMO +- `mining` (fimo only): batch/time controls for mining with FIMO - `batch_size` (int > 0): candidates per batch - - `max_batches` (optional int > 0): limit batches per motif - - `max_seconds` (optional float > 0): limit total mining time per motif + - `max_batches` (optional int > 0): limit batches per motif (quota-style) + - `max_candidates` (optional int > 0): total candidates per motif (quota-style) + - `max_seconds` (optional float > 0; default 60s): limit total mining time per motif - `retain_bin_ids` (optional list of ints): keep only specific p‑value bins - `log_every_batches` (int > 0): log yield summaries every N batches - `bgfile` (optional): MEME bfile-format background model for FIMO @@ -124,6 +124,12 @@ 
Notes: - `selection_policy: stratified` uses fixed p‑value bins to balance strong/weak matches. - Canonical p‑value bins (default): `[1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]`. Bin 0 is `(0, 1e-10]`, bin 1 is `(1e-10, 1e-8]`, etc. +- FIMO mining defaults to **time-based** limits (`mining.max_seconds: 60`). To switch to a quota, + set `mining.max_seconds: null` and use `mining.max_candidates` or `mining.max_batches` + (with `mining.batch_size`) as the primary cap. +- `mining.max_candidates` must be >= `n_sites`; DenseGen fails fast otherwise. +- If you omit `mining` entirely, DenseGen uses the default mining settings (batch size + time cap) + for FIMO-backed sampling. #### FIMO p-values (beginner-friendly) - A **p-value** is the probability that a random sequence (under the background model) @@ -136,7 +142,7 @@ Notes: specific affinity ranges). - FIMO adds per‑TFBS metadata columns: `fimo_score`, `fimo_pvalue`, `fimo_start`, `fimo_stop`, `fimo_strand`, `fimo_bin_id`, `fimo_bin_low`, `fimo_bin_high`, and (optionally) - `fimo_matched_sequence` (the best‑hit window within the TFBS). + `fimo_matched_sequence` (the best‑hit window within the TFBS; includes strand-aware match). - `length_policy` defaults to `exact`. Use `length_policy: range` with `length_range: [min, max]` to sample variable lengths (min must be >= motif length). - `trim_window_length` optionally trims the PWM to a max‑information window before sampling (useful @@ -177,9 +183,9 @@ inputs: selection_policy: top_n n_sites: 80 oversample_factor: 200 - max_candidates: 20000 mining: batch_size: 5000 + max_candidates: 20000 max_batches: 4 retain_bin_ids: [0, 1, 2, 3] log_every_batches: 1 @@ -189,13 +195,16 @@ inputs: If you want to **mine** sequences across affinity strata, use `selection_policy: stratified` plus canonical p‑value bins and the `mining` block. A typical workflow: -1) Oversample candidates (`oversample_factor`, `max_candidates`) and score with FIMO in batches - (`mining.batch_size`). 
+1) Oversample candidates (`oversample_factor`) or set a direct quota (`mining.max_candidates`), + then score with FIMO in batches (`mining.batch_size`). 2) Accept candidates using `pvalue_threshold` (global strength cutoff). 3) Use `mining.retain_bin_ids` to select one or more bins (e.g., moderate matches only). -4) Repeat runs (or increase `mining.max_batches` / `mining.max_seconds`) to accumulate a deduplicated - reservoir of sequences per bin. -5) Use `dense summarize --library` to inspect which TFBS were offered vs used in Stage‑B sampling. +4) Repeat runs (or increase `mining.max_candidates` / `mining.max_batches` / `mining.max_seconds`) + to accumulate a deduplicated reservoir of sequences per bin. By default mining runs for 60 + seconds per motif; set `mining.max_seconds: null` to make quotas the primary cap. +5) Use `dense stage-a build-pool` to materialize the pool, then `dense stage-b build-libraries` + to preview Stage‑B library sampling without running the solver. +6) Use `dense inspect run --library` to inspect which TFBS were offered vs used in Stage‑B sampling. DenseGen reports per‑bin yield summaries (hits, accepted, selected) for retained bins only (or all bins if `retain_bin_ids` is unset), so you can track how many candidates land in each stratum and diff --git a/src/dnadesign/densegen/docs/guide/outputs-metadata.md b/src/dnadesign/densegen/docs/guide/outputs-metadata.md index 2aec0c0e..69c9eb23 100644 --- a/src/dnadesign/densegen/docs/guide/outputs-metadata.md +++ b/src/dnadesign/densegen/docs/guide/outputs-metadata.md @@ -36,7 +36,7 @@ the full outputs. 
Use the CLI to summarize a run: ``` -uv run dense summarize --run path/to/run +uv run dense inspect run --run path/to/run ``` --- diff --git a/src/dnadesign/densegen/docs/guide/workspace.md b/src/dnadesign/densegen/docs/guide/workspace.md index 0f2a30bd..d7048b48 100644 --- a/src/dnadesign/densegen/docs/guide/workspace.md +++ b/src/dnadesign/densegen/docs/guide/workspace.md @@ -63,7 +63,7 @@ plots: When a run is complete, archive or sync the workspace as a unit. -Tip: use `dense stage --id ` to scaffold a new workspace. Use -`dense summarize --root workspaces/_archive` to inspect archived workspaces. +Tip: use `dense workspace init --id ` to scaffold a new workspace. Use +`dense inspect run --root workspaces/_archive` to inspect archived workspaces. @e-south diff --git a/src/dnadesign/densegen/docs/reference/cli.md b/src/dnadesign/densegen/docs/reference/cli.md index bc1a9b69..a1f64a55 100644 --- a/src/dnadesign/densegen/docs/reference/cli.md +++ b/src/dnadesign/densegen/docs/reference/cli.md @@ -7,15 +7,18 @@ the run root. USR is optional and is only imported when configured. ### Contents - [Invocation](#invocation) - how to call the CLI. - [Config option](#config-option) - global or per-command config path. -- [Commands](#commands) - validate, plan, describe, run, plot, and utilities. -- [`dense validate`](#dense-validate) - schema and sanity checks. -- [`dense plan`](#dense-plan) - resolved quota plan. -- [`dense describe`](#dense-describe) - resolved inputs, outputs, and solver. +- [Commands](#commands) - validate, inspect, stage helpers, run, plot, report. +- [`dense validate-config`](#dense-validate-config) - schema and sanity checks. +- [`dense inspect inputs`](#dense-inspect-inputs) - resolved inputs + PWM sampling summary. +- [`dense inspect plan`](#dense-inspect-plan) - resolved quota plan. +- [`dense inspect config`](#dense-inspect-config) - resolved inputs/outputs/solver details. 
+- [`dense inspect run`](#dense-inspect-run) - summarize run manifests or list workspaces. +- [`dense stage-a build-pool`](#dense-stage-a-build-pool) - build TFBS pools (Stage‑A). +- [`dense stage-b build-libraries`](#dense-stage-b-build-libraries) - build solver libraries (Stage‑B). +- [`dense workspace init`](#dense-workspace-init) - scaffold a workspace. - [`dense run`](#dense-run) - end-to-end generation. - [`dense plot`](#dense-plot) - render plots from outputs. - [`dense ls-plots`](#dense-ls-plots) - list available plots. -- [`dense stage`](#dense-stage) - scaffold a workspace. -- [`dense summarize`](#dense-summarize) - summarize outputs/meta/run_manifest.json or list workspaces. - [`dense report`](#dense-report) - write audit-grade report summary. - [Examples](#examples) - common command sequences. @@ -35,8 +38,8 @@ python -m dnadesign.densegen --help - `-c, --config PATH` - config YAML path. Defaults to `src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml` inside the package. - - May be passed globally (`dense -c path validate`) or per command - (`dense validate -c path`). + - May be passed globally (`dense -c path inspect inputs`) or per command + (`dense inspect inputs -c path`). Input paths resolve against the config file directory. Outputs and logs must resolve inside `densegen.run.root` (run-scoped I/O). Config files must include `densegen.schema_version` @@ -46,54 +49,87 @@ inside `densegen.run.root` (run-scoped I/O). Config files must include `densegen ### Commands -### `dense validate` +### `dense validate-config` Validate the config YAML (schema + sanity checks). Fails fast on unknown keys or invalid values. + Options: - `--probe-solver` - also probe the solver backend (fails fast if unavailable). --- -#### `dense plan` +#### `dense inspect inputs` +Print resolved inputs plus a PWM sampling summary (Stage‑A details). + +--- + +#### `dense inspect plan` Print the resolved quota plan per constraint bucket. 
--- -#### `dense describe` +#### `dense inspect config` Summarize resolved inputs, outputs, plan items, and solver settings. + Options: - `--show-constraints` - print full fixed elements per plan item. - `--probe-solver` - verify the solver backend before reporting. --- -#### `dense run` -Run the full generation pipeline. +#### `dense inspect run` +Summarize a run manifest (`outputs/meta/run_manifest.json`) or list workspaces. Options: -- `--no-plot` - skip auto-plotting even if `plots` is configured in YAML. -- `--log-file PATH` - override the log file path. Otherwise DenseGen writes - to `logging.log_dir/.log` inside the workspace. The override path - must still resolve inside `densegen.run.root`. -Notes: -- If you enable `scoring_backend: fimo`, run via `pixi run dense ...` (or ensure `fimo` is on PATH). +- `--run` - workspace directory (defaults to `densegen.run.root` from config). +- `--root` - list workspaces under a root directory. +- `--limit` - limit workspaces displayed when using `--root`. +- `--all` - include directories without `config.yaml` when using `--root`. +- `--config` - config path (used to resolve run root when `--run` is not set). +- `--verbose` - show failure breakdown columns (constraint filters + duplicate solutions). +- `--library` - include offered-vs-used summaries (TF/TFBS usage). +- `--top` - number of rows to show in library summaries. +- `--by-library/--no-by-library` - group library summaries per build attempt. +- `--top-per-tf` - limit TFBS rows per TF when summarizing. +- `--show-library-hash/--short-library-hash` - toggle full vs short library hashes. + +Tip: +- For large runs, prefer `--no-by-library` or lower `--top`/`--top-per-tf` to keep output readable. --- -#### `dense plot` -Generate plots from existing outputs. +#### `dense stage-a build-pool` +Build Stage‑A TFBS pools from inputs and write a pool manifest. Options: -- `--only NAME1,NAME2` - run a subset of plots by name. 
+- `--out` - output directory relative to run root (default: `outputs/pools`). +- `--input/-i` - input name(s) to build (defaults to all). +- `--overwrite` - overwrite existing pool files. + +Outputs: +- `pool_manifest.json` +- `__pool.parquet` per input --- -#### `dense ls-plots` -List available plot names and descriptions. +#### `dense stage-b build-libraries` +Build Stage‑B libraries (one per input + plan) from pools or inputs. + +Options: +- `--out` - output directory relative to run root (default: `outputs/libraries`). +- `--pool` - optional pool directory from `stage-a build-pool` (defaults to reading inputs). +- `--input/-i` - input name(s) to build (defaults to all). +- `--plan/-p` - plan item name(s) to build (defaults to all). +- `--overwrite` - overwrite existing `library_builds.parquet`. + +Outputs: +- `library_builds.parquet` +- `library_manifest.json` --- -#### `dense stage` +#### `dense workspace init` Stage a new workspace with `config.yaml`, `inputs/`, `outputs/`, plus `outputs/logs/` and `outputs/meta/`. + Options: - `--id` - run identifier (directory name). - `--root` - workspaces root directory (default: package `workspaces/` directory). @@ -102,43 +138,55 @@ Options: --- -#### `dense summarize` -Summarize a run manifest (`outputs/meta/run_manifest.json`). +#### `dense run` +Run the full generation pipeline. + Options: -- `--run` - workspace directory (defaults to `densegen.run.root` from config). -- `--root` - list workspaces under a root directory. -- `--limit` - limit workspaces displayed when using `--root`. -- `--all` - include directories without `config.yaml` when using `--root`. -- `--config` - config path (used to resolve run root when `--run` is not set). -- `--verbose` - show failure breakdown columns (constraint filters + duplicate solutions). -- `--library` - include offered-vs-used summaries (TF/TFBS usage). -- `--top` - number of rows to show in library summaries. 
-- `--by-library/--no-by-library` - group library summaries per build attempt. -- `--top-per-tf` - limit TFBS rows per TF when summarizing. -- `--show-library-hash/--short-library-hash` - toggle full vs short library hashes. -Tip: -- For large runs, prefer `--no-by-library` or lower `--top`/`--top-per-tf` to keep output readable. +- `--no-plot` - skip auto-plotting even if `plots` is configured in YAML. +- `--log-file PATH` - override the log file path. Otherwise DenseGen writes + to `logging.log_dir/.log` inside the workspace. The override path + must still resolve inside `densegen.run.root`. + +Notes: +- If you enable `scoring_backend: fimo`, run via `pixi run dense ...` (or ensure `fimo` is on PATH). + +--- + +#### `dense plot` +Generate plots from existing outputs. + +Options: +- `--only NAME1,NAME2` - run a subset of plots by name. + +--- + +#### `dense ls-plots` +List available plot names and descriptions. --- #### `dense report` Generate an audit-grade report summary for a run. Outputs are run-scoped under `outputs/` by default. + Options: +- `--run` - run directory (defaults to config run root). - `--out` - output directory relative to run root (default: `outputs`). +- `--format` - `json`, `md`, `html`, or `all` (comma-separated allowed). 
--- ### Examples ```bash -uv run dense validate -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense plan -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense describe -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense plot -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --only tf_usage,tf_coverage,tfbs_positional_histogram,diversity_health -uv run dense summarize --run src/dnadesign/densegen/workspaces/demo_meme_two_tf -uv run dense summarize --root src/dnadesign/densegen/workspaces -uv run dense report -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +pixi run dense validate-config -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +uv run dense inspect inputs -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +uv run dense inspect plan -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +uv run dense inspect config -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +uv run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +uv run dense plot -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --only tf_usage,tf_coverage,tfbs_positional_histogram,diversity_health +uv run dense inspect run --run src/dnadesign/densegen/workspaces/demo_meme_two_tf +uv run dense inspect run --root src/dnadesign/densegen/workspaces +uv run dense report -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --format all ``` Demo run (small, Parquet-only config): diff --git a/src/dnadesign/densegen/docs/reference/config.md b/src/dnadesign/densegen/docs/reference/config.md index 0fd049a7..3ed9f2a7 100644 --- a/src/dnadesign/densegen/docs/reference/config.md +++ b/src/dnadesign/densegen/docs/reference/config.md @@ -60,18 +60,19 @@ PWM inputs perform **input sampling** (sampling sites 
from PWMs) via - `strategy`: `consensus | stochastic | background` - `n_sites` (int > 0) - `oversample_factor` (int > 0) - - `max_candidates` (optional int > 0; caps candidate generation) - - `max_seconds` (optional float > 0; time limit for candidate generation) + - `max_candidates` (optional int > 0; caps candidate generation; **densegen** backend only) + - `max_seconds` (optional float > 0; time limit for candidate generation; **densegen** backend only) - `scoring_backend`: `densegen | fimo` (default: `densegen`) - `score_threshold` or `score_percentile` (exactly one; **densegen** backend only) - `pvalue_threshold` (float in (0, 1]; **fimo** backend only) - `selection_policy`: `random_uniform | top_n | stratified` (default: `random_uniform`; fimo only) - `pvalue_bins` (optional list of floats; must end with `1.0`) - p‑value bin edges for stratified sampling - - `pvalue_bin_ids` (deprecated; use `mining.retain_bin_ids`) - - `mining` (optional; fimo only) - batch/time controls for mining via FIMO: + - `mining` (fimo only) - batch/time controls for mining via FIMO: - `batch_size` (int > 0; default 100000) - candidates per FIMO batch - `max_batches` (optional int > 0) - max batches per motif - - `max_seconds` (optional float > 0) - max seconds per motif mining loop + - `max_candidates` (optional int > 0) - total candidates to generate per motif (quota mode) + (must be >= `n_sites`) + - `max_seconds` (optional float > 0; default 60s) - max seconds per motif mining loop - `retain_bin_ids` (optional list of ints) - select p‑value bins to retain (0‑based indices); retained bins are the only bins reported in yield summaries - `log_every_batches` (int > 0; default 1) - log per‑bin yield summaries every N batches @@ -90,8 +91,9 @@ PWM inputs perform **input sampling** (sampling sites from PWMs) via - FIMO runs log per‑bin yield summaries (hits, accepted, selected). If `retain_bin_ids` is set, only those bins are reported; otherwise all bins are reported. 
`selection_policy: stratified` makes the selected‑bin distribution explicit for mining workflows. - - When `mining` is enabled, `max_seconds` caps per‑batch candidate generation while - `mining.max_seconds` caps the overall mining loop. + - For `scoring_backend: fimo`, use `mining.max_seconds` (time mode) or + `mining.max_candidates`/`mining.max_batches` (quota mode). The default is + `mining.max_seconds: 60`. Set `mining.max_seconds: null` to make quotas the primary cap. - `type: pwm_meme_set` - `paths` - list of MEME PWM files (merged into a single TF pool) - `motif_ids` (optional list) - choose motifs by ID across files diff --git a/src/dnadesign/densegen/docs/reference/outputs.md b/src/dnadesign/densegen/docs/reference/outputs.md index 23cef834..994603a9 100644 --- a/src/dnadesign/densegen/docs/reference/outputs.md +++ b/src/dnadesign/densegen/docs/reference/outputs.md @@ -98,9 +98,26 @@ The `dense report` command writes a compact audit summary under `outputs/`: - `outputs/report.json` - `outputs/report.md` +- `outputs/report.html` (basic HTML wrapper for quick sharing) These summarize run scope and link to the canonical outputs (`dense_arrays.parquet` and `attempts.parquet`). +Use `dense report --format json|md|html|all` to control which files are emitted. + +--- + +### Stage helper outputs (optional) + +DenseGen can materialize Stage‑A/Stage‑B artifacts without running the solver: + +- `dense stage-a build-pool` writes: + - `outputs/pools/pool_manifest.json` + - `outputs/pools/__pool.parquet` +- `dense stage-b build-libraries` writes: + - `outputs/libraries/library_builds.parquet` + - `outputs/libraries/library_manifest.json` + +These are optional inspection artifacts and are not required for a normal `dense run`. 
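For quick scripting against these helper outputs, a manifest walker might look like the following. Note this is a guess at the manifest's shape — the `pools` and `path` field names are assumptions, not a documented schema:

```python
import json
from pathlib import Path

def pool_paths(manifest_path: Path) -> list[Path]:
    """List pool Parquet files named by a pool manifest (hypothetical schema).

    Assumed shape: {"pools": [{"input_name": ..., "path": ...}, ...]},
    with paths relative to the manifest's directory.
    """
    manifest = json.loads(manifest_path.read_text())
    root = manifest_path.parent
    return [root / entry["path"] for entry in manifest.get("pools", [])]
```

From there, each pool file can be loaded with any Parquet reader (e.g. `pandas.read_parquet`) for inspection.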
--- diff --git a/src/dnadesign/densegen/docs/workflows/cruncher_pwm_pipeline.md b/src/dnadesign/densegen/docs/workflows/cruncher_pwm_pipeline.md index 78d10cbc..151a97ae 100644 --- a/src/dnadesign/densegen/docs/workflows/cruncher_pwm_pipeline.md +++ b/src/dnadesign/densegen/docs/workflows/cruncher_pwm_pipeline.md @@ -52,8 +52,8 @@ after `arrays_generated_before_resample` or when a library under-produces. ### 4) Run DenseGen ```bash -uv run dense validate -c path/to/config.yaml -uv run dense describe -c path/to/config.yaml +pixi run dense validate-config -c path/to/config.yaml +uv run dense inspect config -c path/to/config.yaml uv run dense run -c path/to/config.yaml --no-plot ``` diff --git a/src/dnadesign/densegen/workspaces/README.md b/src/dnadesign/densegen/workspaces/README.md index a1c6e85b..13e80a8f 100644 --- a/src/dnadesign/densegen/workspaces/README.md +++ b/src/dnadesign/densegen/workspaces/README.md @@ -12,4 +12,4 @@ Archived or legacy artifacts live under `_archive/` so the active workspace list The canonical demo lives under `demo_meme_two_tf/` and uses MEME motif files copied from the basic Cruncher demo workspace (`inputs/local_motifs`). DenseGen reads these with the shared Cruncher MEME parser to keep parsing DRY and consistent. -Use `dense summarize --root workspaces/_archive` if you want to inspect archived workspaces. +Use `dense inspect run --root workspaces/_archive` if you want to inspect archived workspaces. 
From 59b5e535ab8850b2f22513aa00b2f00af0eec225 Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 15:14:07 -0500 Subject: [PATCH 09/40] densegen: add pool/library artifacts and audit reporting --- .../densegen/docs/demo/demo_basic.md | 29 +- src/dnadesign/densegen/docs/guide/inputs.md | 3 + .../densegen/docs/guide/outputs-metadata.md | 41 ++ .../densegen/docs/guide/workspace.md | 2 + src/dnadesign/densegen/docs/reference/cli.md | 33 +- .../densegen/docs/reference/config.md | 4 +- .../densegen/docs/reference/outputs.md | 25 +- .../densegen/src/adapters/outputs/parquet.py | 2 + .../src/adapters/sources/binding_sites.py | 12 + .../src/adapters/sources/pwm_artifact.py | 31 +- .../src/adapters/sources/pwm_artifact_set.py | 31 +- .../src/adapters/sources/pwm_jaspar.py | 31 +- .../src/adapters/sources/pwm_matrix_csv.py | 31 +- .../densegen/src/adapters/sources/pwm_meme.py | 31 +- .../src/adapters/sources/pwm_meme_set.py | 31 +- src/dnadesign/densegen/src/cli.py | 354 ++++++++++-------- src/dnadesign/densegen/src/config/__init__.py | 7 +- .../densegen/src/core/artifacts/ids.py | 76 ++++ .../densegen/src/core/artifacts/library.py | 97 +++++ .../densegen/src/core/artifacts/pool.py | 228 +++++++++++ .../densegen/src/core/metadata_schema.py | 4 +- src/dnadesign/densegen/src/core/pipeline.py | 335 ++++++++++++++++- src/dnadesign/densegen/src/core/reporting.py | 73 ++++ src/dnadesign/densegen/src/core/sampler.py | 16 + .../densegen/tests/test_artifacts_ids.py | 33 ++ .../densegen/tests/test_artifacts_library.py | 61 +++ .../densegen/tests/test_artifacts_pool.py | 83 ++++ .../densegen/tests/test_cli_workspace_init.py | 112 ++++++ .../densegen/tests/test_used_tfbs_offsets.py | 2 + .../workspaces/demo_meme_two_tf/config.yaml | 2 +- 30 files changed, 1597 insertions(+), 223 deletions(-) create mode 100644 src/dnadesign/densegen/src/core/artifacts/ids.py create mode 100644 src/dnadesign/densegen/src/core/artifacts/library.py create mode 100644 
src/dnadesign/densegen/src/core/artifacts/pool.py create mode 100644 src/dnadesign/densegen/tests/test_artifacts_ids.py create mode 100644 src/dnadesign/densegen/tests/test_artifacts_library.py create mode 100644 src/dnadesign/densegen/tests/test_artifacts_pool.py create mode 100644 src/dnadesign/densegen/tests/test_cli_workspace_init.py diff --git a/src/dnadesign/densegen/docs/demo/demo_basic.md b/src/dnadesign/densegen/docs/demo/demo_basic.md index 91bd3ad3..9adf7e50 100644 --- a/src/dnadesign/densegen/docs/demo/demo_basic.md +++ b/src/dnadesign/densegen/docs/demo/demo_basic.md @@ -99,7 +99,7 @@ uv run dense workspace init --id demo_press --root "$RUN_ROOT" \ Example output: ```text -✨ Run staged: /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml +✨ Workspace staged: /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml ``` If you re-run the demo in the same run root and DenseGen’s schema has changed, you may see a @@ -215,7 +215,7 @@ uv run dense inspect run --run /private/tmp/densegen-demo-20260115-1405/demo_pre Example output: ```text -Run: demo_press Root: /private/tmp/densegen-demo-20260115-1405/demo_press Schema: 2.3 dense-arrays: () +Run: demo_press Root: /private/tmp/densegen-demo-20260115-1405/demo_press Schema: 2.4 dense-arrays: () ┏━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┓ ┃ input ┃ plan ┃ generated ┃ duplica… ┃ failed ┃ resamples ┃ librari… ┃ stalls ┃ ┡━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━┩ @@ -248,29 +248,30 @@ This writes `outputs/report.json`, `outputs/report.md`, and `outputs/report.html ## 10) Inspect outputs -List the generated Parquet artifacts: +List the generated Parquet artifacts and manifests: ```bash -ls /private/tmp/densegen-demo-20260115-1405/demo_press/outputs/dense_arrays.parquet +ls /private/tmp/densegen-demo-20260115-1405/demo_press/outputs ``` Example output: ```text -_densegen_ids.sqlite 
-part-10ca57ae0c1d410d8b88206d194a2ff1.parquet +attempts.parquet +composition.parquet +dense_arrays.parquet +libraries +pools +report.html +report.json +report.md ``` -Inspect the library manifests: +Inspect Stage‑A pools and Stage‑B libraries: ```bash -ls /private/tmp/densegen-demo-20260115-1405/demo_press/outputs -``` - -Example output: - -```text -attempts.parquet +ls /private/tmp/densegen-demo-20260115-1405/demo_press/outputs/pools +ls /private/tmp/densegen-demo-20260115-1405/demo_press/outputs/libraries ``` ## 11) Plot analysis diff --git a/src/dnadesign/densegen/docs/guide/inputs.md b/src/dnadesign/densegen/docs/guide/inputs.md index 0d73925d..3ad6b50d 100644 --- a/src/dnadesign/densegen/docs/guide/inputs.md +++ b/src/dnadesign/densegen/docs/guide/inputs.md @@ -124,6 +124,9 @@ Notes: - `selection_policy: stratified` uses fixed p‑value bins to balance strong/weak matches. - Canonical p‑value bins (default): `[1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]`. Bin 0 is `(0, 1e-10]`, bin 1 is `(1e-10, 1e-8]`, etc. +- For FIMO, the candidate target is `n_sites * oversample_factor`, but mining caps or time limits + can stop early. Expect fewer candidates if `mining.max_seconds`, `mining.max_batches`, or + `mining.max_candidates` are binding. - FIMO mining defaults to **time-based** limits (`mining.max_seconds: 60`). To switch to a quota, set `mining.max_seconds: null` and use `mining.max_candidates` or `mining.max_batches` (with `mining.batch_size`) as the primary cap. diff --git a/src/dnadesign/densegen/docs/guide/outputs-metadata.md b/src/dnadesign/densegen/docs/guide/outputs-metadata.md index 69c9eb23..4f7e0065 100644 --- a/src/dnadesign/densegen/docs/guide/outputs-metadata.md +++ b/src/dnadesign/densegen/docs/guide/outputs-metadata.md @@ -50,6 +50,38 @@ per-motif site counts to make sampling behavior explicit. 
 ---
 
+### Stage‑A pools (TFBS pool artifacts)
+
+DenseGen materializes Stage‑A pools under `outputs/pools/`:
+
+- `outputs/pools/pool_manifest.json` — manifest of pool files by input.
+- `outputs/pools/<input_name>__pool.parquet` — TFBS pools (or sequence pools).
+
+TFBS pools include stable `motif_id` and `tfbs_id` hashes plus optional FIMO metadata
+(`fimo_pvalue`, `fimo_bin_id`, etc.). Sequence pools include `tfbs_id` for joinability.
+
+---
+
+### Library artifacts (Stage‑B)
+
+DenseGen writes Stage‑B libraries under `outputs/libraries/`:
+
+- `library_builds.parquet` — one row per library build (index, hash, size, strategy).
+- `library_members.parquet` — normalized membership table (one row per TFBS in each library).
+- `library_manifest.json` — manifest + schema version.
+
+These artifacts provide a stable join path from solver attempts to the exact library contents.
+
+---
+
+### Composition table
+
+DenseGen writes `outputs/composition.parquet`, one row per TFBS placement in each accepted
+sequence. Columns include `sequence_id`, `input_name`, `plan_name`, `library_index`,
+`tf`, `tfbs`, `motif_id`, `tfbs_id`, and placement offsets.
+
+---
+
 ### Run state (checkpoint)
 
 DenseGen writes `outputs/meta/run_state.json` during execution. This checkpoint captures
@@ -57,6 +89,14 @@ per-input/plan progress so long runs can resume safely after interruption.
 
 ---
 
+### Events log
+
+DenseGen writes `outputs/meta/events.jsonl` (JSON lines) with structured events:
+`POOL_BUILT`, `LIBRARY_BUILT`, `STALL_DETECTED`, and `RESAMPLE_TRIGGERED`.
+This is a lightweight, machine-readable trace of the run’s control flow.
+
+---
+
 ### Attempts log
 
 DenseGen writes `outputs/attempts.parquet`, a consolidated log of solver attempts (success,
@@ -85,6 +125,7 @@ source = densegen:{input_name}:{plan_name}
 
 This is always present and is separate from metadata. Detailed placement provenance lives in
 `densegen__used_tfbs_detail` and the run-scoped library manifests.
+`densegen__used_tfbs_detail` includes `motif_id` and `tfbs_id` when available. --- diff --git a/src/dnadesign/densegen/docs/guide/workspace.md b/src/dnadesign/densegen/docs/guide/workspace.md index d7048b48..cabad8c2 100644 --- a/src/dnadesign/densegen/docs/guide/workspace.md +++ b/src/dnadesign/densegen/docs/guide/workspace.md @@ -65,5 +65,7 @@ When a run is complete, archive or sync the workspace as a unit. Tip: use `dense workspace init --id ` to scaffold a new workspace. Use `dense inspect run --root workspaces/_archive` to inspect archived workspaces. +If your config references local motif files, add `--copy-inputs` so the workspace +remains self-contained (or update paths in `config.yaml` after staging). @e-south diff --git a/src/dnadesign/densegen/docs/reference/cli.md b/src/dnadesign/densegen/docs/reference/cli.md index a1f64a55..b17b1c18 100644 --- a/src/dnadesign/densegen/docs/reference/cli.md +++ b/src/dnadesign/densegen/docs/reference/cli.md @@ -43,7 +43,7 @@ python -m dnadesign.densegen --help Input paths resolve against the config file directory. Outputs and logs must resolve inside `densegen.run.root` (run-scoped I/O). Config files must include `densegen.schema_version` -(currently `2.3`) and `densegen.run`. +(currently `2.4`) and `densegen.run`. --- @@ -112,17 +112,18 @@ Outputs: --- #### `dense stage-b build-libraries` -Build Stage‑B libraries (one per input + plan) from pools or inputs. +Build Stage‑B libraries (one per input + plan) from Stage‑A pools. Options: - `--out` - output directory relative to run root (default: `outputs/libraries`). -- `--pool` - optional pool directory from `stage-a build-pool` (defaults to reading inputs). +- `--pool` - pool directory from `stage-a build-pool` (defaults to `outputs/pools` in the workspace). - `--input/-i` - input name(s) to build (defaults to all). - `--plan/-p` - plan item name(s) to build (defaults to all). -- `--overwrite` - overwrite existing `library_builds.parquet`. 
+- `--overwrite` - overwrite existing library artifacts. Outputs: - `library_builds.parquet` +- `library_members.parquet` - `library_manifest.json` --- @@ -178,15 +179,21 @@ Options: ### Examples ```bash -pixi run dense validate-config -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense inspect inputs -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense inspect plan -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense inspect config -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense run -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml -uv run dense plot -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --only tf_usage,tf_coverage,tfbs_positional_histogram,diversity_health -uv run dense inspect run --run src/dnadesign/densegen/workspaces/demo_meme_two_tf -uv run dense inspect run --root src/dnadesign/densegen/workspaces -uv run dense report -c src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml --format all +RUN_ROOT=/tmp/densegen-demo-$(date +%Y%m%d-%H%M) +uv run dense workspace init --id demo_press --root "$RUN_ROOT" \ + --template src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml \ + --copy-inputs +CFG="$RUN_ROOT/demo_press/config.yaml" + +pixi run dense validate-config -c "$CFG" +uv run dense inspect inputs -c "$CFG" +uv run dense inspect plan -c "$CFG" +uv run dense inspect config -c "$CFG" +uv run dense run -c "$CFG" +uv run dense plot -c "$CFG" --only tf_usage,tf_coverage,tfbs_positional_histogram,diversity_health +uv run dense inspect run --run "$RUN_ROOT/demo_press" +uv run dense inspect run --root "$RUN_ROOT" +uv run dense report -c "$CFG" --format all ``` Demo run (small, Parquet-only config): diff --git a/src/dnadesign/densegen/docs/reference/config.md b/src/dnadesign/densegen/docs/reference/config.md index 3ed9f2a7..554f23d8 100644 --- a/src/dnadesign/densegen/docs/reference/config.md +++ 
b/src/dnadesign/densegen/docs/reference/config.md @@ -23,7 +23,7 @@ for conceptual flow. ### Top-level - `densegen` (required) -- `densegen.schema_version` (required; supported: `2.1`, `2.2`, `2.3`) +- `densegen.schema_version` (required; supported: `2.1`, `2.2`, `2.3`, `2.4`) - `densegen.run` (required; run-scoped I/O root) - `plots` (optional; required `source` when `output.targets` has multiple sinks) @@ -268,7 +268,7 @@ binding-site and PWM-sampled inputs. ```yaml densegen: - schema_version: "2.3" + schema_version: "2.4" run: id: demo root: "." diff --git a/src/dnadesign/densegen/docs/reference/outputs.md b/src/dnadesign/densegen/docs/reference/outputs.md index 994603a9..544ac5ad 100644 --- a/src/dnadesign/densegen/docs/reference/outputs.md +++ b/src/dnadesign/densegen/docs/reference/outputs.md @@ -83,12 +83,24 @@ These are produced alongside Parquet/USR outputs and provide a compact audit tra --- -### Library provenance (attempts log) +### Events log + +DenseGen writes `outputs/meta/events.jsonl` (JSON lines) with structured events +for pool builds, library builds, stalls, and resamples. This is a lightweight +machine-readable trace of runtime control flow. + +--- + +### Library provenance (library artifacts + attempts) + +DenseGen records solver library provenance in two places: + +- `outputs/libraries/library_builds.parquet` + `library_members.parquet` (canonical library artifacts). +- `outputs/attempts.parquet` (attempt-level audit log with offered library lists). -DenseGen now records solver library provenance exclusively in `outputs/attempts.parquet`. Each attempt row stores the full library offered to the solver (`library_tfbs`, `library_tfs`, `library_site_ids`, `library_sources`) along with the library hash/index and solver status. -Output records carry `densegen__sampling_library_hash` so you can join placements to attempts. +Output records carry `densegen__sampling_library_hash` so you can join placements to libraries. 
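The join described above can be sketched with pandas. The column names (`densegen__sampling_library_hash`, `library_hash`) come from this doc; the helper function and file paths are illustrative assumptions, not densegen API:

```python
# Sketch: join output records to the canonical library artifacts via the
# sampling library hash. `join_placements_to_libraries` is a hypothetical
# helper; the join keys are the columns named in the docs above.
import pandas as pd

def join_placements_to_libraries(arrays: pd.DataFrame, builds: pd.DataFrame) -> pd.DataFrame:
    """Left-join accepted sequences onto the library builds they were sampled from."""
    return arrays.merge(
        builds,
        left_on="densegen__sampling_library_hash",
        right_on="library_hash",
        how="left",
    )

# Typical usage against a run's outputs directory (paths assumed):
# arrays = pd.read_parquet("outputs/dense_arrays.parquet")
# builds = pd.read_parquet("outputs/libraries/library_builds.parquet")
# joined = join_placements_to_libraries(arrays, builds)
```

A left join keeps every output record even if a library build row is missing, which makes gaps in provenance visible as nulls rather than silently dropping sequences.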
--- @@ -115,9 +127,11 @@ DenseGen can materialize Stage‑A/Stage‑B artifacts without running the solve - `outputs/pools/__pool.parquet` - `dense stage-b build-libraries` writes: - `outputs/libraries/library_builds.parquet` + - `outputs/libraries/library_members.parquet` - `outputs/libraries/library_manifest.json` -These are optional inspection artifacts and are not required for a normal `dense run`. +Stage‑B expects Stage‑A pools (default `outputs/pools`). These are optional inspection artifacts +and are not required for a normal `dense run`. --- @@ -130,7 +144,8 @@ densegen:{input_name}:{plan_name} ``` Per-placement provenance (TFBS, offsets, orientations) is recorded in -`densegen__used_tfbs_detail` and the attempts log. +`densegen__used_tfbs_detail` (including `motif_id`/`tfbs_id`), `outputs/composition.parquet`, +and the attempts log. --- diff --git a/src/dnadesign/densegen/src/adapters/outputs/parquet.py b/src/dnadesign/densegen/src/adapters/outputs/parquet.py index 0751907b..a64335da 100644 --- a/src/dnadesign/densegen/src/adapters/outputs/parquet.py +++ b/src/dnadesign/densegen/src/adapters/outputs/parquet.py @@ -107,6 +107,8 @@ def _meta_arrow_type(name: str, pa): [ pa.field("tf", pa.string()), pa.field("tfbs", pa.string()), + pa.field("motif_id", pa.string()), + pa.field("tfbs_id", pa.string()), pa.field("orientation", pa.string()), pa.field("offset", pa.int64()), pa.field("offset_raw", pa.int64()), diff --git a/src/dnadesign/densegen/src/adapters/sources/binding_sites.py b/src/dnadesign/densegen/src/adapters/sources/binding_sites.py index 6c74a022..dd8b6479 100644 --- a/src/dnadesign/densegen/src/adapters/sources/binding_sites.py +++ b/src/dnadesign/densegen/src/adapters/sources/binding_sites.py @@ -19,6 +19,7 @@ import pandas as pd +from ...core.artifacts.ids import hash_label_motif, hash_tfbs_id from .base import BaseDataSource, infer_format, resolve_path log = logging.getLogger(__name__) @@ -125,6 +126,17 @@ def load_data(self, *, rng=None, 
outputs_root: Path | None = None): if source_col: out["source"] = df[source_col].astype(str).str.strip() + motif_id_map = {tf: hash_label_motif(label=tf, source_kind="binding_sites") for tf in tf_clean.unique()} + out["motif_id"] = tf_clean.map(motif_id_map) + out["tfbs_id"] = [ + hash_tfbs_id( + motif_id=motif_id_map[tf], + sequence=seq, + scoring_backend="binding_sites", + ) + for tf, seq in zip(tf_clean.tolist(), seq_clean.tolist()) + ] + out = out.reset_index(drop=True) source_default = str(data_path) src_vals = out.get("source") diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py index 1339aa68..d29783ae 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact.py @@ -18,6 +18,7 @@ from pathlib import Path from typing import Any, List +from ...core.artifacts.ids import hash_pwm_motif, hash_tfbs_id from .base import BaseDataSource, resolve_path from .pwm_sampling import PWMMotif, normalize_background, sample_pwm_sites @@ -160,6 +161,12 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): raise FileNotFoundError(f"PWM artifact not found. 
Looked here:\n - {artifact_path}") motif = _load_artifact(artifact_path) + motif_hash = hash_pwm_motif( + motif_label=motif.motif_id, + matrix=motif.matrix, + background=motif.background, + source_kind="pwm_artifact", + ) sampling = dict(self.sampling or {}) strategy = str(sampling.get("strategy", "stochastic")) @@ -228,9 +235,27 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): rows = [] for seq in selected: - row = {"tf": motif.motif_id, "tfbs": seq, "source": str(artifact_path)} - if meta_by_seq: - row.update(meta_by_seq.get(seq, {})) + meta = meta_by_seq.get(seq, {}) if meta_by_seq else {} + start = meta.get("fimo_start") + stop = meta.get("fimo_stop") + strand = meta.get("fimo_strand") + tfbs_id = hash_tfbs_id( + motif_id=motif_hash, + sequence=seq, + scoring_backend=scoring_backend, + matched_start=int(start) if start is not None else None, + matched_stop=int(stop) if stop is not None else None, + matched_strand=str(strand) if strand is not None else None, + ) + row = { + "tf": motif.motif_id, + "tfbs": seq, + "source": str(artifact_path), + "motif_id": motif_hash, + "tfbs_id": tfbs_id, + } + if meta: + row.update(meta) rows.append(row) df_out = pd.DataFrame(rows) return entries, df_out diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py index 9a9353af..c3b620a5 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_artifact_set.py @@ -16,6 +16,7 @@ from pathlib import Path from typing import List +from ...core.artifacts.ids import hash_pwm_motif, hash_tfbs_id from .base import BaseDataSource, resolve_path from .pwm_artifact import load_artifact from .pwm_sampling import sample_pwm_sites @@ -54,6 +55,12 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): entries = [] all_rows = [] for motif, path in zip(motifs, resolved): + motif_hash = hash_pwm_motif( + 
motif_label=motif.motif_id, + matrix=motif.matrix, + background=motif.background, + source_kind="pwm_artifact_set", + ) sampling_cfg = sampling override = overrides.get(motif.motif_id) if override: @@ -120,9 +127,27 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): for seq in selected: entries.append((motif.motif_id, seq, str(path))) - row = {"tf": motif.motif_id, "tfbs": seq, "source": str(path)} - if meta_by_seq: - row.update(meta_by_seq.get(seq, {})) + meta = meta_by_seq.get(seq, {}) if meta_by_seq else {} + start = meta.get("fimo_start") + stop = meta.get("fimo_stop") + strand = meta.get("fimo_strand") + tfbs_id = hash_tfbs_id( + motif_id=motif_hash, + sequence=seq, + scoring_backend=scoring_backend, + matched_start=int(start) if start is not None else None, + matched_stop=int(stop) if stop is not None else None, + matched_strand=str(strand) if strand is not None else None, + ) + row = { + "tf": motif.motif_id, + "tfbs": seq, + "source": str(path), + "motif_id": motif_hash, + "tfbs_id": tfbs_id, + } + if meta: + row.update(meta) all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py index bb08ba6d..e0062364 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_jaspar.py @@ -17,6 +17,7 @@ from pathlib import Path from typing import List, Optional +from ...core.artifacts.ids import hash_pwm_motif, hash_tfbs_id from .base import BaseDataSource, resolve_path from .pwm_sampling import PWMMotif, normalize_background, sample_pwm_sites @@ -133,6 +134,12 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): entries = [] all_rows = [] for motif in motifs: + motif_hash = hash_pwm_motif( + motif_label=motif.motif_id, + matrix=motif.matrix, + background=motif.background, + source_kind="pwm_jaspar", + ) return_meta = scoring_backend == "fimo" result = 
sample_pwm_sites( rng, @@ -167,9 +174,27 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): meta_by_seq = {} for seq in selected: entries.append((motif.motif_id, seq, str(jaspar_path))) - row = {"tf": motif.motif_id, "tfbs": seq, "source": str(jaspar_path)} - if meta_by_seq: - row.update(meta_by_seq.get(seq, {})) + meta = meta_by_seq.get(seq, {}) if meta_by_seq else {} + start = meta.get("fimo_start") + stop = meta.get("fimo_stop") + strand = meta.get("fimo_strand") + tfbs_id = hash_tfbs_id( + motif_id=motif_hash, + sequence=seq, + scoring_backend=scoring_backend, + matched_start=int(start) if start is not None else None, + matched_stop=int(stop) if stop is not None else None, + matched_strand=str(strand) if strand is not None else None, + ) + row = { + "tf": motif.motif_id, + "tfbs": seq, + "source": str(jaspar_path), + "motif_id": motif_hash, + "tfbs_id": tfbs_id, + } + if meta: + row.update(meta) all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py index 7e313dad..496d4d5c 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_matrix_csv.py @@ -17,6 +17,7 @@ import pandas as pd +from ...core.artifacts.ids import hash_pwm_motif, hash_tfbs_id from .base import BaseDataSource, resolve_path from .pwm_sampling import PWMMotif, normalize_background, sample_pwm_sites @@ -64,6 +65,12 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): matrix.append({b: v / total for b, v in vals.items()}) motif = PWMMotif(motif_id=str(self.motif_id).strip(), matrix=matrix, background=normalize_background(None)) + motif_hash = hash_pwm_motif( + motif_label=motif.motif_id, + matrix=motif.matrix, + background=motif.background, + source_kind="pwm_matrix_csv", + ) sampling = dict(self.sampling or {}) strategy = str(sampling.get("strategy", "stochastic")) 
@@ -130,9 +137,27 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): entries = [(motif.motif_id, seq, str(csv_path)) for seq in selected] rows = [] for seq in selected: - row = {"tf": motif.motif_id, "tfbs": seq, "source": str(csv_path)} - if meta_by_seq: - row.update(meta_by_seq.get(seq, {})) + meta = meta_by_seq.get(seq, {}) if meta_by_seq else {} + start = meta.get("fimo_start") + stop = meta.get("fimo_stop") + strand = meta.get("fimo_strand") + tfbs_id = hash_tfbs_id( + motif_id=motif_hash, + sequence=seq, + scoring_backend=scoring_backend, + matched_start=int(start) if start is not None else None, + matched_stop=int(stop) if stop is not None else None, + matched_strand=str(strand) if strand is not None else None, + ) + row = { + "tf": motif.motif_id, + "tfbs": seq, + "source": str(csv_path), + "motif_id": motif_hash, + "tfbs_id": tfbs_id, + } + if meta: + row.update(meta) rows.append(row) df_out = pd.DataFrame(rows) return entries, df_out diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py index bce0a6fe..f7dac8dc 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_meme.py @@ -18,6 +18,7 @@ from dnadesign.cruncher.io.parsers.meme import MemeMotif, parse_meme_file +from ...core.artifacts.ids import hash_pwm_motif, hash_tfbs_id from .base import BaseDataSource, resolve_path from .pwm_sampling import PWMMotif, normalize_background, sample_pwm_sites @@ -112,6 +113,12 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): all_rows = [] for motif in motifs: pwm = _motif_to_pwm(motif, background) + motif_hash = hash_pwm_motif( + motif_label=pwm.motif_id, + matrix=pwm.matrix, + background=pwm.background, + source_kind="pwm_meme", + ) return_meta = scoring_backend == "fimo" result = sample_pwm_sites( rng, @@ -147,9 +154,27 @@ def load_data(self, *, rng=None, outputs_root: Path | None = 
None): for seq in selected: entries.append((pwm.motif_id, seq, str(meme_path))) - row = {"tf": pwm.motif_id, "tfbs": seq, "source": str(meme_path)} - if meta_by_seq: - row.update(meta_by_seq.get(seq, {})) + meta = meta_by_seq.get(seq, {}) if meta_by_seq else {} + start = meta.get("fimo_start") + stop = meta.get("fimo_stop") + strand = meta.get("fimo_strand") + tfbs_id = hash_tfbs_id( + motif_id=motif_hash, + sequence=seq, + scoring_backend=scoring_backend, + matched_start=int(start) if start is not None else None, + matched_stop=int(stop) if stop is not None else None, + matched_strand=str(strand) if strand is not None else None, + ) + row = { + "tf": pwm.motif_id, + "tfbs": seq, + "source": str(meme_path), + "motif_id": motif_hash, + "tfbs_id": tfbs_id, + } + if meta: + row.update(meta) all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py index c081095b..492fe857 100644 --- a/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py +++ b/src/dnadesign/densegen/src/adapters/sources/pwm_meme_set.py @@ -18,6 +18,7 @@ from dnadesign.cruncher.io.parsers.meme import MemeMotif, parse_meme_file +from ...core.artifacts.ids import hash_pwm_motif, hash_tfbs_id from .base import BaseDataSource, resolve_path from .pwm_meme import _background_from_meta, _motif_to_pwm from .pwm_sampling import sample_pwm_sites @@ -106,6 +107,12 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): all_rows = [] for motif, background, path in motifs_payload: pwm = _motif_to_pwm(motif, background) + motif_hash = hash_pwm_motif( + motif_label=pwm.motif_id, + matrix=pwm.matrix, + background=pwm.background, + source_kind="pwm_meme_set", + ) return_meta = scoring_backend == "fimo" result = sample_pwm_sites( rng, @@ -140,9 +147,27 @@ def load_data(self, *, rng=None, outputs_root: Path | None = None): meta_by_seq = {} for seq in selected: 
entries.append((pwm.motif_id, seq, str(path))) - row = {"tf": pwm.motif_id, "tfbs": seq, "source": str(path)} - if meta_by_seq: - row.update(meta_by_seq.get(seq, {})) + meta = meta_by_seq.get(seq, {}) if meta_by_seq else {} + start = meta.get("fimo_start") + stop = meta.get("fimo_stop") + strand = meta.get("fimo_strand") + tfbs_id = hash_tfbs_id( + motif_id=motif_hash, + sequence=seq, + scoring_backend=scoring_backend, + matched_start=int(start) if start is not None else None, + matched_stop=int(stop) if stop is not None else None, + matched_strand=str(strand) if strand is not None else None, + ) + row = { + "tf": pwm.motif_id, + "tfbs": seq, + "source": str(path), + "motif_id": motif_hash, + "tfbs_id": tfbs_id, + } + if meta: + row.update(meta) all_rows.append(row) import pandas as pd diff --git a/src/dnadesign/densegen/src/cli.py b/src/dnadesign/densegen/src/cli.py index 807b26d9..1c56a492 100644 --- a/src/dnadesign/densegen/src/cli.py +++ b/src/dnadesign/densegen/src/cli.py @@ -31,7 +31,6 @@ import contextlib import io -import json import logging import os import platform @@ -61,6 +60,13 @@ resolve_run_scoped_path, schema_version_at_least, ) +from .core.artifacts.library import write_library_artifact +from .core.artifacts.pool import ( + POOL_MODE_SEQUENCE, + POOL_MODE_TFBS, + build_pool_artifact, + load_pool_artifact, +) from .core.pipeline import ( _load_existing_library_index, _load_failure_counts_from_attempts, @@ -80,6 +86,7 @@ console = Console() _PYARROW_SYSCTL_PATTERN = re.compile(r"sysctlbyname failed for 'hw\.") log = logging.getLogger(__name__) +install_native_stderr_filters() @contextlib.contextmanager @@ -353,17 +360,6 @@ def _print_inputs_summary(loaded) -> None: ) -def _pool_manifest_path(out_dir: Path) -> Path: - return out_dir / "pool_manifest.json" - - -def _load_pool_manifest(out_dir: Path) -> dict: - manifest_path = _pool_manifest_path(out_dir) - if not manifest_path.exists(): - raise FileNotFoundError(f"Pool manifest not found: 
{manifest_path}") - return json.loads(manifest_path.read_text()) - - def _list_dir_entries(path: Path, *, limit: int = 10) -> list[str]: if not path.exists() or not path.is_dir(): return [] @@ -393,6 +389,25 @@ def _collect_missing_input_paths(loaded, cfg_path: Path) -> list[Path]: return missing +def _collect_relative_input_paths_from_raw(dense_cfg: dict) -> list[str]: + rel_paths: list[str] = [] + inputs_cfg = dense_cfg.get("inputs") or [] + for inp in inputs_cfg: + if not isinstance(inp, dict): + continue + raw_path = inp.get("path") + if isinstance(raw_path, str) and raw_path.strip(): + if not Path(raw_path).is_absolute(): + rel_paths.append(raw_path) + raw_paths = inp.get("paths") + if isinstance(raw_paths, list): + for path in raw_paths: + if isinstance(path, str) and path.strip(): + if not Path(path).is_absolute(): + rel_paths.append(path) + return rel_paths + + def _render_missing_input_hint(cfg_path: Path, loaded, exc: Exception) -> None: console.print(f"[bold red]Input error:[/] {exc}") missing = _collect_missing_input_paths(loaded, cfg_path) @@ -753,6 +768,16 @@ def workspace_init( config_path = run_dir / "config.yaml" config_path.write_text(yaml.safe_dump(raw, sort_keys=False)) + if not copy_inputs: + rel_paths = _collect_relative_input_paths_from_raw(dense) + if rel_paths: + console.print( + "[yellow]Workspace uses file-based inputs with relative paths.[/]" + " They will resolve relative to the new workspace." 
+ ) + for rel_path in rel_paths[:6]: + console.print(f" - {rel_path}") + console.print("[yellow]Tip[/]: re-run with --copy-inputs or update paths in config.yaml.") console.print(f":sparkles: [bold green]Workspace staged[/]: {config_path}") @@ -1323,24 +1348,26 @@ def stage_a_build_pool( outputs_root = run_root / "outputs" outputs_root.mkdir(parents=True, exist_ok=True) - rows = [] - manifest_inputs: list[dict] = [] - for inp in cfg.inputs: - if selected and inp.name not in selected: - continue - src = deps.source_factory(inp, cfg_path) - data_entries, meta_df = src.load_data(rng=rng, outputs_root=outputs_root) - if meta_df is None: - df = pd.DataFrame({"sequence": [str(s) for s in data_entries]}) - else: - df = meta_df.copy() - df.insert(0, "input_name", inp.name) - filename = f"{_sanitize_filename(inp.name)}__pool.parquet" - dest = out_dir / filename - if dest.exists() and not overwrite: - console.print(f"[bold red]Pool already exists:[/] {dest}") + with _suppress_pyarrow_sysctl_warnings(): + try: + artifact, pool_data = build_pool_artifact( + cfg=cfg, + cfg_path=cfg_path, + deps=deps, + rng=rng, + outputs_root=outputs_root, + out_dir=out_dir, + overwrite=overwrite, + selected_inputs=selected if selected else None, + ) + except FileExistsError as exc: + console.print(f"[bold red]{exc}[/]") raise typer.Exit(code=1) - df.to_parquet(dest, index=False) + + for pool in pool_data.values(): + if pool.df is None: + continue + df = pool.df if "fimo_bin_id" in df.columns: bin_counts = df["fimo_bin_id"].value_counts().sort_index() bin_table = Table("bin_id", "pvalue_range", "count") @@ -1360,39 +1387,14 @@ def stage_a_build_pool( else: range_label = "-" bin_table.add_row(str(bin_id), range_label, str(int(count))) - console.print(f"[bold]FIMO p-value bins for {inp.name}[/]") + console.print(f"[bold]FIMO p-value bins for {pool.name}[/]") console.print(bin_table) - manifest_inputs.append( - { - "name": inp.name, - "type": inp.type, - "pool_path": dest.name, - "rows": 
int(len(df)), - "columns": list(df.columns), - } - ) - rows.append((inp.name, inp.type, str(len(df)), dest.name)) - - if not rows: - console.print("[yellow]No pools built (no matching inputs).[/]") - raise typer.Exit(code=1) - - manifest = { - "schema_version": "1.0", - "created_at": datetime.now(timezone.utc).isoformat(), - "run_id": cfg.run.id, - "run_root": str(run_root), - "config_path": str(cfg_path), - "inputs": manifest_inputs, - } - manifest_path = _pool_manifest_path(out_dir) - manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True)) table = Table("input", "type", "rows", "pool_file") - for row in rows: - table.add_row(*row) + for entry in artifact.inputs.values(): + table.add_row(entry.name, entry.input_type, str(entry.rows), entry.pool_path.name) console.print(table) - console.print(f":sparkles: [bold green]Pool manifest written[/]: {manifest_path}") + console.print(f":sparkles: [bold green]Pool manifest written[/]: {artifact.manifest_path}") @stage_b_app.command("build-libraries", help="Build Stage-B libraries from pools or inputs.") @@ -1402,7 +1404,7 @@ def stage_b_build_libraries( pool: Optional[Path] = typer.Option( None, "--pool", - help="Optional pool directory from `stage-a build-pool` (defaults to reading inputs).", + help="Pool directory from `stage-a build-pool` (defaults to outputs/pools for this workspace).", ), input_name: Optional[list[str]] = typer.Option( None, @@ -1422,15 +1424,9 @@ def stage_b_build_libraries( cfg_path = _resolve_config_path(ctx, config) loaded = _load_config_or_exit(cfg_path) cfg = loaded.root.densegen - if pool is None: - _ensure_fimo_available(cfg, strict=True) run_root = _run_root_for(loaded) out_dir = resolve_run_scoped_path(cfg_path, run_root, out, label="stage-b.out") out_dir.mkdir(parents=True, exist_ok=True) - out_path = out_dir / "library_builds.parquet" - if out_path.exists() and not overwrite: - console.print(f"[bold red]library_builds.parquet already exists:[/] {out_path}") - raise 
typer.Exit(code=1) selected_inputs = {name for name in (input_name or [])} if selected_inputs: @@ -1447,7 +1443,6 @@ def stage_b_build_libraries( if missing: raise typer.BadParameter(f"Unknown plan name(s): {', '.join(missing)}") - deps = default_deps() seed = int(cfg.runtime.random_seed) rng = random.Random(seed) np_rng = np.random.default_rng(seed) @@ -1457,117 +1452,147 @@ def stage_b_build_libraries( failure_counts = _load_failure_counts_from_attempts(outputs_root) libraries_built = _load_existing_library_index(outputs_root) if outputs_root.exists() else 0 - pool_manifest = None - pool_dir = None - if pool is not None: - pool_dir = resolve_relative_path(cfg_path, pool) - if not pool_dir.exists() or not pool_dir.is_dir(): - raise typer.BadParameter(f"Pool directory not found: {pool_dir}") - pool_manifest = _load_pool_manifest(pool_dir) + pool_dir = resolve_relative_path(cfg_path, pool) if pool is not None else (run_root / "outputs" / "pools") + if not pool_dir.exists() or not pool_dir.is_dir(): + raise typer.BadParameter(f"Pool directory not found: {pool_dir}") + try: + pool_artifact = load_pool_artifact(pool_dir) + except FileNotFoundError as exc: + console.print(f"[bold red]{exc}[/]") + entries = _list_dir_entries(pool_dir, limit=10) + if entries: + console.print(f"[bold]Pool directory contents[/]: {', '.join(entries)}") + console.print("[bold]Next steps[/]:") + console.print(f" - dense stage-a build-pool -c {cfg_path}") + console.print(" - ensure --pool points to the outputs/pools directory for this workspace") + raise typer.Exit(code=1) - rows = [] + build_rows = [] + member_rows = [] table = Table("input", "plan", "library_index", "library_hash", "size", "achieved/target", "pool", "sampling") - for inp in cfg.inputs: - if selected_inputs and inp.name not in selected_inputs: - continue - if pool_manifest is not None and pool_dir is not None: - entry = next((e for e in pool_manifest.get("inputs", []) if e.get("name") == inp.name), None) - if entry is None: - 
raise typer.BadParameter(f"Pool manifest missing input: {inp.name}") - pool_path = pool_dir / str(entry.get("pool_path") or "") + with _suppress_pyarrow_sysctl_warnings(): + for inp in cfg.inputs: + if selected_inputs and inp.name not in selected_inputs: + continue + entry = pool_artifact.entry_for(inp.name) + pool_path = pool_dir / entry.pool_path if not pool_path.exists(): raise typer.BadParameter(f"Pool file not found for input {inp.name}: {pool_path}") df = pd.read_parquet(pool_path) - if "tf" in df.columns and "tfbs" in df.columns: + if entry.pool_mode == POOL_MODE_TFBS: meta_df = df - data_entries = df["tfbs"].tolist() - elif "sequence" in df.columns: + data_entries = df["tfbs"].tolist() if "tfbs" in df.columns else [] + elif entry.pool_mode == POOL_MODE_SEQUENCE: meta_df = None data_entries = df["sequence"].tolist() else: - raise typer.BadParameter( - f"Pool file for {inp.name} must contain tf/tfbs or sequence columns: {pool_path}" + raise typer.BadParameter(f"Unsupported pool_mode for input {inp.name}: {entry.pool_mode}") + + for plan_item in resolved_plan: + if selected_plans and plan_item.name not in selected_plans: + continue + library, _parts, reg_labels, info = build_library_for_plan( + source_label=inp.name, + plan_item=plan_item, + data_entries=data_entries, + meta_df=meta_df, + sampling_cfg=sampling_cfg, + seq_len=int(cfg.generation.sequence_length), + min_count_per_tf=int(cfg.runtime.min_count_per_tf), + usage_counts={}, + failure_counts=failure_counts if failure_counts else None, + rng=rng, + np_rng=np_rng, + schema_is_22=schema_is_22, + library_index_start=libraries_built, + ) + libraries_built = int(info.get("library_index", libraries_built)) + library_hash = str(info.get("library_hash") or "") + target_len = int(info.get("target_length") or 0) + achieved_len = int(info.get("achieved_length") or 0) + pool_strategy = str(info.get("pool_strategy") or sampling_cfg.pool_strategy) + sampling_strategy = str(info.get("library_sampling_strategy") or 
sampling_cfg.library_sampling_strategy) + library_id = library_hash + tfbs_id_by_index = info.get("tfbs_id_by_index") or [] + motif_id_by_index = info.get("motif_id_by_index") or [] + row = { + "created_at": datetime.now(timezone.utc).isoformat(), + "input_name": inp.name, + "input_type": inp.type, + "plan_name": plan_item.name, + "library_index": int(info.get("library_index") or 0), + "library_id": library_id, + "library_hash": library_hash, + "library_tfbs": list(library), + "library_tfs": list(reg_labels) if reg_labels else [], + "library_site_ids": list(info.get("site_id_by_index") or []), + "library_sources": list(info.get("source_by_index") or []), + "library_tfbs_ids": list(tfbs_id_by_index), + "library_motif_ids": list(motif_id_by_index), + "pool_strategy": pool_strategy, + "library_sampling_strategy": sampling_strategy, + "library_size": int(info.get("library_size") or len(library)), + "target_length": target_len, + "achieved_length": achieved_len, + "relaxed_cap": bool(info.get("relaxed_cap") or False), + "final_cap": info.get("final_cap"), + "iterative_max_libraries": int(info.get("iterative_max_libraries") or 0), + "iterative_min_new_solutions": int(info.get("iterative_min_new_solutions") or 0), + "required_regulators_selected": info.get("required_regulators_selected"), + } + build_rows.append(row) + for idx, tfbs in enumerate(list(library)): + member_rows.append( + { + "library_id": library_id, + "library_hash": library_hash, + "library_index": int(info.get("library_index") or 0), + "input_name": inp.name, + "plan_name": plan_item.name, + "position": int(idx), + "tf": reg_labels[idx] if idx < len(reg_labels or []) else "", + "tfbs": tfbs, + "tfbs_id": tfbs_id_by_index[idx] if idx < len(tfbs_id_by_index) else None, + "motif_id": motif_id_by_index[idx] if idx < len(motif_id_by_index) else None, + "site_id": (info.get("site_id_by_index") or [None])[idx] + if idx < len(info.get("site_id_by_index") or []) + else None, + "source": 
(info.get("source_by_index") or [None])[idx] + if idx < len(info.get("source_by_index") or []) + else None, + } + ) + table.add_row( + inp.name, + plan_item.name, + str(row["library_index"]), + _short_hash(library_hash), + str(len(library)), + f"{achieved_len}/{target_len}", + pool_strategy, + sampling_strategy, ) - else: - src = deps.source_factory(inp, cfg_path) - data_entries, meta_df = src.load_data(rng=np_rng, outputs_root=outputs_root) - - for plan_item in resolved_plan: - if selected_plans and plan_item.name not in selected_plans: - continue - library, _parts, reg_labels, info = build_library_for_plan( - source_label=inp.name, - plan_item=plan_item, - data_entries=data_entries, - meta_df=meta_df, - sampling_cfg=sampling_cfg, - seq_len=int(cfg.generation.sequence_length), - min_count_per_tf=int(cfg.runtime.min_count_per_tf), - usage_counts={}, - failure_counts=failure_counts if failure_counts else None, - rng=rng, - np_rng=np_rng, - schema_is_22=schema_is_22, - library_index_start=libraries_built, - ) - libraries_built = int(info.get("library_index", libraries_built)) - library_hash = str(info.get("library_hash") or "") - target_len = int(info.get("target_length") or 0) - achieved_len = int(info.get("achieved_length") or 0) - pool_strategy = str(info.get("pool_strategy") or sampling_cfg.pool_strategy) - sampling_strategy = str(info.get("library_sampling_strategy") or sampling_cfg.library_sampling_strategy) - row = { - "created_at": datetime.now(timezone.utc).isoformat(), - "input_name": inp.name, - "input_type": inp.type, - "plan_name": plan_item.name, - "library_index": int(info.get("library_index") or 0), - "library_hash": library_hash, - "library_tfbs": list(library), - "library_tfs": list(reg_labels) if reg_labels else [], - "library_site_ids": list(info.get("site_id_by_index") or []), - "library_sources": list(info.get("source_by_index") or []), - "pool_strategy": pool_strategy, - "library_sampling_strategy": sampling_strategy, - "library_size": 
int(info.get("library_size") or len(library)), - "target_length": target_len, - "achieved_length": achieved_len, - "relaxed_cap": bool(info.get("relaxed_cap") or False), - "final_cap": info.get("final_cap"), - "iterative_max_libraries": int(info.get("iterative_max_libraries") or 0), - "iterative_min_new_solutions": int(info.get("iterative_min_new_solutions") or 0), - "required_regulators_selected": info.get("required_regulators_selected"), - } - rows.append(row) - table.add_row( - inp.name, - plan_item.name, - str(row["library_index"]), - _short_hash(library_hash), - str(len(library)), - f"{achieved_len}/{target_len}", - pool_strategy, - sampling_strategy, - ) - if not rows: - console.print("[yellow]No libraries built (no matching inputs/plans).[/]") - raise typer.Exit(code=1) + if not build_rows: + console.print("[yellow]No libraries built (no matching inputs/plans).[/]") + raise typer.Exit(code=1) - df_out = pd.DataFrame(rows) - df_out.to_parquet(out_path, index=False) - manifest = { - "schema_version": "1.0", - "created_at": datetime.now(timezone.utc).isoformat(), - "run_id": cfg.run.id, - "run_root": str(run_root), - "config_path": str(cfg_path), - "library_builds_path": str(out_path), - } - manifest_path = out_dir / "library_manifest.json" - manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True)) + try: + artifact = write_library_artifact( + out_dir=out_dir, + builds=build_rows, + members=member_rows, + cfg_path=cfg_path, + run_id=str(cfg.run.id), + run_root=run_root, + overwrite=overwrite, + ) + except FileExistsError as exc: + console.print(f"[bold red]{exc}[/]") + raise typer.Exit(code=1) console.print(table) - console.print(f":sparkles: [bold green]Library builds written[/]: {out_path}") + console.print(f":sparkles: [bold green]Library builds written[/]: {artifact.builds_path}") + console.print(f":sparkles: [bold green]Library members written[/]: {artifact.members_path}") @app.command(help="Run generation for the job. 
Optionally auto-run plots declared in YAML.") @@ -1582,6 +1607,7 @@ def run( root = loaded.root cfg = root.densegen run_root = _run_root_for(loaded) + _ensure_fimo_available(cfg, strict=True) # Logging setup log_cfg = cfg.logging diff --git a/src/dnadesign/densegen/src/config/__init__.py b/src/dnadesign/densegen/src/config/__init__.py index e8f7b920..a820c42b 100644 --- a/src/dnadesign/densegen/src/config/__init__.py +++ b/src/dnadesign/densegen/src/config/__init__.py @@ -42,8 +42,8 @@ def _construct_mapping(loader, node, deep: bool = False): _StrictLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _construct_mapping) -LATEST_SCHEMA_VERSION = "2.3" -SUPPORTED_SCHEMA_VERSIONS = {"2.1", "2.2", LATEST_SCHEMA_VERSION} +LATEST_SCHEMA_VERSION = "2.4" +SUPPORTED_SCHEMA_VERSIONS = {"2.1", "2.2", "2.3", LATEST_SCHEMA_VERSION} def parse_schema_version(value: str) -> tuple[int, int]: @@ -356,6 +356,9 @@ def _score_mode(self): self.mining = PWMMiningConfig() if self.pvalue_bins is None: self.pvalue_bins = list(CANONICAL_PVALUE_BINS) + if self.mining is not None and self.mining.max_candidates is not None: + if int(self.mining.max_candidates) < int(self.n_sites): + raise ValueError("pwm.sampling.mining.max_candidates must be >= n_sites") if self.mining is not None and self.mining.retain_bin_ids is not None: bins = list(self.pvalue_bins) if self.pvalue_bins is not None else list(CANONICAL_PVALUE_BINS) max_idx = len(bins) - 1 diff --git a/src/dnadesign/densegen/src/core/artifacts/ids.py b/src/dnadesign/densegen/src/core/artifacts/ids.py new file mode 100644 index 00000000..c31b3245 --- /dev/null +++ b/src/dnadesign/densegen/src/core/artifacts/ids.py @@ -0,0 +1,76 @@ +""" +Stable identifier helpers for DenseGen artifacts. + +These hashes are intended to be deterministic and join-friendly across runs. 
+""" + +from __future__ import annotations + +import hashlib +import json +from typing import Mapping, Sequence + +_BASES = ("A", "C", "G", "T") +_FLOAT_DIGITS = 10 + + +def _fmt_float(value: float) -> str: + return format(float(value), f".{_FLOAT_DIGITS}g") + + +def _stable_json(payload: dict) -> str: + return json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=True) + + +def _hash_payload(payload: dict) -> str: + return hashlib.sha256(_stable_json(payload).encode("utf-8")).hexdigest() + + +def hash_pwm_motif( + *, + motif_label: str, + matrix: Sequence[Mapping[str, float]], + background: Mapping[str, float], + source_kind: str, + source_label: str | None = None, +) -> str: + rows = [] + for row in matrix: + rows.append([_fmt_float(row.get(base, 0.0)) for base in _BASES]) + payload = { + "source_kind": source_kind, + "source_label": source_label or "", + "motif_label": str(motif_label), + "matrix": rows, + "background": {base: _fmt_float(background.get(base, 0.0)) for base in _BASES}, + } + return _hash_payload(payload) + + +def hash_label_motif(*, label: str | None, source_kind: str, source_label: str | None = None) -> str: + payload = { + "source_kind": source_kind, + "source_label": source_label or "", + "label": str(label or ""), + } + return _hash_payload(payload) + + +def hash_tfbs_id( + *, + motif_id: str | None, + sequence: str, + scoring_backend: str, + matched_start: int | None = None, + matched_stop: int | None = None, + matched_strand: str | None = None, +) -> str: + payload = { + "motif_id": str(motif_id or ""), + "sequence": str(sequence), + "scoring_backend": str(scoring_backend), + "matched_start": matched_start, + "matched_stop": matched_stop, + "matched_strand": matched_strand or "", + } + return _hash_payload(payload) diff --git a/src/dnadesign/densegen/src/core/artifacts/library.py b/src/dnadesign/densegen/src/core/artifacts/library.py new file mode 100644 index 00000000..e77b727e --- /dev/null +++ 
b/src/dnadesign/densegen/src/core/artifacts/library.py @@ -0,0 +1,97 @@ +""" +Stage-B library artifacts. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path + +import pandas as pd + +from ...utils.logging_utils import install_native_stderr_filters + +LIBRARY_SCHEMA_VERSION = "1.0" + + +@dataclass(frozen=True) +class LibraryArtifact: + manifest_path: Path + builds_path: Path + members_path: Path + schema_version: str + run_id: str + run_root: str + config_path: str + + @classmethod + def load(cls, manifest_path: Path) -> "LibraryArtifact": + payload = json.loads(manifest_path.read_text()) + return cls( + manifest_path=manifest_path, + builds_path=Path(payload.get("library_builds_path", "")), + members_path=Path(payload.get("library_members_path", "")), + schema_version=str(payload.get("schema_version")), + run_id=str(payload.get("run_id")), + run_root=str(payload.get("run_root")), + config_path=str(payload.get("config_path")), + ) + + +def _library_manifest_path(out_dir: Path) -> Path: + return out_dir / "library_manifest.json" + + +def write_library_artifact( + *, + out_dir: Path, + builds: list[dict], + members: list[dict], + cfg_path: Path, + run_id: str, + run_root: Path, + overwrite: bool = False, +) -> LibraryArtifact: + out_dir.mkdir(parents=True, exist_ok=True) + install_native_stderr_filters() + builds_path = out_dir / "library_builds.parquet" + members_path = out_dir / "library_members.parquet" + + if not overwrite: + if builds_path.exists(): + raise FileExistsError(f"Library builds already exist: {builds_path}") + if members_path.exists(): + raise FileExistsError(f"Library members already exist: {members_path}") + + pd.DataFrame(builds).to_parquet(builds_path, index=False) + pd.DataFrame(members).to_parquet(members_path, index=False) + + manifest = { + "schema_version": LIBRARY_SCHEMA_VERSION, + "created_at": datetime.now(timezone.utc).isoformat(), 
+ "run_id": str(run_id), + "run_root": str(run_root), + "config_path": str(cfg_path), + "library_builds_path": str(builds_path), + "library_members_path": str(members_path), + } + manifest_path = _library_manifest_path(out_dir) + manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True)) + return LibraryArtifact( + manifest_path=manifest_path, + builds_path=builds_path, + members_path=members_path, + schema_version=LIBRARY_SCHEMA_VERSION, + run_id=str(run_id), + run_root=str(run_root), + config_path=str(cfg_path), + ) + + +def load_library_artifact(out_dir: Path) -> LibraryArtifact: + manifest_path = _library_manifest_path(out_dir) + if not manifest_path.exists(): + raise FileNotFoundError(f"Library manifest not found: {manifest_path}") + return LibraryArtifact.load(manifest_path) diff --git a/src/dnadesign/densegen/src/core/artifacts/pool.py b/src/dnadesign/densegen/src/core/artifacts/pool.py new file mode 100644 index 00000000..fb9ce5df --- /dev/null +++ b/src/dnadesign/densegen/src/core/artifacts/pool.py @@ -0,0 +1,228 @@ +""" +Stage-A TFBS pool artifacts. 
+""" + +from __future__ import annotations + +import json +import re +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Iterable + +import pandas as pd + +from ...utils.logging_utils import install_native_stderr_filters +from .ids import hash_tfbs_id + +POOL_SCHEMA_VERSION = "1.0" +POOL_MODE_TFBS = "tfbs" +POOL_MODE_SEQUENCE = "sequence" +_SAFE_FILENAME_RE = re.compile(r"[^A-Za-z0-9_.-]+") + + +def _sanitize_filename(name: str) -> str: + cleaned = _SAFE_FILENAME_RE.sub("_", str(name).strip()) + return cleaned or "densegen" + + +@dataclass(frozen=True) +class PoolInputEntry: + name: str + input_type: str + pool_path: Path + rows: int + columns: list[str] + pool_mode: str + + +@dataclass(frozen=True) +class PoolData: + name: str + input_type: str + pool_mode: str + df: pd.DataFrame | None + sequences: list[str] + pool_path: Path + + +@dataclass(frozen=True) +class TFBSPoolArtifact: + manifest_path: Path + inputs: dict[str, PoolInputEntry] + schema_version: str + run_id: str + run_root: str + config_path: str + + @classmethod + def load(cls, manifest_path: Path) -> "TFBSPoolArtifact": + payload = json.loads(manifest_path.read_text()) + entries = {} + for item in payload.get("inputs", []): + entry = PoolInputEntry( + name=str(item.get("name")), + input_type=str(item.get("type")), + pool_path=Path(item.get("pool_path")), + rows=int(item.get("rows", 0)), + columns=list(item.get("columns") or []), + pool_mode=str(item.get("pool_mode") or POOL_MODE_TFBS), + ) + entries[entry.name] = entry + return cls( + manifest_path=manifest_path, + inputs=entries, + schema_version=str(payload.get("schema_version")), + run_id=str(payload.get("run_id")), + run_root=str(payload.get("run_root")), + config_path=str(payload.get("config_path")), + ) + + def entry_for(self, input_name: str) -> PoolInputEntry: + if input_name not in self.inputs: + raise KeyError(f"Pool manifest missing input: {input_name}") + return 
self.inputs[input_name] + + +def _pool_manifest_path(out_dir: Path) -> Path: + return out_dir / "pool_manifest.json" + + +def load_pool_artifact(out_dir: Path) -> TFBSPoolArtifact: + manifest_path = _pool_manifest_path(out_dir) + if not manifest_path.exists(): + raise FileNotFoundError(f"Pool manifest not found: {manifest_path}") + return TFBSPoolArtifact.load(manifest_path) + + +def _resolve_pool_mode(df: pd.DataFrame) -> str: + if "tf" in df.columns and "tfbs" in df.columns: + return POOL_MODE_TFBS + if "sequence" in df.columns: + return POOL_MODE_SEQUENCE + raise ValueError("Pool dataframe must contain tf/tfbs columns or a sequence column.") + + +def _ensure_tfbs_ids(df: pd.DataFrame) -> None: + missing = [col for col in ("motif_id", "tfbs_id") if col not in df.columns] + if missing: + raise ValueError(f"TFBS pool missing required columns: {', '.join(missing)}") + + +def _build_sequence_pool(sequences: Iterable[str]) -> pd.DataFrame: + seqs = [str(s) for s in sequences] + df = pd.DataFrame({"sequence": seqs}) + df["tfbs_id"] = [ + hash_tfbs_id( + motif_id=None, + sequence=seq, + scoring_backend="sequence_library", + ) + for seq in seqs + ] + return df + + +def build_pool_artifact( + *, + cfg, + cfg_path: Path, + deps, + rng, + outputs_root: Path, + out_dir: Path, + overwrite: bool = False, + selected_inputs: set[str] | None = None, +) -> tuple[TFBSPoolArtifact, dict[str, PoolData]]: + out_dir.mkdir(parents=True, exist_ok=True) + install_native_stderr_filters() + pool_entries: dict[str, PoolInputEntry] = {} + pool_data: dict[str, PoolData] = {} + used_names: dict[str, int] = {} + rows: list[tuple[str, str, str, Path]] = [] + + for inp in cfg.inputs: + if selected_inputs and inp.name not in selected_inputs: + continue + src = deps.source_factory(inp, cfg_path) + data_entries, meta_df = src.load_data(rng=rng, outputs_root=outputs_root) + if meta_df is None: + df = _build_sequence_pool(data_entries) + else: + df = meta_df.copy() + df.insert(0, "input_name", 
inp.name) + + pool_mode = _resolve_pool_mode(df) + if pool_mode == POOL_MODE_TFBS: + _ensure_tfbs_ids(df) + + base = _sanitize_filename(inp.name) + count = used_names.get(base, 0) + used_names[base] = count + 1 + suffix = f"{base}__{count}" if count else base + filename = f"{suffix}__pool.parquet" + dest = out_dir / filename + if dest.exists() and not overwrite: + raise FileExistsError(f"Pool already exists: {dest}") + df.to_parquet(dest, index=False) + + entry = PoolInputEntry( + name=inp.name, + input_type=str(inp.type), + pool_path=Path(filename), + rows=int(len(df)), + columns=list(df.columns), + pool_mode=pool_mode, + ) + pool_entries[inp.name] = entry + sequences: list[str] + if pool_mode == POOL_MODE_SEQUENCE: + sequences = df["sequence"].tolist() + pool_df = None + else: + sequences = df["tfbs"].tolist() if "tfbs" in df.columns else [] + pool_df = df + pool_data[inp.name] = PoolData( + name=inp.name, + input_type=str(inp.type), + pool_mode=pool_mode, + df=pool_df, + sequences=sequences, + pool_path=dest, + ) + rows.append((inp.name, str(inp.type), str(len(df)), dest)) + + if not rows: + raise ValueError("No pools built (no matching inputs).") + + manifest = { + "schema_version": POOL_SCHEMA_VERSION, + "created_at": datetime.now(timezone.utc).isoformat(), + "run_id": cfg.run.id, + "run_root": str(cfg.run.root), + "config_path": str(cfg_path), + "inputs": [ + { + "name": entry.name, + "type": entry.input_type, + "pool_path": entry.pool_path.name, + "rows": entry.rows, + "columns": entry.columns, + "pool_mode": entry.pool_mode, + } + for entry in pool_entries.values() + ], + } + manifest_path = _pool_manifest_path(out_dir) + manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True)) + + artifact = TFBSPoolArtifact( + manifest_path=manifest_path, + inputs=pool_entries, + schema_version=POOL_SCHEMA_VERSION, + run_id=str(cfg.run.id), + run_root=str(cfg.run.root), + config_path=str(cfg_path), + ) + return artifact, pool_data diff --git 
a/src/dnadesign/densegen/src/core/metadata_schema.py b/src/dnadesign/densegen/src/core/metadata_schema.py index ea568c9d..45a85f6f 100644 --- a/src/dnadesign/densegen/src/core/metadata_schema.py +++ b/src/dnadesign/densegen/src/core/metadata_schema.py @@ -27,7 +27,7 @@ class MetaField: META_FIELDS: list[MetaField] = [ - MetaField("schema_version", (str,), "DenseGen schema version (e.g., 2.1)."), + MetaField("schema_version", (str,), "DenseGen schema version (e.g., 2.4)."), MetaField("created_at", (str,), "UTC ISO8601 timestamp for record creation."), MetaField("run_id", (str,), "Run identifier (densegen.run.id)."), MetaField("run_root", (str,), "Resolved run root path (densegen.run.root)."), @@ -54,7 +54,7 @@ class MetaField: MetaField( "used_tfbs_detail", (list,), - "Per-placement detail: tf/tfbs/orientation/offset (offset uses final sequence coordinates).", + "Per-placement detail: tf/tfbs/motif_id/tfbs_id/orientation/offset (offset uses final coordinates).", ), MetaField("used_tf_counts", (list,), "Per-TF placement counts ({tf, count})."), MetaField("used_tf_list", (list,), "TFs used in the final sequence."), diff --git a/src/dnadesign/densegen/src/core/pipeline.py b/src/dnadesign/densegen/src/core/pipeline.py index 07a2012e..3770d8a7 100644 --- a/src/dnadesign/densegen/src/core/pipeline.py +++ b/src/dnadesign/densegen/src/core/pipeline.py @@ -43,6 +43,10 @@ resolve_run_root, schema_version_at_least, ) +from ..utils.logging_utils import install_native_stderr_filters +from .artifacts.ids import hash_tfbs_id +from .artifacts.library import write_library_artifact +from .artifacts.pool import build_pool_artifact from .metadata import build_metadata from .postprocess import random_fill from .pvalue_bins import resolve_pvalue_bins @@ -537,7 +541,16 @@ def _input_metadata(source_cfg, cfg_path: Path) -> dict: return meta -def _compute_used_tf_info(sol, library_for_opt, regulator_labels, fixed_elements, site_id_by_index, source_by_index): +def _compute_used_tf_info( + 
sol, + library_for_opt, + regulator_labels, + fixed_elements, + site_id_by_index, + source_by_index, + tfbs_id_by_index, + motif_id_by_index, +): promoter_motifs = set() if fixed_elements is not None: if hasattr(fixed_elements, "promoter_constraints"): @@ -593,6 +606,14 @@ def _compute_used_tf_info(sol, library_for_opt, regulator_labels, fixed_elements source = source_by_index[base_idx] if source is not None: entry["source"] = source + if tfbs_id_by_index is not None and base_idx < len(tfbs_id_by_index): + tfbs_id = tfbs_id_by_index[base_idx] + if tfbs_id is not None: + entry["tfbs_id"] = tfbs_id + if motif_id_by_index is not None and base_idx < len(motif_id_by_index): + motif_id = motif_id_by_index[base_idx] + if motif_id is not None: + entry["motif_id"] = motif_id used_detail.append(entry) if tf_label: counts[tf_label] = counts.get(tf_label, 0) + 1 @@ -980,6 +1001,8 @@ def _finalize( *, site_id_by_index: list[str | None] | None, source_by_index: list[str | None] | None, + tfbs_id_by_index: list[str | None] | None, + motif_id_by_index: list[str | None] | None, ) -> tuple[list[str], list[str], list[str], dict]: nonlocal libraries_built libraries_built += 1 @@ -987,6 +1010,8 @@ def _finalize( info["library_hash"] = _hash_library(library, reg_labels, site_id_by_index, source_by_index) info["site_id_by_index"] = site_id_by_index info["source_by_index"] = source_by_index + info["tfbs_id_by_index"] = tfbs_id_by_index + info["motif_id_by_index"] = motif_id_by_index return library, parts, reg_labels, info if meta_df is not None and isinstance(meta_df, pd.DataFrame): @@ -1022,6 +1047,8 @@ def _finalize( parts = [f"{tf}:{tfbs}" for tf, tfbs in zip(reg_labels, lib_df["tfbs"].tolist())] site_id_by_index = lib_df["site_id"].tolist() if "site_id" in lib_df.columns else None source_by_index = lib_df["source"].tolist() if "source" in lib_df.columns else None + tfbs_id_by_index = lib_df["tfbs_id"].tolist() if "tfbs_id" in lib_df.columns else None + motif_id_by_index = 
lib_df["motif_id"].tolist() if "motif_id" in lib_df.columns else None info = { "target_length": seq_len + subsample_over, "achieved_length": sum(len(s) for s in library), @@ -1039,6 +1066,8 @@ def _finalize( info, site_id_by_index=site_id_by_index, source_by_index=source_by_index, + tfbs_id_by_index=tfbs_id_by_index, + motif_id_by_index=motif_id_by_index, ) sampler = TFSampler(meta_df, np_rng) @@ -1126,6 +1155,8 @@ def _finalize( ) site_id_by_index = info.get("site_id_by_index") source_by_index = info.get("source_by_index") + tfbs_id_by_index = info.get("tfbs_id_by_index") + motif_id_by_index = info.get("motif_id_by_index") return _finalize( library, parts, @@ -1133,6 +1164,8 @@ def _finalize( info, site_id_by_index=site_id_by_index, source_by_index=source_by_index, + tfbs_id_by_index=tfbs_id_by_index, + motif_id_by_index=motif_id_by_index, ) if required_regulators or plan_min_count_by_regulator or min_required_regulators is not None: @@ -1183,7 +1216,19 @@ def _finalize( "iterative_max_libraries": iterative_max_libraries, "iterative_min_new_solutions": iterative_min_new_solutions, } - return _finalize(library, tf_parts, reg_labels, info, site_id_by_index=None, source_by_index=None) + tfbs_id_by_index = [ + hash_tfbs_id(motif_id=None, sequence=seq, scoring_backend="sequence_library") for seq in library + ] + return _finalize( + library, + tf_parts, + reg_labels, + info, + site_id_by_index=None, + source_by_index=None, + tfbs_id_by_index=tfbs_id_by_index, + motif_id_by_index=None, + ) def _compute_sampling_fraction( @@ -1245,6 +1290,14 @@ def _consolidate_parts(outputs_root: Path, *, part_glob: str, final_name: str) - return True +def _emit_event(events_path: Path, *, event: str, payload: dict) -> None: + record = {"event": event, "created_at": datetime.now(timezone.utc).isoformat()} + record.update(payload) + events_path.parent.mkdir(parents=True, exist_ok=True) + with events_path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(record, 
sort_keys=True) + "\n") + + ATTEMPTS_CHUNK_SIZE = 256 @@ -1551,6 +1604,10 @@ def _process_plan_for_source( write_state: Callable[[], None] | None = None, site_failure_counts: dict[tuple[str, str, str, str, str | None], dict[str, int]] | None = None, source_cache: dict[str, tuple[list, pd.DataFrame | None]] | None = None, + library_build_rows: list[dict] | None = None, + library_member_rows: list[dict] | None = None, + composition_rows: list[dict] | None = None, + events_path: Path | None = None, ) -> tuple[int, dict]: source_label = source_cfg.name plan_name = plan_item.name @@ -1560,6 +1617,73 @@ def _process_plan_for_source( seq_len = int(gen.sequence_length) sampling_cfg = gen.sampling + def _record_library_build( + *, + sampling_info: dict, + library_tfbs: list[str], + library_tfs: list[str], + library_tfbs_ids: list[str], + library_motif_ids: list[str], + library_site_ids: list[str | None], + library_sources: list[str | None], + ) -> None: + if library_build_rows is None or library_member_rows is None: + return + library_index = int(sampling_info.get("library_index") or 0) + library_hash = str(sampling_info.get("library_hash") or "") + library_id = library_hash or f"{source_label}:{plan_name}:{library_index}" + row = { + "created_at": datetime.now(timezone.utc).isoformat(), + "input_name": source_label, + "plan_name": plan_name, + "library_index": library_index, + "library_id": library_id, + "library_hash": library_hash, + "pool_strategy": sampling_info.get("pool_strategy"), + "library_sampling_strategy": sampling_info.get("library_sampling_strategy"), + "library_size": int(sampling_info.get("library_size") or len(library_tfbs)), + "target_length": sampling_info.get("target_length"), + "achieved_length": sampling_info.get("achieved_length"), + "relaxed_cap": sampling_info.get("relaxed_cap"), + "final_cap": sampling_info.get("final_cap"), + "iterative_max_libraries": sampling_info.get("iterative_max_libraries"), + "iterative_min_new_solutions": 
sampling_info.get("iterative_min_new_solutions"), + "required_regulators_selected": sampling_info.get("required_regulators_selected"), + } + library_build_rows.append(row) + if events_path is not None: + try: + _emit_event( + events_path, + event="LIBRARY_BUILT", + payload={ + "input_name": source_label, + "plan_name": plan_name, + "library_index": library_index, + "library_hash": library_hash, + "library_size": int(row.get("library_size") or len(library_tfbs)), + }, + ) + except Exception: + log.debug("Failed to emit LIBRARY_BUILT event.", exc_info=True) + for idx, tfbs in enumerate(library_tfbs): + library_member_rows.append( + { + "library_id": library_id, + "library_hash": library_hash, + "library_index": library_index, + "input_name": source_label, + "plan_name": plan_name, + "position": int(idx), + "tf": library_tfs[idx] if idx < len(library_tfs) else "", + "tfbs": tfbs, + "tfbs_id": library_tfbs_ids[idx] if idx < len(library_tfbs_ids) else None, + "motif_id": library_motif_ids[idx] if idx < len(library_motif_ids) else None, + "site_id": library_site_ids[idx] if idx < len(library_site_ids) else None, + "source": library_sources[idx] if idx < len(library_sources) else None, + } + ) + pool_strategy = str(sampling_cfg.pool_strategy) library_sampling_strategy = str(sampling_cfg.library_sampling_strategy) iterative_max_libraries = int(sampling_cfg.iterative_max_libraries) @@ -1822,12 +1946,25 @@ def _process_plan_for_source( libraries_built = int(sampling_info.get("library_index", libraries_built)) site_id_by_index = sampling_info.get("site_id_by_index") source_by_index = sampling_info.get("source_by_index") + tfbs_id_by_index = sampling_info.get("tfbs_id_by_index") + motif_id_by_index = sampling_info.get("motif_id_by_index") sampling_library_index = sampling_info.get("library_index", 0) sampling_library_hash = sampling_info.get("library_hash", "") library_tfbs = list(library_for_opt) library_tfs = list(regulator_labels) if regulator_labels else [] 
library_site_ids = list(site_id_by_index) if site_id_by_index else [] library_sources = list(source_by_index) if source_by_index else [] + library_tfbs_ids = list(tfbs_id_by_index) if tfbs_id_by_index else [] + library_motif_ids = list(motif_id_by_index) if motif_id_by_index else [] + _record_library_build( + sampling_info=sampling_info, + library_tfbs=library_tfbs, + library_tfs=library_tfs, + library_tfbs_ids=library_tfbs_ids, + library_motif_ids=library_motif_ids, + library_site_ids=library_site_ids, + library_sources=library_sources, + ) max_tfbs_len = max((len(str(m)) for m in library_tfbs), default=0) required_len = max(max_tfbs_len, fixed_elements_max_len) if seq_len < required_len: @@ -2015,6 +2152,21 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): stall_seconds, ) stall_events += 1 + if events_path is not None: + try: + _emit_event( + events_path, + event="STALL_DETECTED", + payload={ + "input_name": source_label, + "plan_name": plan_name, + "stall_seconds": float(now - subsample_started), + "library_index": int(sampling_library_index), + "library_hash": str(sampling_library_hash), + }, + ) + except Exception: + log.debug("Failed to emit STALL_DETECTED event.", exc_info=True) stall_triggered = True break if (now - last_log_warn >= stall_warn_every) and (produced_this_library == 0): @@ -2051,6 +2203,8 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): fixed_elements, site_id_by_index, source_by_index, + tfbs_id_by_index, + motif_id_by_index, ) tf_list_from_library = sorted(set(regulator_labels)) if regulator_labels else [] solver_status = getattr(sol, "status", None) @@ -2412,6 +2566,30 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): ) continue + if composition_rows is not None: + for placement_index, entry in enumerate(used_tfbs_detail or []): + composition_rows.append( + { + "sequence_id": record.id, + "input_name": source_label, + "plan_name": plan_name, 
+ "library_index": int(sampling_library_index), + "library_hash": str(sampling_library_hash), + "placement_index": int(placement_index), + "tf": entry.get("tf"), + "tfbs": entry.get("tfbs"), + "motif_id": entry.get("motif_id"), + "tfbs_id": entry.get("tfbs_id"), + "orientation": entry.get("orientation"), + "offset": entry.get("offset"), + "length": entry.get("length"), + "end": entry.get("end"), + "pad_left": entry.get("pad_left"), + "site_id": entry.get("site_id"), + "source": entry.get("source"), + } + ) + _append_attempt( outputs_root, run_id=run_id, @@ -2634,6 +2812,13 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): iterative_min_new_solutions, ) + resample_reason = "resample" + if produced_this_library == 0: + resample_reason = "stall_no_solution" if stall_triggered else "no_solution" + elif pool_strategy == "iterative_subsample" and iterative_min_new_solutions > 0: + if produced_this_library < iterative_min_new_solutions: + resample_reason = "min_new_solutions" + # Resample # Alignment (2): allow reactive resampling for subsample under schema>=2.2. 
allow_resample = pool_strategy == "iterative_subsample" or (schema_is_22 and pool_strategy == "subsample") @@ -2645,6 +2830,22 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): ) resamples_in_try += 1 total_resamples += 1 + if events_path is not None: + try: + _emit_event( + events_path, + event="RESAMPLE_TRIGGERED", + payload={ + "input_name": source_label, + "plan_name": plan_name, + "reason": resample_reason, + "produced_this_library": int(produced_this_library), + "library_index": int(sampling_library_index), + "library_hash": str(sampling_library_hash), + }, + ) + except Exception: + log.debug("Failed to emit RESAMPLE_TRIGGERED event.", exc_info=True) if max_total_resamples > 0 and total_resamples > max_total_resamples: raise RuntimeError(f"[{source_label}/{plan_name}] Exceeded max_total_resamples={max_total_resamples}.") if resamples_in_try > max_resample_attempts: @@ -2683,12 +2884,25 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): libraries_built = int(sampling_info.get("library_index", libraries_built)) site_id_by_index = sampling_info.get("site_id_by_index") source_by_index = sampling_info.get("source_by_index") + tfbs_id_by_index = sampling_info.get("tfbs_id_by_index") + motif_id_by_index = sampling_info.get("motif_id_by_index") sampling_library_index = sampling_info.get("library_index", sampling_library_index) sampling_library_hash = sampling_info.get("library_hash", sampling_library_hash) library_tfbs = list(library_for_opt) library_tfs = list(regulator_labels) if regulator_labels else [] library_site_ids = list(site_id_by_index) if site_id_by_index else [] library_sources = list(source_by_index) if source_by_index else [] + library_tfbs_ids = list(tfbs_id_by_index) if tfbs_id_by_index else [] + library_motif_ids = list(motif_id_by_index) if motif_id_by_index else [] + _record_library_build( + sampling_info=sampling_info, + library_tfbs=library_tfbs, + library_tfs=library_tfs, + 
library_tfbs_ids=library_tfbs_ids, + library_motif_ids=library_motif_ids, + library_site_ids=library_site_ids, + library_sources=library_sources, + ) # Alignment (7): sampling_fraction uses unique TFBS strings and is bounded. sampling_fraction = _compute_sampling_fraction( library_for_opt, @@ -2779,6 +2993,7 @@ def _make_generator(_library_for_opt: List[str], _regulator_labels: List[str]): def run_pipeline(loaded: LoadedConfig, *, deps: PipelineDeps | None = None) -> RunSummary: deps = deps or default_deps() + install_native_stderr_filters() cfg = loaded.root.densegen run_root = resolve_run_root(loaded.path, cfg.run.root) run_root_str = str(run_root) @@ -2811,8 +3026,45 @@ def run_pipeline(loaded: LoadedConfig, *, deps: PipelineDeps | None = None) -> R plan_leaderboards: dict[tuple[str, str], dict] = {} inputs_manifest_entries: dict[str, dict] = {} source_cache: dict[str, tuple[list, pd.DataFrame | None]] = {} + library_build_rows: list[dict] = [] + library_member_rows: list[dict] = [] + composition_rows: list[dict] = [] outputs_root = run_outputs_root(run_root) outputs_root.mkdir(parents=True, exist_ok=True) + events_path = outputs_root / "meta" / "events.jsonl" + pool_dir = outputs_root / "pools" + try: + _pool_artifact, pool_data = build_pool_artifact( + cfg=cfg, + cfg_path=loaded.path, + deps=deps, + rng=np_rng, + outputs_root=outputs_root, + out_dir=pool_dir, + overwrite=True, + ) + except Exception as exc: + raise RuntimeError(f"Failed to build Stage-A TFBS pools: {exc}") from exc + try: + _emit_event( + events_path, + event="POOL_BUILT", + payload={ + "inputs": [ + { + "name": pool.name, + "input_type": pool.input_type, + "pool_mode": pool.pool_mode, + "rows": int(pool.df.shape[0]) if pool.df is not None else int(len(pool.sequences)), + } + for pool in pool_data.values() + ] + }, + ) + except Exception: + log.debug("Failed to emit POOL_BUILT event.", exc_info=True) + for name, pool in pool_data.items(): + source_cache[name] = (pool.sequences, pool.df) 
ensure_run_meta_dir(run_root) state_path = run_state_path(run_root) state_created_at = datetime.now(timezone.utc).isoformat() @@ -2996,6 +3248,10 @@ def _write_state() -> None: write_state=_write_state, site_failure_counts=site_failure_counts, source_cache=source_cache, + library_build_rows=library_build_rows, + library_member_rows=library_member_rows, + composition_rows=composition_rows, + events_path=events_path, ) per_plan[(s.name, item.name)] = per_plan.get((s.name, item.name), 0) + produced total += produced @@ -3044,6 +3300,10 @@ def _write_state() -> None: write_state=_write_state, site_failure_counts=site_failure_counts, source_cache=source_cache, + library_build_rows=library_build_rows, + library_member_rows=library_member_rows, + composition_rows=composition_rows, + events_path=events_path, ) produced_counts[key] = current + produced leaderboard_latest = stats.get("leaderboard_latest") @@ -3059,6 +3319,77 @@ def _write_state() -> None: outputs_root = run_outputs_root(run_root) _consolidate_parts(outputs_root, part_glob="attempts_part-*.parquet", final_name="attempts.parquet") + if library_build_rows: + libraries_dir = outputs_root / "libraries" + existing_builds: list[dict] = [] + existing_members: list[dict] = [] + builds_path = libraries_dir / "library_builds.parquet" + members_path = libraries_dir / "library_members.parquet" + if builds_path.exists(): + try: + existing_builds = pd.read_parquet(builds_path).to_dict("records") + except Exception: + log.warning("Failed to read existing library_builds.parquet; overwriting.", exc_info=True) + existing_builds = [] + if members_path.exists(): + try: + existing_members = pd.read_parquet(members_path).to_dict("records") + except Exception: + log.warning("Failed to read existing library_members.parquet; overwriting.", exc_info=True) + existing_members = [] + + existing_indices = { + int(row.get("library_index") or 0) for row in existing_builds if row.get("library_index") is not None + } + new_builds = [row for 
row in library_build_rows if int(row.get("library_index") or 0) not in existing_indices] + build_rows = existing_builds + new_builds + + existing_member_keys = { + ( + int(row.get("library_index") or 0), + int(row.get("position") or 0), + ) + for row in existing_members + } + new_members = [ + row + for row in library_member_rows + if (int(row.get("library_index") or 0), int(row.get("position") or 0)) not in existing_member_keys + ] + member_rows = existing_members + new_members + + try: + write_library_artifact( + out_dir=libraries_dir, + builds=build_rows, + members=member_rows, + cfg_path=loaded.path, + run_id=str(cfg.run.id), + run_root=run_root, + overwrite=True, + ) + except Exception as exc: + raise RuntimeError(f"Failed to write library artifacts: {exc}") from exc + + if composition_rows: + composition_path = outputs_root / "composition.parquet" + existing_rows: list[dict] = [] + if composition_path.exists(): + try: + existing_rows = pd.read_parquet(composition_path).to_dict("records") + except Exception: + log.warning("Failed to read existing composition.parquet; overwriting.", exc_info=True) + existing_rows = [] + existing_keys = { + (str(row.get("sequence_id") or ""), int(row.get("placement_index") or 0)) for row in existing_rows + } + new_rows = [ + row + for row in composition_rows + if (str(row.get("sequence_id") or ""), int(row.get("placement_index") or 0)) not in existing_keys + ] + pd.DataFrame(existing_rows + new_rows).to_parquet(composition_path, index=False) + manifest_items = [ PlanManifest( input_name=key[0], diff --git a/src/dnadesign/densegen/src/core/reporting.py b/src/dnadesign/densegen/src/core/reporting.py index c4049d75..d2ff922e 100644 --- a/src/dnadesign/densegen/src/core/reporting.py +++ b/src/dnadesign/densegen/src/core/reporting.py @@ -25,6 +25,7 @@ from ..adapters.outputs import load_records_from_config from ..config import RootConfig, resolve_run_root, resolve_run_scoped_path +from .artifacts.pool import POOL_MODE_TFBS, 
load_pool_artifact from .run_manifest import load_run_manifest from .run_paths import run_manifest_path, run_outputs_root @@ -119,6 +120,8 @@ def _explode_used(df: pd.DataFrame) -> pd.DataFrame: "input_name": str(row.get(input_col) or ""), "tf": tf, "tfbs": tfbs, + "motif_id": entry.get("motif_id"), + "tfbs_id": entry.get("tfbs_id"), "orientation": entry.get("orientation"), "offset": entry.get("offset"), "length": entry.get("length"), @@ -376,6 +379,48 @@ def collect_report_data( tables: Dict[str, pd.DataFrame] = {} + stage_a_bins = pd.DataFrame(columns=["input_name", "tf", "bin_id", "bin_low", "bin_high", "count", "total"]) + pool_dir = outputs_root / "pools" + if pool_dir.exists(): + try: + pool_artifact = load_pool_artifact(pool_dir) + rows: list[dict[str, Any]] = [] + for entry in pool_artifact.inputs.values(): + if entry.pool_mode != POOL_MODE_TFBS: + continue + pool_path = pool_dir / entry.pool_path + if not pool_path.exists(): + continue + df_pool = pd.read_parquet(pool_path) + if "fimo_bin_id" not in df_pool.columns or "tf" not in df_pool.columns: + continue + total_counts = df_pool.groupby("tf").size().to_dict() + grouped = df_pool.groupby(["tf", "fimo_bin_id"]) + for (tf, bin_id), group in grouped: + bin_low = None + bin_high = None + if "fimo_bin_low" in group.columns and not group["fimo_bin_low"].empty: + bin_low = float(group["fimo_bin_low"].iloc[0]) + if "fimo_bin_high" in group.columns and not group["fimo_bin_high"].empty: + bin_high = float(group["fimo_bin_high"].iloc[0]) + rows.append( + { + "input_name": entry.name, + "tf": tf, + "bin_id": int(bin_id), + "bin_low": bin_low, + "bin_high": bin_high, + "count": int(len(group)), + "total": int(total_counts.get(tf, len(group))), + } + ) + if rows: + stage_a_bins = pd.DataFrame(rows) + except Exception: + log.warning("Failed to load Stage-A pool bins for report.", exc_info=True) + + tables["stage_a_bins"] = stage_a_bins + library_summary = pd.DataFrame( columns=["library_hash", "library_index", 
"input_name", "plan_name", "size", "total_bp", "outputs"] ) @@ -481,6 +526,13 @@ def collect_report_data( tables["tf_cooccurrence"] = _compute_cooccurrence(used_df) tables["tf_adjacency"] = _compute_adjacency(used_df) + composition_path = outputs_root / "composition.parquet" + if composition_path.exists(): + try: + tables["composition"] = pd.read_parquet(composition_path) + except Exception: + log.warning("Failed to load composition.parquet for report tables.", exc_info=True) + library_hashes = df[_dg("sampling_library_hash")].dropna().unique().tolist() tf_counts = used_df["tf"].value_counts().to_dict() if not used_df.empty else {} tfbs_counts = used_df["tfbs"].value_counts().to_dict() if not used_df.empty else {} @@ -604,7 +656,28 @@ def _render_report_md(bundle: ReportBundle) -> str: "## Outputs", "- outputs/dense_arrays.parquet", "- outputs/attempts.parquet", + "- outputs/composition.parquet", + "- outputs/libraries/library_builds.parquet", + "- outputs/libraries/library_members.parquet", + "- outputs/pools/pool_manifest.json", ] + stage_a_bins = bundle.tables.get("stage_a_bins") + if stage_a_bins is not None and not stage_a_bins.empty: + lines.extend(["", "## Stage-A p-value bins"]) + for (input_name, tf), sub in stage_a_bins.groupby(["input_name", "tf"]): + sub = sub.sort_values("bin_id") + parts = [] + for _, row in sub.iterrows(): + bin_id = int(row.get("bin_id") or 0) + count = int(row.get("count") or 0) + low = row.get("bin_low") + high = row.get("bin_high") + if low is not None and high is not None: + label = f"({float(low):.0e},{float(high):.0e}]" + else: + label = f"bin{bin_id}" + parts.append(f"{label}:{count}") + lines.append(f"- {input_name}/{tf}: " + " ".join(parts)) leaderboard = report.get("leaderboard_latest") or {} leader_tf = leaderboard.get("tf") or [] leader_tfbs = leaderboard.get("tfbs") or [] diff --git a/src/dnadesign/densegen/src/core/sampler.py b/src/dnadesign/densegen/src/core/sampler.py index e80d1e4b..4fd65208 100644 --- 
a/src/dnadesign/densegen/src/core/sampler.py +++ b/src/dnadesign/densegen/src/core/sampler.py @@ -99,15 +99,21 @@ def generate_binding_site_subsample( labels: list[str] = [] site_ids: list[str | None] = [] sources: list[str | None] = [] + tfbs_ids: list[str | None] = [] + motif_ids: list[str | None] = [] seen_tfbs = set() # for unique_binding_sites (tf, tfbs) used_per_tf: dict[str, int] = {} has_site_id = "site_id" in self.df.columns has_source = "source" in self.df.columns + has_tfbs_id = "tfbs_id" in self.df.columns + has_motif_id = "motif_id" in self.df.columns def _append_provenance(row) -> None: site_ids.append(str(row["site_id"]) if has_site_id else None) sources.append(str(row["source"]) if has_source else None) + tfbs_ids.append(str(row["tfbs_id"]) if has_tfbs_id else None) + motif_ids.append(str(row["motif_id"]) if has_motif_id else None) unique_tfs = self.df["tf"].unique().tolist() self.rng.shuffle(unique_tfs) @@ -235,6 +241,8 @@ def _add_required_tfs() -> None: "final_cap": cap, "site_id_by_index": site_ids if has_site_id else None, "source_by_index": sources if has_source else None, + "tfbs_id_by_index": tfbs_ids if has_tfbs_id else None, + "motif_id_by_index": motif_ids if has_motif_id else None, } return sites, meta, labels, info @@ -278,6 +286,8 @@ def generate_binding_site_library( has_site_id = "site_id" in df.columns has_source = "source" in df.columns + has_tfbs_id = "tfbs_id" in df.columns + has_motif_id = "motif_id" in df.columns total_unique_tfbs = len(df.drop_duplicates(["tf", "tfbs"])) unique_tfs = sorted(df["tf"].unique().tolist()) @@ -313,6 +323,8 @@ def generate_binding_site_library( reasons: list[str] = [] site_ids: list[str | None] = [] sources: list[str | None] = [] + tfbs_ids: list[str | None] = [] + motif_ids: list[str | None] = [] seen_tfbs = set() used_per_tf: dict[str, int] = {} @@ -330,6 +342,8 @@ def _append_row(row, reason: str) -> bool: used_per_tf[tf] = used_per_tf.get(tf, 0) + 1 site_ids.append(str(row["site_id"]) if 
has_site_id else None) sources.append(str(row["source"]) if has_source else None) + tfbs_ids.append(str(row["tfbs_id"]) if has_tfbs_id else None) + motif_ids.append(str(row["motif_id"]) if has_motif_id else None) return True def _pick_for_tf(tf: str, *, reason: str, cap_override: int | None = None) -> bool: @@ -528,6 +542,8 @@ def _fill_uniform_over_pairs() -> None: "final_cap": cap, "site_id_by_index": site_ids if has_site_id else None, "source_by_index": sources if has_source else None, + "tfbs_id_by_index": tfbs_ids if has_tfbs_id else None, + "motif_id_by_index": motif_ids if has_motif_id else None, "selection_reason_by_index": reasons, } return sites, meta, labels, info diff --git a/src/dnadesign/densegen/tests/test_artifacts_ids.py b/src/dnadesign/densegen/tests/test_artifacts_ids.py new file mode 100644 index 00000000..bab0e3b7 --- /dev/null +++ b/src/dnadesign/densegen/tests/test_artifacts_ids.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from dnadesign.densegen.src.core.artifacts.ids import hash_pwm_motif, hash_tfbs_id + + +def test_hash_tfbs_id_is_deterministic() -> None: + a = hash_tfbs_id(motif_id="M1", sequence="ACGT", scoring_backend="fimo", matched_start=1, matched_stop=4) + b = hash_tfbs_id(motif_id="M1", sequence="ACGT", scoring_backend="fimo", matched_start=1, matched_stop=4) + assert a == b + + +def test_hash_tfbs_id_changes_with_inputs() -> None: + base = hash_tfbs_id(motif_id="M1", sequence="ACGT", scoring_backend="fimo", matched_start=1, matched_stop=4) + diff_seq = hash_tfbs_id(motif_id="M1", sequence="TGCA", scoring_backend="fimo", matched_start=1, matched_stop=4) + diff_match = hash_tfbs_id(motif_id="M1", sequence="ACGT", scoring_backend="fimo", matched_start=2, matched_stop=5) + assert base != diff_seq + assert base != diff_match + + +def test_hash_pwm_motif_changes_with_matrix() -> None: + m1 = hash_pwm_motif( + motif_label="lexA", + matrix=[{"A": 0.7, "C": 0.1, "G": 0.1, "T": 0.1}], + background={"A": 0.25, "C": 0.25, "G": 
0.25, "T": 0.25}, + source_kind="pwm_meme", + ) + m2 = hash_pwm_motif( + motif_label="lexA", + matrix=[{"A": 0.6, "C": 0.2, "G": 0.1, "T": 0.1}], + background={"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, + source_kind="pwm_meme", + ) + assert m1 != m2 diff --git a/src/dnadesign/densegen/tests/test_artifacts_library.py b/src/dnadesign/densegen/tests/test_artifacts_library.py new file mode 100644 index 00000000..4490599e --- /dev/null +++ b/src/dnadesign/densegen/tests/test_artifacts_library.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from pathlib import Path + +from dnadesign.densegen.src.core.artifacts.library import load_library_artifact, write_library_artifact + + +def test_write_library_artifact(tmp_path: Path) -> None: + builds = [ + { + "created_at": "2026-01-20T00:00:00+00:00", + "input_name": "demo", + "plan_name": "plan", + "library_index": 1, + "library_id": "libhash", + "library_hash": "libhash", + "pool_strategy": "subsample", + "library_sampling_strategy": "tf_balanced", + "library_size": 2, + "target_length": 20, + "achieved_length": 18, + "relaxed_cap": False, + "final_cap": None, + "iterative_max_libraries": 0, + "iterative_min_new_solutions": 0, + "required_regulators_selected": None, + } + ] + members = [ + { + "library_id": "libhash", + "library_hash": "libhash", + "library_index": 1, + "input_name": "demo", + "plan_name": "plan", + "position": 0, + "tf": "TF1", + "tfbs": "AAAA", + "tfbs_id": "id1", + "motif_id": "motif1", + "site_id": None, + "source": "src", + } + ] + artifact = write_library_artifact( + out_dir=tmp_path, + builds=builds, + members=members, + cfg_path=Path("config.yaml"), + run_id="demo", + run_root=tmp_path, + overwrite=True, + ) + + assert artifact.manifest_path.exists() + assert artifact.builds_path.exists() + assert artifact.members_path.exists() + + loaded = load_library_artifact(tmp_path) + assert loaded.builds_path.name == artifact.builds_path.name + assert loaded.members_path.name == 
artifact.members_path.name diff --git a/src/dnadesign/densegen/tests/test_artifacts_pool.py b/src/dnadesign/densegen/tests/test_artifacts_pool.py new file mode 100644 index 00000000..9f3222ad --- /dev/null +++ b/src/dnadesign/densegen/tests/test_artifacts_pool.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import yaml + +from dnadesign.densegen.src.config import load_config +from dnadesign.densegen.src.core.artifacts.pool import build_pool_artifact +from dnadesign.densegen.src.core.pipeline import default_deps + + +def test_build_pool_artifact_binding_sites(tmp_path: Path) -> None: + csv_path = tmp_path / "sites.csv" + csv_path.write_text("tf,tfbs\nTF1,AAAA\nTF2,CCCC\n") + cfg_path = tmp_path / "config.yaml" + cfg_path.write_text( + yaml.safe_dump( + { + "densegen": { + "schema_version": "2.4", + "run": {"id": "demo", "root": "."}, + "inputs": [ + { + "name": "demo input", + "type": "binding_sites", + "path": str(csv_path), + "format": "csv", + } + ], + "output": { + "targets": ["parquet"], + "schema": {"bio_type": "dna", "alphabet": "dna_4"}, + "parquet": {"path": str(tmp_path / "out.parquet")}, + }, + "generation": { + "sequence_length": 10, + "quota": 1, + "plan": [{"name": "default", "quota": 1}], + }, + "solver": {"backend": "CBC", "strategy": "iterate", "options": []}, + "runtime": { + "round_robin": False, + "arrays_generated_before_resample": 10, + "min_count_per_tf": 0, + "max_duplicate_solutions": 5, + "stall_seconds_before_resample": 10, + "stall_warning_every_seconds": 10, + "max_resample_attempts": 1, + "max_total_resamples": 1, + "max_seconds_per_plan": 0, + "max_failed_solutions": 0, + "checkpoint_every": 0, + "leaderboard_every": 50, + }, + "logging": {"log_dir": "outputs/logs", "level": "INFO"}, + "postprocess": {"gap_fill": {"mode": "off"}}, + } + } + ) + ) + + loaded = load_config(cfg_path) + cfg = loaded.root.densegen + out_dir = tmp_path / "outputs" / "pools" + outputs_root = tmp_path 
/ "outputs" + artifact, pool_data = build_pool_artifact( + cfg=cfg, + cfg_path=cfg_path, + deps=default_deps(), + rng=np.random.default_rng(0), + outputs_root=outputs_root, + out_dir=out_dir, + overwrite=False, + ) + + assert artifact.manifest_path.exists() + entry = artifact.entry_for("demo input") + assert " " not in entry.pool_path.name + pool = pool_data["demo input"] + assert pool.df is not None + assert "tfbs_id" in pool.df.columns + assert "motif_id" in pool.df.columns diff --git a/src/dnadesign/densegen/tests/test_cli_workspace_init.py b/src/dnadesign/densegen/tests/test_cli_workspace_init.py new file mode 100644 index 00000000..668c21e3 --- /dev/null +++ b/src/dnadesign/densegen/tests/test_cli_workspace_init.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import textwrap +from pathlib import Path + +from typer.testing import CliRunner + +from dnadesign.densegen.src.cli import app + + +def _write_template_config(path: Path) -> None: + path.write_text( + textwrap.dedent( + """ + densegen: + schema_version: "2.4" + run: + id: demo + root: "." + inputs: + - name: demo + type: binding_sites + path: inputs/sites.csv + """ + ).strip() + + "\n" + ) + + +def _write_min_config(path: Path) -> None: + path.write_text( + textwrap.dedent( + """ + densegen: + schema_version: "2.4" + run: + id: demo + root: "." 
+ inputs: + - name: demo + type: binding_sites + path: inputs.csv + + output: + targets: [parquet] + schema: + bio_type: dna + alphabet: dna_4 + parquet: + path: outputs/dense_arrays.parquet + + generation: + sequence_length: 10 + quota: 1 + plan: + - name: default + quota: 1 + + solver: + backend: CBC + strategy: iterate + + logging: + log_dir: outputs/logs + """ + ).strip() + + "\n" + ) + + +def test_workspace_init_warns_on_relative_inputs_without_copy(tmp_path: Path) -> None: + template_path = tmp_path / "template.yaml" + _write_template_config(template_path) + runner = CliRunner() + result = runner.invoke( + app, + [ + "workspace", + "init", + "--id", + "demo_run", + "--root", + str(tmp_path), + "--template", + str(template_path), + ], + ) + assert result.exit_code == 0, result.output + assert "Workspace uses file-based inputs with relative paths" in result.output + assert (tmp_path / "demo_run" / "config.yaml").exists() + + +def test_stage_b_reports_missing_pool_manifest(tmp_path: Path) -> None: + cfg_path = tmp_path / "config.yaml" + _write_min_config(cfg_path) + pool_dir = tmp_path / "pools" + pool_dir.mkdir() + runner = CliRunner() + result = runner.invoke( + app, + [ + "stage-b", + "build-libraries", + "-c", + str(cfg_path), + "--pool", + str(pool_dir), + ], + ) + assert result.exit_code != 0, result.output + assert "Pool manifest not found" in result.output + assert "dense stage-a build-pool" in result.output diff --git a/src/dnadesign/densegen/tests/test_used_tfbs_offsets.py b/src/dnadesign/densegen/tests/test_used_tfbs_offsets.py index fab47c33..3d0da07c 100644 --- a/src/dnadesign/densegen/tests/test_used_tfbs_offsets.py +++ b/src/dnadesign/densegen/tests/test_used_tfbs_offsets.py @@ -22,6 +22,8 @@ def test_used_tfbs_offsets_shift_with_5prime_padding() -> None: None, None, None, + None, + None, ) assert used_tfbs == ["TF1:TT", "TF2:GG"] assert used_counts == {"TF1": 1, "TF2": 1} diff --git a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml 
b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml index 3fed86a2..281471b7 100644 --- a/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml +++ b/src/dnadesign/densegen/workspaces/demo_meme_two_tf/config.yaml @@ -4,7 +4,7 @@ # Motif widths: lexA=22, cpxR=21. densegen: - schema_version: "2.3" + schema_version: "2.4" run: id: demo_meme_two_tf root: "." From 47ce75c6b583e7b9932cc62444b49ce1f41fb213 Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 15:21:38 -0500 Subject: [PATCH 10/40] pixi: add pytest task for MEME-enabled tests --- pixi.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pixi.toml b/pixi.toml index 7b4672b4..6084d7ab 100644 --- a/pixi.toml +++ b/pixi.toml @@ -7,6 +7,7 @@ platforms = ["osx-arm64", "osx-64", "linux-64"] [tasks] cruncher = "uv run cruncher" dense = "uv run dense" +pytest = "uv run pytest -q" [dependencies] meme = "*" From e6566cc41dcc2faa7f0471bd777faa3a59186d2d Mon Sep 17 00:00:00 2001 From: Eric South Date: Tue, 20 Jan 2026 16:10:39 -0500 Subject: [PATCH 11/40] densegen: harden sampling UX and reporting --- .../densegen/docs/demo/demo_basic.md | 10 +- .../densegen/docs/guide/generation.md | 6 + src/dnadesign/densegen/docs/guide/inputs.md | 3 +- .../densegen/docs/guide/outputs-metadata.md | 34 ++- src/dnadesign/densegen/docs/reference/cli.md | 22 +- .../densegen/docs/reference/config.md | 7 + .../densegen/docs/reference/outputs.md | 5 +- .../src/adapters/sources/pwm_sampling.py | 155 ++++++++++++ src/dnadesign/densegen/src/cli.py | 64 ++--- src/dnadesign/densegen/src/config/__init__.py | 53 ++++ .../densegen/src/core/artifacts/library.py | 2 +- .../densegen/src/core/artifacts/pool.py | 2 +- src/dnadesign/densegen/src/core/pipeline.py | 226 +++++++++++++++--- src/dnadesign/densegen/src/core/reporting.py | 180 +++++++++++++- .../densegen/src/core/run_manifest.py | 12 + .../densegen/src/core/runtime_policy.py | 37 +++ src/dnadesign/densegen/src/core/seeding.py | 22 ++ 
.../densegen/src/utils/logging_utils.py | 55 +++-- src/dnadesign/densegen/src/utils/mpl_utils.py | 18 ++ .../tests/test_cli_summarize_library.py | 4 + .../densegen/tests/test_config_strict.py | 16 ++ .../densegen/tests/test_run_manifest.py | 5 + .../densegen/tests/test_source_cache.py | 3 +- 23 files changed, 836 insertions(+), 105 deletions(-) create mode 100644 src/dnadesign/densegen/src/core/runtime_policy.py create mode 100644 src/dnadesign/densegen/src/core/seeding.py create mode 100644 src/dnadesign/densegen/src/utils/mpl_utils.py diff --git a/src/dnadesign/densegen/docs/demo/demo_basic.md b/src/dnadesign/densegen/docs/demo/demo_basic.md index 9adf7e50..b0589e96 100644 --- a/src/dnadesign/densegen/docs/demo/demo_basic.md +++ b/src/dnadesign/densegen/docs/demo/demo_basic.md @@ -200,13 +200,12 @@ Quota plan: meme_demo=50 🎉 Run complete. ``` -On macOS you may see Arrow sysctl warnings after generation; they are emitted by pyarrow and do -not indicate a DenseGen failure. +DenseGen suppresses noisy pyarrow sysctl warnings to keep stdout clean during long runs. ## 8) Inspect run summary -DenseGen writes `outputs/meta/run_manifest.json` and `outputs/meta/inputs_manifest.json`. Summarize the -run manifest: +DenseGen writes `outputs/meta/run_manifest.json`, `outputs/meta/inputs_manifest.json`, and +`outputs/meta/effective_config.json`. Summarize the run manifest: ```bash uv run dense inspect run --run /private/tmp/densegen-demo-20260115-1405/demo_press @@ -244,7 +243,7 @@ Generate an audit-grade summary of the run: uv run dense report -c /private/tmp/densegen-demo-20260115-1405/demo_press/config.yaml --format all ``` -This writes `outputs/report.json`, `outputs/report.md`, and `outputs/report.html`. +This writes `outputs/report.json`, `outputs/report.md`, `outputs/report.html`, and `outputs/report_assets/`. 
## 10) Inspect outputs @@ -265,6 +264,7 @@ pools report.html report.json report.md +report_assets ``` Inspect Stage‑A pools and Stage‑B libraries: diff --git a/src/dnadesign/densegen/docs/guide/generation.md b/src/dnadesign/densegen/docs/guide/generation.md index 041c458c..cab41eaa 100644 --- a/src/dnadesign/densegen/docs/guide/generation.md +++ b/src/dnadesign/densegen/docs/guide/generation.md @@ -76,6 +76,7 @@ DenseGen exposes dense-arrays solution modes via `solver.strategy`: - `optimal` - only the best solution per library. - `approximate` - heuristic solution per library (no solver options; backend optional). - `strands` - `single | double` (default: `double`). +Use `solver.fallback_to_cbc` to allow a CBC fallback if the preferred solver is not available. ```yaml solver: @@ -83,8 +84,13 @@ solver: strategy: diverse options: ["Threads=8", "TimeLimit=10"] strands: double + fallback_to_cbc: false + allow_unknown_options: false ``` +DenseGen validates solver option keys for known backends and fails fast on unknown options. If you +need to pass custom solver flags, set `solver.allow_unknown_options: true` explicitly. + --- ### Sampling controls diff --git a/src/dnadesign/densegen/docs/guide/inputs.md b/src/dnadesign/densegen/docs/guide/inputs.md index 3ad6b50d..1189bcab 100644 --- a/src/dnadesign/densegen/docs/guide/inputs.md +++ b/src/dnadesign/densegen/docs/guide/inputs.md @@ -111,7 +111,8 @@ Required sampling fields: - `retain_bin_ids` (optional list of ints): keep only specific p‑value bins - `log_every_batches` (int > 0): log yield summaries every N batches - `bgfile` (optional): MEME bfile-format background model for FIMO -- `keep_all_candidates_debug` (optional): write raw FIMO TSVs to `outputs/meta/fimo/` for inspection +- `keep_all_candidates_debug` (optional): write raw FIMO TSVs and candidate-level Parquet + (`candidates__