Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a13ddca
docs(cruncher): document FIMO-like scoring
e-south Jan 20, 2026
4cad606
build: add pixi task aliases for dense and cruncher
e-south Jan 20, 2026
71ea141
densegen: improve FIMO sampling UX and audit metadata
e-south Jan 20, 2026
d803d83
docs(densegen): update stratified FIMO demo and mining workflow
e-south Jan 20, 2026
4700a9c
densegen: add FIMO mining workflow and UX updates
e-south Jan 20, 2026
14279f0
densegen: cache input sampling and improve run UX
e-south Jan 20, 2026
176c4d1
densegen: tighten FIMO mining config and preflight checks
e-south Jan 20, 2026
4d5eed7
densegen docs: clarify FIMO mining workflow
e-south Jan 20, 2026
59b5e53
densegen: add pool/library artifacts and audit reporting
e-south Jan 20, 2026
47ce75c
pixi: add pytest task for MEME-enabled tests
e-south Jan 20, 2026
e6566cc
densegen: harden sampling UX and reporting
e-south Jan 20, 2026
cc151b6
densegen: drop legacy schema paths
e-south Jan 20, 2026
273288f
densegen: strengthen artifacts, reporting, and CLI
e-south Jan 21, 2026
1dfa161
Fix candidate logging scoping and report fallbacks
e-south Jan 21, 2026
a26c1e8
Fix stage-a pool build checks and report warnings
e-south Jan 21, 2026
e08b974
densegen: run-scoped candidates and explicit resume
e-south Jan 21, 2026
1da8834
densegen: docs and demo alignment for explicit runs
e-south Jan 21, 2026
c8c5840
densegen: expand report plots and composition export
e-south Jan 21, 2026
e6f1e81
Refactor densegen pad config and outputs schema
e-south Jan 23, 2026
0e31add
densegen: harden solver controls and demo config
e-south Jan 23, 2026
93d53b9
densegen: fix library summary outputs
e-south Jan 23, 2026
1033ce7
densegen: decouple report plots and add plot manifest
e-south Jan 23, 2026
3b83e32
densegen: append Stage-A pools and refresh CLI defaults
e-south Jan 23, 2026
ad790c0
densegen: harden pool append and strict reporting
e-south Jan 23, 2026
c5d1c68
densegen: clarify demo build-pool fresh semantics
e-south Jan 23, 2026
70ce4a1
densegen: clarify inspect inputs output and demo cruncher step
e-south Jan 23, 2026
90f95f7
densegen demo artifact inputs
e-south Jan 24, 2026
f0052f9
docs align densegen demo workflow
e-south Jan 24, 2026
f6dff3c
cruncher export to densegen workspace
e-south Jan 24, 2026
ba92a58
docs: export densegen via workspace
e-south Jan 24, 2026
16e02fb
docs: tighten demo tips
e-south Jan 24, 2026
515d880
demo: align sampling narrative
e-south Jan 24, 2026
e74bd0e
demo: use meme-derived motifs
e-south Jan 24, 2026
743f07e
docs: refine demo flow
e-south Jan 24, 2026
a5f977e
stage-a: show bins + logging
e-south Jan 24, 2026
e4dc395
Add Stage-A strata design
e-south Jan 25, 2026
2bb5fc7
Refactor Stage-A strata config and summaries
e-south Jan 25, 2026
7fe3ae4
Align metadata and docs with strata sampling
e-south Jan 25, 2026
39912bd
cruncher: write parquet via pyarrow
e-south Jan 25, 2026
4325a6c
densegen: harden sampling and recap output
e-south Jan 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ src/dnadesign/densegen/workspaces/**
!src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/
!src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/*.txt
!src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/*.meme
!src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/
!src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/*.json

# Legacy DenseGen runs (ignored to avoid local artifact noise)
src/dnadesign/densegen/runs/**
Expand Down
38 changes: 37 additions & 1 deletion .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_baseline_file",
"filename": ".secrets.baseline"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
Expand Down Expand Up @@ -138,6 +142,38 @@
"line_number": 181
}
],
"src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/cpxR__meme_suite_meme__cpxR_MANWWHTTTAM.json": [
{
"type": "Hex High Entropy String",
"filename": "src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/cpxR__meme_suite_meme__cpxR_MANWWHTTTAM.json",
"hashed_secret": "2598c3ba7f3985f5df916954885b71931380e2ad",
"is_verified": false,
"line_number": 10
},
{
"type": "Hex High Entropy String",
"filename": "src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/cpxR__meme_suite_meme__cpxR_MANWWHTTTAM.json",
"hashed_secret": "23616517bff0fc8f7749dc3f40e0ec36ec8ebcd1",
"is_verified": false,
"line_number": 11
}
],
"src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/lexA__meme_suite_meme__lexA_CTGTATAWAWWHACA.json": [
{
"type": "Hex High Entropy String",
"filename": "src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/lexA__meme_suite_meme__lexA_CTGTATAWAWWHACA.json",
"hashed_secret": "733c5c02dcc073c2c1040be08dbb665375e48571",
"is_verified": false,
"line_number": 10
},
{
"type": "Hex High Entropy String",
"filename": "src/dnadesign/densegen/workspaces/demo_meme_two_tf/inputs/motif_artifacts/lexA__meme_suite_meme__lexA_CTGTATAWAWWHACA.json",
"hashed_secret": "b76157d075f0bf4ee272f029f598f911769f42d6",
"is_verified": false,
"line_number": 11
}
],
"src/dnadesign/opal/campaigns/demo/inputs/r0/demo_y_sfxi_existing.csv": [
{
"type": "AWS Access Key",
Expand Down Expand Up @@ -175,5 +211,5 @@
}
]
},
"generated_at": "2026-01-15T18:03:36Z"
"generated_at": "2026-01-24T01:02:10Z"
}
72 changes: 72 additions & 0 deletions docs/plans/2026-01-25-stage-a-strata-design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Stage-A PWM Sampling: Strata-First Semantics (FIMO)

## Context
Stage-A PWM sampling currently mixes the ideas of thresholding, binning, and selection in a way that exposes too many knobs and makes the logs hard to interpret. The desired behavior is: (1) mine PWM-like sequences, (2) account for a spectrum of p-value strata for diagnostics/visualization, and (3) retain only the best strata prefix for Stage-B. Configuration should be minimal and ergonomic, with a single obvious knob to adjust strictness, while still capturing per-bin distributions for later analyses (e.g., Hamming/Levenshtein by bin).

## Goals
- Align sampling semantics with user intent: generated → eligible → retained.
- Keep configuration minimal and hard to misconfigure.
- Preserve per-bin counts for didactic plots and diagnostics.
- Make shortfalls expected and interpretable without extra debug logs.
- Ensure docs, demo config, and tests align with the new semantics.

## Non-goals
- Automatic per-regulator threshold calibration.
- Changing the FIMO backend itself or its internal scoring.
- Adding new diversity-selection algorithms (post-hoc analysis stays separate).

## Proposed Semantics
We define three counts per regulator:
- **Generated**: number of candidate sequences sampled.
- **Eligible**: candidates with a FIMO hit at or below a floor threshold.
- **Retained**: eligible hits within the best strata prefix, deduped and capped.

FIMO only reports hits under its reporting threshold, so eligibility is defined by that floor. Per-bin counts are computed for eligible hits to support plots and later analysis. Retention is a strict prefix of bins (best p-values), not an arbitrary list of indices.

## Config Changes (Breaking)
Replace `pvalue_threshold` and `mining.retain_bin_ids` with two semantic knobs:
- `pvalue_strata`: ordered p-value edges (best → worst). The **last** edge is the eligibility floor (FIMO `--thresh`).
- `retain_depth`: number of best bins to keep for Stage-B (prefix of strata).

`n_sites` remains as the **cap** on retained unique sites per regulator (not a target). The default behavior should be explicit in docs; a typical default is `pvalue_strata: [1e-8, 1e-6, 1e-4]` with `retain_depth: 2`.

## Data Flow
1. Generate candidate sequences as today.
2. Run FIMO with `--thresh = last(pvalue_strata)`.
3. Bin each reported hit by `pvalue_strata`.
4. Accumulate **eligible** counts per bin (all bins up to the floor).
5. Retain only bins `0..retain_depth-1`.
6. Dedup retained sequences; if retained > `n_sites`, keep best by `(pvalue asc, score desc)`.

This keeps accounting broad while retention remains strict and bounded.

## Reporting & UX
Stage-A recap table should show:
- `candidates` = generated/target
- `eligible` = eligible/generated
- `pool` = retained/n_sites
- `bins` = per-bin `eligible/retained` pairs (e.g., `b0 12/12 | b1 55/20 | b2 400/0`)
- `len` = `n/min/med/avg/max` for retained (pool) sequences

Zero-retained cases become interpretable without extra logs:
- `eligible=0` → no hits under floor.
- `eligible>0` but retained bins empty → hits exist, but none fall within the retained (strict) strata.

## Migration
- Remove `pvalue_threshold` and `mining.retain_bin_ids` from config schema.
- Require `pvalue_strata` and `retain_depth` for FIMO inputs.
- Update metadata fields to reflect `pvalue_strata` and `retain_depth`.
- Update demo config and docs to use the new semantics.

## Testing Plan
- Config validation rejects legacy keys and enforces `pvalue_strata` + `retain_depth`.
- Sampling tests verify:
  - FIMO floor applied from last stratum edge.
  - Eligibility counts include all bins up to floor.
  - Retention is a prefix of bins (best strata).
  - Dedup + cap are enforced on retained sites.
- CLI recap tests verify new column labels and bin formatting.

## Open Questions
- Default `retain_depth` (require explicit vs. default to full strata).
- Whether to surface the eligibility floor explicitly in metadata or derive from `pvalue_strata`.
2 changes: 2 additions & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ platforms = ["osx-arm64", "osx-64", "linux-64"]

[tasks]
cruncher = "uv run cruncher"
dense = "uv run dense"
pytest = "uv run pytest -q"

[dependencies]
meme = "*"
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ where = ["src"]

[tool.setuptools.package-data]
"dnadesign.cruncher.ingest.certs" = ["*.pem"]
"dnadesign.densegen" = [
"workspaces/demo_meme_two_tf/config.yaml",
"workspaces/demo_meme_two_tf/inputs/*.txt",
]

[tool.pytest.ini_options]
addopts = "-ra -q"
Expand All @@ -109,6 +113,7 @@ norecursedirs = ["*/archived/*", ".venv", "venv", "build", "dist", "*.egg-info"]
markers = ["slow: sampling-heavy tests (>10 s)"]
filterwarnings = [
"ignore:ArviZ is undergoing a major refactor.*:FutureWarning",
"ignore::FutureWarning:arviz.*",
"ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning",
"ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning",
"ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning",
Expand Down
8 changes: 8 additions & 0 deletions src/dnadesign/cruncher/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ A typical workflow looks like:
3. Generate synthetic sequences (e.g., via [MCMC](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo)) using the locked motifs.
4. Analyze / visualize / report from run artifacts.

Scoring is **FIMO-like**: cruncher builds log-odds PWMs against a 0‑order
background, scans each candidate sequence to find the best window per TF
(optionally bidirectional), and can scale that best hit to a p‑value using a
DP‑derived null distribution (`score_scale: logp`). For `logp`, the tail
probability for the best window is converted to a sequence‑level p via
`p_seq = 1 − (1 − p_win)^n_windows`. This is an internal implementation; cruncher
does not call the FIMO binary.

---

### Quickstart (happy path)
Expand Down
15 changes: 13 additions & 2 deletions src/dnadesign/cruncher/docs/demos/demo_basics_two_tf.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

**cruncher** scores each TF by the best PWM match anywhere in the candidate sequence on either strand, then optimizes the min/soft‑min across TFs so the weakest TF improves. It explores sequence space with Gibbs + parallel tempering (MCMC) and returns a diverse elite set (unique up to reverse‑complement) plus diagnostics for stability/mixing. Motif overlap is allowed and treated as informative structure in analysis.

Scoring is **FIMO-like** (internal implementation): for each PWM, cruncher builds
log‑odds scores against a 0‑order background, scans all windows to find the best
hit (optionally bidirectional), and optionally converts that best hit to a
p‑value via a DP‑derived null distribution (`score_scale: logp`). For `logp`,
the tail probability for the best window becomes a sequence‑level p via
`p_seq = 1 − (1 − p_win)^n_windows`.

**Terminology:**

- **sites** = training binding sequences
Expand Down Expand Up @@ -476,12 +483,16 @@ Export the binding-site superset and the selected motifs for DenseGen runs:

```bash
# Export binding sites (CSV/Parquet) for DenseGen binding_sites inputs
cruncher catalog export-sites --set 1 --out /tmp/densegen_sites.csv -c "$CONFIG"
cruncher catalog export-sites --set 1 --densegen-workspace demo_meme_two_tf -c "$CONFIG"

# Export per-motif JSON artifacts for DenseGen PWM artifact inputs
cruncher catalog export-densegen --set 1 --out /tmp/densegen_pwms -c "$CONFIG"
cruncher catalog export-densegen --set 1 --densegen-workspace demo_meme_two_tf -c "$CONFIG"
```

`--densegen-workspace` accepts a workspace name (resolved under `src/dnadesign/densegen/workspaces`)
or an absolute path, and writes under that workspace's `inputs/`. You can still provide `--out`,
but the path must remain inside the target `inputs/` directory.

Then point DenseGen configs at the exported files (`type: binding_sites`) or artifacts
(`type: pwm_artifact_set`).

Expand Down
5 changes: 5 additions & 0 deletions src/dnadesign/cruncher/docs/demos/demo_campaigns_multi_tf.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

This demo walks through running category-based sequence optimization campaigns, focusing on campaign selection (site counts + PWM quality), derived configs, and multi-TF runs.

Scoring is **FIMO-like** (internal implementation): cruncher uses PWM log‑odds
scanning against a 0‑order background, takes the best window per TF (optionally
both strands), and can convert that best hit to a p‑value via a DP‑derived null
distribution (`score_scale: logp`, with `p_seq = 1 − (1 − p_win)^n_windows`).

### Demo instance

- **Workspace**: `src/dnadesign/cruncher/workspaces/demo_campaigns_multi_tf/`
Expand Down
6 changes: 6 additions & 0 deletions src/dnadesign/cruncher/docs/reference/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,15 @@ Examples:
* `cruncher catalog pwms <config>`
* `cruncher catalog pwms --set 1 <config>`
* `cruncher catalog export-sites --set 1 --out densegen/sites.csv <config>`
* `cruncher catalog export-sites --set 1 --densegen-workspace demo_meme_two_tf <config>`
* `cruncher catalog export-densegen --set 1 --out densegen/pwms <config>`
* `cruncher catalog export-densegen --set 1 --densegen-workspace demo_meme_two_tf <config>`
* `cruncher catalog logos --set 1 <config>`

`catalog export-densegen` and `catalog export-sites` accept `--densegen-workspace` (workspace
name under `src/dnadesign/densegen/workspaces/` or an absolute path). When provided, outputs
default to the workspace `inputs/` locations and must stay within that directory.

---

#### `cruncher discover`
Expand Down
4 changes: 4 additions & 0 deletions src/dnadesign/cruncher/docs/reference/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,10 @@ Notes:
- `objective.bidirectional=true` scores both strands (reverse complement) when scanning PWMs.
- `objective.combine` controls how per-TF scores are combined (`min` for weakest-TF optimization, `sum` for sum-based).
- `objective.allow_unscaled_llr=true` allows `score_scale=llr` in multi-TF runs (otherwise validation fails).
- `objective.score_scale=logp` is FIMO‑like: it uses a DP‑derived null
distribution under a 0‑order background to compute a tail p‑value for the
best window, then converts to a sequence‑level p via
`p_seq = 1 − (1 − p_win)^n_windows` before reporting `−log10(p_seq)`.
- `elites.min_hamming` is the Hamming-distance filter for elites (0 disables). If `output.trim.enabled=true` yields variable lengths, the distance is computed over the shared prefix plus the length difference.
- `elites.k` controls how many sequences are retained before diversity filtering (0 = keep all).
- `elites.dsDNA_canonicalize=true` treats reverse complements as identical when computing unique fractions and (optionally) stores `canonical_sequence` in elites.
Expand Down
4 changes: 4 additions & 0 deletions src/dnadesign/cruncher/src/analysis/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ def read_parquet(path: Path):
import pandas as pd

return pd.read_parquet(path, engine="fastparquet")


def write_parquet(df, path: Path) -> None:
    """Write *df* to *path* as a Parquet file.

    Single place where the Parquet writer engine is chosen, so call sites
    do not each hard-code an engine name.

    Args:
        df: Object exposing a pandas-style ``to_parquet`` method
            (presumably a ``pandas.DataFrame`` -- confirm against callers).
        path: Destination file path. It is handed to ``to_parquet``
            unchanged; parent directories are not created here.

    Returns:
        None. The only effect is the file written by ``df.to_parquet``.
    """
    # NOTE(review): read_parquet in this module reads with engine="fastparquet"
    # while this writer uses engine="pyarrow" -- confirm files round-trip
    # cleanly between the two engines.
    # index=False keeps the DataFrame index out of the written file.
    df.to_parquet(path, engine="pyarrow", index=False)
4 changes: 2 additions & 2 deletions src/dnadesign/cruncher/src/analysis/per_pwm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import pandas as pd

from dnadesign.cruncher.analysis.parquet import read_parquet
from dnadesign.cruncher.analysis.parquet import read_parquet, write_parquet
from dnadesign.cruncher.analysis.plots.scatter_utils import encode_sequence
from dnadesign.cruncher.artifacts.layout import sequences_path
from dnadesign.cruncher.core.scoring import Scorer
Expand Down Expand Up @@ -152,7 +152,7 @@ def gather_per_pwm_scores(
out_df = out_df.sort_values(["chain", "draw"]).reset_index(drop=True)
out_path.parent.mkdir(parents=True, exist_ok=True)
if out_path.suffix == ".parquet":
out_df.to_parquet(out_path, engine="fastparquet", index=False)
write_parquet(out_df, out_path)
else:
out_df.to_csv(out_path, index=False)
logger.info("Wrote change-threshold per-PWM scores → %s", out_path)
10 changes: 5 additions & 5 deletions src/dnadesign/cruncher/src/analysis/plots/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pandas as pd
import seaborn as sns

from dnadesign.cruncher.analysis.parquet import read_parquet
from dnadesign.cruncher.analysis.parquet import read_parquet, write_parquet
from dnadesign.cruncher.analysis.plots._savefig import savefig

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -49,7 +49,7 @@ def write_score_summary(score_df: pd.DataFrame, tf_names: list[str], out_path: P
summary.reset_index(drop=True, inplace=True)
out_path.parent.mkdir(parents=True, exist_ok=True)
if out_path.suffix == ".parquet":
summary.to_parquet(out_path, engine="fastparquet", index=False)
write_parquet(summary, out_path)
else:
summary.to_csv(out_path, index=False)

Expand All @@ -71,7 +71,7 @@ def write_elite_topk(elites_df: pd.DataFrame, tf_names: list[str], out_path: Pat
keep_cols = ["sequence"] + [c for c in ("rank", "norm_sum") if c in df.columns] + cols
out_path.parent.mkdir(parents=True, exist_ok=True)
if out_path.suffix == ".parquet":
df[keep_cols].to_parquet(out_path, engine="fastparquet", index=False)
write_parquet(df[keep_cols], out_path)
else:
df[keep_cols].to_csv(out_path, index=False)

Expand Down Expand Up @@ -120,7 +120,7 @@ def write_joint_metrics(elites_df: pd.DataFrame, tf_names: list[str], out_path:
}
df = pd.DataFrame([payload])
if out_path.suffix == ".parquet":
df.to_parquet(out_path, engine="fastparquet", index=False)
write_parquet(df, out_path)
else:
df.to_csv(out_path, index=False)
return
Expand Down Expand Up @@ -160,7 +160,7 @@ def write_joint_metrics(elites_df: pd.DataFrame, tf_names: list[str], out_path:
}
df = pd.DataFrame([payload])
if out_path.suffix == ".parquet":
df.to_parquet(out_path, engine="fastparquet", index=False)
write_parquet(df, out_path)
else:
df.to_csv(out_path, index=False)

Expand Down
14 changes: 7 additions & 7 deletions src/dnadesign/cruncher/src/app/analyze_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
)
from dnadesign.cruncher.analysis.objective import compute_objective_components
from dnadesign.cruncher.analysis.overlap import compute_overlap_tables
from dnadesign.cruncher.analysis.parquet import read_parquet
from dnadesign.cruncher.analysis.parquet import read_parquet, write_parquet
from dnadesign.cruncher.analysis.plot_registry import PLOT_SPECS
from dnadesign.cruncher.analysis.report import ensure_report
from dnadesign.cruncher.app.run_service import list_runs
Expand Down Expand Up @@ -977,8 +977,8 @@ def _plot_path(stem: str) -> Path:
overlap_summary_path = tables_dir / f"overlap_summary.{table_ext}"
elite_overlap_path = tables_dir / f"elite_overlap.{table_ext}"
if table_ext == "parquet":
overlap_summary_df.to_parquet(overlap_summary_path, engine="fastparquet", index=False)
elite_overlap_df.to_parquet(elite_overlap_path, engine="fastparquet", index=False)
write_parquet(overlap_summary_df, overlap_summary_path)
write_parquet(elite_overlap_df, elite_overlap_path)
else:
overlap_summary_df.to_csv(overlap_summary_path, index=False)
elite_overlap_df.to_csv(elite_overlap_path, index=False)
Expand Down Expand Up @@ -1041,13 +1041,13 @@ def _plot_path(stem: str) -> Path:
if move_stats_summary_df is not None and not move_stats_summary_df.empty:
move_stats_summary_path = tables_dir / f"move_stats_summary.{table_ext}"
if table_ext == "parquet":
move_stats_summary_df.to_parquet(move_stats_summary_path, engine="fastparquet", index=False)
write_parquet(move_stats_summary_df, move_stats_summary_path)
else:
move_stats_summary_df.to_csv(move_stats_summary_path, index=False)
if analysis_cfg.extra_tables and move_stats_df is not None:
move_stats_path = tables_dir / f"move_stats.{table_ext}"
if table_ext == "parquet":
move_stats_df.to_parquet(move_stats_path, engine="fastparquet", index=False)
write_parquet(move_stats_df, move_stats_path)
else:
move_stats_df.to_csv(move_stats_path, index=False)

Expand Down Expand Up @@ -1083,7 +1083,7 @@ def _plot_path(stem: str) -> Path:
pt_swap_pairs_df = pd.DataFrame(rows)
pt_swap_pairs_path = tables_dir / f"pt_swap_pairs.{table_ext}"
if table_ext == "parquet":
pt_swap_pairs_df.to_parquet(pt_swap_pairs_path, engine="fastparquet", index=False)
write_parquet(pt_swap_pairs_df, pt_swap_pairs_path)
else:
pt_swap_pairs_df.to_csv(pt_swap_pairs_path, index=False)

Expand All @@ -1097,7 +1097,7 @@ def _plot_path(stem: str) -> Path:
auto_opt_table_path = tables_dir / f"auto_opt_pilots.{table_ext}"
df_auto_table = pd.DataFrame(candidates)
if table_ext == "parquet":
df_auto_table.to_parquet(auto_opt_table_path, engine="fastparquet", index=False)
write_parquet(df_auto_table, auto_opt_table_path)
else:
df_auto_table.to_csv(auto_opt_table_path, index=False)
if analysis_cfg.extra_plots:
Expand Down
Loading
Loading