Skip to content

Commit b15758c

Browse files
committed
shell: add defaults param, fix 3 rendering bugs, add bedtools example
shell.create now accepts `defaults={name: value}` — per-input fallback values applied when the caller omits an input. This works independently of `T | None`: required + no default raises, optional + no default suppresses the flag, either + default uses the default. Three rendering bugs surfaced while building a bedtools intersect wrapper: 1. Positional-arg indices >= 10 were emitted as bare `$10`, which bash parses as `$1` followed by a literal "0". Any task with 10+ scalar/bool inputs had its 10..N values silently corrupted. Fixed by always bracing the index (`${10}`). 2. Optional File / Dir flags were emitted unconditionally — the renderer hardcoded `-flag /var/inputs/name` regardless of whether the caller supplied the file, so omitting an optional file flag pointed the tool at a missing path. Now guarded with `if [ -e <path> ]; then ...; else <flag_var>=""; fi`. 3. Inputs whose names differed only in case (e.g. `c` vs `C`, common in bio CLIs like bedtools / samtools) collided on the same `_VAL_*` / `_FLAG_*` bash variable and silently overwrote each other. create() now rejects this at declaration time with an error naming both offending inputs. Adds 28 unit tests across the four-cell defaults matrix, optional File/Dir flag emission, case-collision detection, and the positional-arg brace regression. New examples: - examples/shell/12_bedtools_intersect_example.py — three intersect queries in parallel against a small BED fixture. - examples/shell/modules/bedtools_intersect.py — typed wrapper around bedtools intersect; reference for bio-CLI shell-extra modules. Signed-off-by: Kyle Hazen <kyle@union.ai>
1 parent a147e12 commit b15758c

5 files changed

Lines changed: 530 additions & 7 deletions

File tree

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""bedtools intersect — three common overlap queries against a peaks file.
2+
3+
This example consumes ``modules/bedtools_intersect.py`` (a typed shell wrapper
4+
around the ``bedtools intersect`` CLI) and exercises three of its most-used
5+
flag combinations on a small BED fixture:
6+
7+
- ``wa=True`` — write each A feature that has *any* overlap in B.
8+
- ``v=True`` — write each A feature that has *no* overlap in B (set diff).
9+
- ``count_overlaps=True`` (bedtools ``-c``) — write each A feature with a trailing count of B overlaps.
10+
11+
Fixture (4 "genes" in A, 3 "peaks" in B, all on chr1):
12+
13+
A (genes) B (peaks)
14+
chr1 100-200 gene1 chr1 150-180 peak1 <- overlaps gene1
15+
chr1 300-400 gene2 chr1 350-450 peak2 <- overlaps gene2
16+
chr1 500-600 gene3 chr1 900-950 peak3
17+
chr1 700-800 gene4
18+
19+
Expected:
20+
- wa -> gene1, gene2
21+
- v -> gene3, gene4
22+
- count_overlaps (``-c``) -> gene1\\t1, gene2\\t1, gene3\\t0, gene4\\t0
23+
24+
Run locally::
25+
26+
uv run python 12_bedtools_intersect_example.py
27+
"""
28+
29+
import asyncio
30+
import tempfile
31+
32+
import flyte
33+
from flyte.io import File
34+
35+
from modules.bedtools_intersect import bedtools_intersect
36+
37+
38+
env = flyte.TaskEnvironment(
39+
name="bedtools_intersect_example",
40+
depends_on=[bedtools_intersect.env],
41+
)
42+
43+
44+
@env.task
async def intersect_demo(genes: File, peaks: File) -> tuple[File, File, File]:
    """Run three ``bedtools intersect`` queries concurrently on the same inputs.

    Every query uses ``genes`` as the A file and ``[peaks]`` as the B files;
    only the single boolean switch differs between them.

    Returns:
        The outputs of the ``wa``, ``v`` and ``count_overlaps`` queries, in
        that order.
    """
    # One entry per query: the only keyword that varies between the calls.
    switches = ("wa", "v", "count_overlaps")
    pending = [
        bedtools_intersect(a=genes, b=[peaks], **{switch: True})
        for switch in switches
    ]
    # gather() preserves submission order, so the results line up with `switches`.
    with_overlap, without_overlap, overlap_counts = await asyncio.gather(*pending)
    return with_overlap, without_overlap, overlap_counts
52+
53+
54+
GENES_BED = (
55+
"chr1\t100\t200\tgene1\t0\t+\n"
56+
"chr1\t300\t400\tgene2\t0\t+\n"
57+
"chr1\t500\t600\tgene3\t0\t+\n"
58+
"chr1\t700\t800\tgene4\t0\t+\n"
59+
)
60+
61+
PEAKS_BED = (
62+
"chr1\t150\t180\tpeak1\t0\t+\n"
63+
"chr1\t350\t450\tpeak2\t0\t+\n"
64+
"chr1\t900\t950\tpeak3\t0\t+\n"
65+
)
66+
67+
68+
69+
70+
# Fixtures
71+
# mkdir -p /tmp/bedtools-fixtures && \
72+
# printf 'chr1\t100\t200\tgene1\t0\t+\nchr1\t300\t400\tgene2\t0\t+\nchr1\t500\t600\tgene3\t0\t+\nchr1\t700\t800\tgene4\t0\t+\n' > /tmp/bedtools-fixtures/genes.bed && \
73+
# printf 'chr1\t150\t180\tpeak1\t0\t+\nchr1\t350\t450\tpeak2\t0\t+\nchr1\t900\t950\tpeak3\t0\t+\n' > /tmp/bedtools-fixtures/peaks.bed && \
74+
# ls -la /tmp/bedtools-fixtures/
75+
76+
if __name__ == "__main__":
    flyte.init()

    # Stage both fixtures inside one scratch directory. The files are still
    # created with delete=False (they must outlive their own `with` block so
    # they can be re-opened by path), but the enclosing TemporaryDirectory
    # removes them once the run has finished, so repeated runs no longer
    # leave stray .bed files behind in the system temp dir.
    with tempfile.TemporaryDirectory(prefix="bedtools-fixtures-") as fixture_dir:
        fixture_paths = []
        for bed_text in (GENES_BED, PEAKS_BED):
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".bed", delete=False, dir=fixture_dir
            ) as f:
                f.write(bed_text)
            fixture_paths.append(f.name)
        genes_path, peaks_path = fixture_paths

        run = flyte.with_runcontext().run(
            intersect_demo,
            File.from_local_sync(genes_path),
            File.from_local_sync(peaks_path),
        )

        print(run)

        # Fetch the three task outputs (positional names o0..o2) while the
        # fixture directory still exists, then report where they were written.
        out = run.outputs()
        overlapping_path = out.o0.download_sync("./overlapping.bed")
        non_overlapping_path = out.o1.download_sync("./non_overlapping.bed")
        counts_path = out.o2.download_sync("./counts.bed")
        print(f"Wrote: {overlapping_path}, {non_overlapping_path}, {counts_path}")
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from flyte.extras import shell
2+
from flyte.io import File
3+
4+
IMAGE = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_0"
5+
6+
# Inputs use descriptive Python names where the bedtools CLI has case-only
7+
# collisions (`-c`/`-C`, `-s`/`-S`, `-f`/`-F`). The shell renderer builds
8+
# bash variable names by uppercasing the Python name, so two inputs whose
9+
# names differ only in case would collide on the same `_FLAG_*` slot.
10+
# `flag_aliases` maps each descriptive Python name to the actual CLI flag.
11+
# Type shared by every on/off switch below; the switch inputs are optional so
# callers can simply omit the ones they do not need.
_OPTIONAL_SWITCH = bool | None

# Typed wrapper around `bedtools intersect`. Input order is significant and is
# kept exactly as declared: the dict.fromkeys() groups only collapse runs of
# consecutive boolean switches that share the same type.
bedtools_intersect = shell.create(
    name="bedtools_intersect",
    image=IMAGE,
    inputs={
        "a": File,
        "b": list[File],
        **dict.fromkeys(
            (
                "wa",
                "wb",
                "loj",
                "wo",
                "wao",
                "u",
                "count_overlaps",
                "count_per_file",
                "v",
                "same_strand",
                "opposite_strand",
            ),
            _OPTIONAL_SWITCH,
        ),
        "frac_a": float | None,
        "frac_b": float | None,
        **dict.fromkeys(
            ("r", "e", "ubam", "bed", "sorted", "nonamecheck"),
            _OPTIONAL_SWITCH,
        ),
        "g": File | None,
        "names": str | None,
        **dict.fromkeys(
            ("filenames", "sortout", "split", "header", "nobuf"),
            _OPTIONAL_SWITCH,
        ),
        "iobuf": str | None,
    },
    outputs={"out": File},
    # Descriptive Python names -> the actual CLI flags (several of which
    # differ from one another only in case).
    flag_aliases=dict(
        b="-b",
        count_overlaps="-c",
        count_per_file="-C",
        same_strand="-s",
        opposite_strand="-S",
        frac_a="-f",
        frac_b="-F",
    ),
    script=r"""
bedtools intersect \
-a {inputs.a} \
{flags.b} \
{flags.wa} {flags.wb} {flags.loj} {flags.wo} {flags.wao} \
{flags.u} {flags.count_overlaps} {flags.count_per_file} {flags.v} \
{flags.same_strand} {flags.opposite_strand} \
{flags.frac_a} {flags.frac_b} {flags.r} {flags.e} \
{flags.ubam} {flags.bed} \
{flags.sorted} {flags.nonamecheck} {flags.g} \
{flags.names} {flags.filenames} {flags.sortout} \
{flags.split} {flags.header} {flags.nobuf} {flags.iobuf} \
> {outputs.out}
""",
)

src/flyte/extras/shell/_render.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import shlex
66
from typing import Any, Tuple
77

8-
from ._types import FlagSpec, Stderr, Stdout, _classify_input
8+
from ._types import FlagSpec, Stderr, Stdout, _classify_input, _is_optional
99

1010
_PLACEHOLDER_RE = re.compile(r"\{(inputs|flags|outputs)\.([a-zA-Z_][a-zA-Z0-9_]*)\}")
1111
_DICT_SEP = "\x1e"
@@ -20,6 +20,7 @@ def _render_command(
2020
output_data_dir: pathlib.Path,
2121
) -> Tuple[str, list[str]]:
2222
kinds = {name: _classify_input(name, tp) for name, tp in inputs.items()}
23+
optionals = {name: _is_optional(tp)[0] for name, tp in inputs.items()}
2324

2425
preamble_lines: list[str] = []
2526
positional_templates: list[str] = []
@@ -34,7 +35,11 @@ def alloc_slot(name: str) -> str:
3435
idx = len(positional_templates) + 1
3536
positional_templates.append(f"{{{{.inputs.{name}}}}}")
3637
var = f"_VAL_{name.upper()}"
37-
preamble_lines.append(f'{var}="${idx}"')
38+
# Brace the positional index: bash parses `$10` as `$1` + `"0"`,
39+
# so any task with 10+ scalar/bool inputs would silently bind
40+
# later variables to the wrong values. `${10}` is the only form
41+
# that works for indices ≥ 10.
42+
preamble_lines.append(f'{var}="${{{idx}}}"')
3843
slot_var_for[name] = var
3944
return var
4045

@@ -90,6 +95,7 @@ def render_flag_ref(name: str) -> str:
9095
alloc_slot,
9196
ensure_dict_decoded,
9297
input_data_dir,
98+
is_optional=optionals[name],
9399
)
94100
)
95101
if kind in ("list_file", "dict_str"):
@@ -139,6 +145,7 @@ def _emit_flag_setter(
139145
alloc_slot,
140146
ensure_dict_decoded,
141147
input_data_dir: pathlib.Path,
148+
is_optional: bool = False,
142149
) -> str:
143150
flag = spec.flag
144151
sep = spec.separator
@@ -155,6 +162,12 @@ def _emit_flag_setter(
155162
)
156163
if kind in ("file", "dir"):
157164
path = input_data_dir / name
165+
if is_optional:
166+
return (
167+
f'if [ -e {shlex.quote(str(path))} ]; then '
168+
f'{flag_var}={shlex.quote(flag + sep + str(path))}; '
169+
f'else {flag_var}=""; fi'
170+
)
158171
return f"{flag_var}={shlex.quote(flag + sep + str(path))}"
159172
if kind == "list_file":
160173
dirpath = input_data_dir / name

0 commit comments

Comments
 (0)