Fix done file requests
lczech committed Feb 11, 2025
1 parent c772696 commit 2af215a
Showing 7 changed files with 40 additions and 40 deletions.
config/config.yaml (1 addition, 1 deletion)

@@ -640,7 +640,7 @@ params:

     # Extra parameters for MarkDuplicates.
     # See https://gatk.broadinstitute.org/hc/en-us/articles/360057439771-MarkDuplicates-Picard
-    MarkDuplicates: "--REMOVE_DUPLICATES true"
+    MarkDuplicates: "REMOVE_DUPLICATES=true"

     # Run several Picard QC tools, as needed, using Picard CollectMultipleMetrics.
     # See https://gatk.broadinstitute.org/hc/en-us/articles/360042478112-CollectMultipleMetrics-Picard
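Side note on the syntax here: Picard's legacy command-line parser takes KEY=VALUE pairs, while the newer GATK-style parser takes --KEY VALUE, so this extras string has to match whichever style the wrapped Picard call uses. A minimal sketch of where such a value typically ends up; this is a hypothetical rule for illustration only, and the config path and file names are assumptions, not the repository's actual rule:

rule mark_duplicates_sketch:
    input:
        "mapping/sorted/{sample}.bam",
    output:
        bam="mapping/dedup/{sample}.bam",
        metrics="qc/dedup/{sample}.metrics.txt",
    params:
        # Assumed config path; the extras string is forwarded verbatim.
        extra=config["params"]["picard"]["MarkDuplicates"],
    shell:
        # Legacy Picard syntax throughout, matching REMOVE_DUPLICATES=true.
        "picard MarkDuplicates INPUT={input} OUTPUT={output.bam} "
        "METRICS_FILE={output.metrics} {params.extra}"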
workflow/rules/calling-bcftools-individual.smk (2 additions, 2 deletions)

@@ -101,7 +101,7 @@ rule combine_contig:
     log:
         "logs/calling/bcftools/combine-contig-{contig}.log",
     benchmark:
-        "benchmarks/calling/called/bcftools/combine-contig-{contig}.log"
+        "benchmarks/calling/combined/bcftools/combine-contig-{contig}.log"
     conda:
         "../envs/bcftools.yaml"
     shell:
@@ -137,7 +137,7 @@ def combined_contig_gvcfs(wildcards):
 # Also need the done files to make sure snakemake doesn't mess this up.
 def combined_contig_done(wildcards):
     fai = checkpoints.samtools_faidx.get().output[0]
-    return expand("calling/called/all.{contig}.g.vcf.gz.done", contig=get_contigs(fai))
+    return expand("calling/combined/all.{contig}.g.vcf.gz.done", contig=get_contigs(fai))


 # We also need a comma-separated list of the contigs, so that bcftools can output
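For context, a minimal sketch of how a done-file input function like combined_contig_done is typically consumed. The rule below is illustrative only; its name, output path, and shell command are assumptions, while the two input functions are the ones from the diff above:

rule combine_all_sketch:
    input:
        # The per-contig g.vcf files, via the checkpoint-driven function.
        gvcfs=combined_contig_gvcfs,
        # The matching done flags; requesting them forces Snakemake to
        # consider the per-contig jobs truly finished before this one runs.
        done=combined_contig_done,
    output:
        "calling/combined/all.g.vcf.gz",
    shell:
        "bcftools concat {input.gvcfs} -O z -o {output}"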
workflow/rules/frequency.smk (16 additions, 9 deletions)

@@ -174,7 +174,7 @@ rule hafpipe_snp_table:
         # in order to better inform the user about the situation and how to fix this.
         numeric=get_hafpipe_snp_table_dir() + "/{chrom}.csv.numeric",
         numericbgz=get_hafpipe_snp_table_dir() + "/{chrom}.csv.numeric.bgz",
-        done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+        done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
     params:
         tasks="1",
         chrom="{chrom}",
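All of the done-file renames in this file follow one pattern: the flag is now named after the exact file it marks, i.e. the file path plus a .done suffix, instead of a bare per-chromosome flag. A comment-only illustration, with directory and sample names assumed:

# Naming convention after this commit (paths are illustrative):
#   <snp table dir>/chr1.csv   ->  <snp table dir>/chr1.csv.done
#   hafpipe/samples/S1.csv     ->  hafpipe/samples/S1.csv.done
#   hafpipe/all.csv            ->  hafpipe/all.csv.done
# A bare "{chrom}.done" is ambiguous once a rule produces several files per
# chromosome; "<file>.done" lets any rule request the flag mechanically.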
@@ -197,7 +197,11 @@ def get_all_hafpipe_raw_snp_tables(wildcards):
     # We use a checkpoint to create the fai file from our ref genome, which gives us the chrom names.
     # Snakemake then needs an input function to work with the fai checkpoint here.
     fai = checkpoints.samtools_faidx.get().output[0]
-    return expand(get_hafpipe_snp_table_dir() + "/{chrom}.csv", chrom=get_hafpipe_chromosomes(fai))
+    return expand(
+        get_hafpipe_snp_table_dir() + "/{chrom}.csv{ext}",
+        chrom=get_hafpipe_chromosomes(fai),
+        ext=["", ".done"]
+    )


 # Rule that requests all HAFpipe SNP table files, so that users can impute them themselves.
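To see what the reworked expand() above returns, a short worked example; the chromosome names and directory are assumed for illustration:

# expand() takes the product of its wildcard lists, with the last one
# varying fastest, so each table is immediately followed by its flag:
#
#   expand("snp-tables/{chrom}.csv{ext}", chrom=["chr1", "chr2"], ext=["", ".done"])
#   == ["snp-tables/chr1.csv", "snp-tables/chr1.csv.done",
#       "snp-tables/chr2.csv", "snp-tables/chr2.csv.done"]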
@@ -245,7 +249,7 @@ if impmethod in ["simpute", "npute"]:
     rule hafpipe_impute_snp_table:
         input:
             snptable=get_hafpipe_snp_table_dir() + "/{chrom}.csv",
-            done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+            done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
             bins=get_hafpipe_bins(),
         output:
             csv=get_hafpipe_snp_table_dir() + "/{chrom}.csv." + impmethod,
@@ -277,7 +281,7 @@ elif impmethod != "":
     rule hafpipe_impute_snp_table:
         input:
             snptable=get_hafpipe_snp_table_dir() + "/{chrom}.csv",
-            done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+            done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
         output:
             csv=get_hafpipe_snp_table_dir() + "/{chrom}.csv." + impmethod,
             done=touch(get_hafpipe_snp_table_dir() + "/{chrom}.csv." + impmethod + ".done"),
@@ -318,7 +322,7 @@ if impmethod == "":
             snptable=get_hafpipe_snp_table_dir() + "/{chrom}.csv",
             alleleCts=get_hafpipe_snp_table_dir() + "/{chrom}.csv.alleleCts",
             numeric=get_hafpipe_snp_table_dir() + "/{chrom}.csv.numeric.bgz",
-            done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+            done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
         output:
             flag=get_hafpipe_snp_table_dir() + "/{chrom}.csv.flag",
         shell:
@@ -522,7 +526,7 @@ rule hafpipe_concat_sample_allele_frequencies:
         # This is the file name produced by the script. For now we do not allow to change this.
         table="hafpipe/samples/{sample}.csv"
         + (".gz" if config["params"]["hafpipe"].get("compress-sample-tables", False) else ""),
-        done="hafpipe/samples/{sample}.done",
+        done="hafpipe/samples/{sample}.csv.done",
     params:
         # The rule needs access to the list of chromosomes, and to the sample.
         sample="{sample}",
@@ -543,7 +547,10 @@ rule hafpipe_collect_concat_samples:
+ (".gz" if config["params"]["hafpipe"].get("compress-sample-tables", False) else ""),
sample=config["global"]["sample-names"],
),
done="hafpipe/samples/{sample}.done",
done=expand(
"hafpipe/samples/{sample}.csv.done",
sample=config["global"]["sample-names"],
),
output:
done=touch("hafpipe/samples.done"),

Expand Down Expand Up @@ -588,7 +595,7 @@ rule hafpipe_merge_allele_frequencies:
         # This is the file name produced by the script. For now we do not allow to change this.
         table="hafpipe/all.csv"
         + (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
-        done="hafpipe/all.done",
+        done="hafpipe/all.csv.done",
     params:
         # We are potentially dealing with tons of files, and cannot open all of them at the same
         # time, due to OS limitations, check `ulimit -n` for example. When this param is set to 0,
@@ -639,7 +646,7 @@ rule all_hafpipe:
         [
             "hafpipe/all.csv"
             + (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
-            "hafpipe/all.done"
+            "hafpipe/all.csv.done"
         ]
         if config["params"]["hafpipe"].get("make-merged-table", False)
         else [],
workflow/rules/mapping-bowtie2.smk (3 additions, 1 deletion)

@@ -58,6 +58,7 @@ rule map_reads:
             ext=["1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2", "done"],
         ),
     output:
+        # Piping the file, so no done file here
         pipe("mapping/mapped/{sample}-{unit}.bam"),
         # touch("mapping/mapped/{sample}-{unit}.bam.done"),
     params:
@@ -88,8 +89,9 @@ rule map_reads:
 # At least, we can pipe the files from above to here, so this should not slow us down.
 rule sort_reads:
     input:
+        # Piping the file, so no done file here
         "mapping/mapped/{sample}-{unit}.bam",
-        "mapping/mapped/{sample}-{unit}.bam.done",
+        # "mapping/mapped/{sample}-{unit}.bam.done",
     output:
         (
             "mapping/sorted/{sample}-{unit}.bam"
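Background on these two hunks: a Snakemake pipe() output is streamed through a FIFO to exactly one consuming job, and Snakemake schedules producer and consumer together, so the BAM never exists as a regular file and a done flag for it would never be meaningful. A minimal self-contained sketch of the mechanics, with illustrative names and commands:

rule producer_sketch:
    output:
        # Streamed to the consumer via a FIFO, not written as a regular file.
        pipe("tmp/{sample}.txt"),
    shell:
        "echo 'data for {wildcards.sample}' > {output}"

rule consumer_sketch:
    input:
        # Consumed while producer_sketch is still running.
        "tmp/{sample}.txt",
    output:
        "result/{sample}.txt",
    shell:
        "cat {input} > {output}"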
workflow/rules/mapping-bwa-aln.smk (1 addition, 1 deletion)

@@ -119,7 +119,7 @@ rule bwa_sai_to_bam:
     input:
         fastq=get_trimmed_reads,
         sai=get_sai,
-        done=get_sai_done
+        done=get_sai_done,
         ref=config["data"]["reference-genome"],
         # Somehow, the wrapper expects the index extensions to be given,
         # instead of the underlying fasta file... Well, so let's do that.
workflow/rules/mapping.smk (13 additions, 17 deletions)

@@ -134,13 +134,13 @@ def get_sorted_sample_bams_done(wildcards):
return [b + ".done" for b in bams]


def get_all_sorted_sample_bams():
res = list()
for sample in config["global"]["sample-names"]:
for unit in get_sample_units(sample):
bam = f"mapping/sorted/{sample}-{unit}.bam"
res.append(bam)
return res
# def get_all_sorted_sample_bams():
# res = list()
# for sample in config["global"]["sample-names"]:
# for unit in get_sample_units(sample):
# bam = f"mapping/sorted/{sample}-{unit}.bam"
# res.append(bam)
# return res


# This is where all units are merged together.
@@ -315,6 +315,7 @@ if not duplicates_tool_good:
 # Base Quality Score Recalibration
 # =================================================================================================

+
 if config["settings"]["recalibrate-base-qualities"]:

     include: "mapping-recalibrate.smk"
@@ -334,16 +335,11 @@ def get_bam_from_mappings_table(sample):
assert "mappings-table" in config["data"] and config["data"]["mappings-table"]
bams = config["global"]["samples"].loc[sample, ["bam"]].dropna()

# Check if we have touched the bam done files already
if not hasattr(get_bam_from_mappings_table, "done"):
get_bam_from_mappings_table.done = False

# If not, touch all files, then set the internal flag
# so that we do not do this every time this function is called.
if not get_bam_from_mappings_table.done:
for f in bams:
# Touch all non-existing files. If they already exist,
# we do nothing, to not mess with their time stamps.
for f in bams:
if not os.path.isfile(f):
Path(f + ".done").touch()
get_bam_from_mappings_table.done = True

# Now we can return the bam file list to the caller.
return bams
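Assembled from the diff for readability (indentation reconstructed), the simplified function now reads as follows; the memoization flag could go because touching only files that do not yet exist is already idempotent:

def get_bam_from_mappings_table(sample):
    assert "mappings-table" in config["data"] and config["data"]["mappings-table"]
    bams = config["global"]["samples"].loc[sample, ["bam"]].dropna()

    # Touch all non-existing files. If they already exist,
    # we do nothing, to not mess with their time stamps.
    for f in bams:
        if not os.path.isfile(f):
            Path(f + ".done").touch()

    # Now we can return the bam file list to the caller.
    return bams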
@@ -473,7 +469,7 @@ def get_all_bams_done():

 rule all_bams:
     input:
-        merged=get_all_sorted_sample_bams(),
+        # merged=get_all_sorted_sample_bams(),
         bams=get_all_bams(),
         done=get_all_bams_done(),
         qc="qc/multiqc.html",
workflow/rules/trimming-none.smk (4 additions, 9 deletions)

@@ -13,16 +13,11 @@ def get_trimmed_reads(wildcards):
 def get_trimmed_reads_done(wildcards):
     files = get_trimmed_reads(wildcards)

-    # Check if we have touched the fastq done files already
-    if not hasattr(get_trimmed_reads_done, "done"):
-        get_trimmed_reads_done.done = False
-
-    # If not, touch all files, then set the internal flag
-    # so that we do not do this every time this function is called.
-    if not get_trimmed_reads_done.done:
-        for f in files:
+    # Touch all non-existing files. If they already exist,
+    # we do nothing, to not mess with their time stamps.
+    for f in files:
         if not os.path.isfile(f):
             Path(f + ".done").touch()
-        get_trimmed_reads_done.done = True

     # Now we can return the fastq done file list to the caller.
     return [f + ".done" for f in files]
