Fix done file requests
lczech committed Feb 11, 2025
1 parent c772696 commit 2af215a
Showing 7 changed files with 40 additions and 40 deletions.
config/config.yaml (1 addition, 1 deletion)

@@ -640,7 +640,7 @@ params:

     # Extra parameters for MarkDuplicates.
     # See https://gatk.broadinstitute.org/hc/en-us/articles/360057439771-MarkDuplicates-Picard
-    MarkDuplicates: "--REMOVE_DUPLICATES true"
+    MarkDuplicates: "REMOVE_DUPLICATES=true"

     # Run several Picard QC tools, as needed, using Picard CollectMultipleMetrics.
     # See https://gatk.broadinstitute.org/hc/en-us/articles/360042478112-CollectMultipleMetrics-Picard
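Side note on the syntax here: Picard's legacy command-line parser takes KEY=VALUE pairs, while the newer GATK-style parser takes --KEY VALUE, so this extras string has to match whichever style the wrapped Picard call uses. A minimal sketch of where such a value typically ends up; this is a hypothetical rule for illustration only, and the config path and file names are assumptions, not the repository's actual rule:

rule mark_duplicates_sketch:
    input:
        "mapping/sorted/{sample}.bam",
    output:
        bam="mapping/dedup/{sample}.bam",
        metrics="qc/dedup/{sample}.metrics.txt",
    params:
        # Assumed config path; the extras string is forwarded verbatim.
        extra=config["params"]["picard"]["MarkDuplicates"],
    shell:
        # Legacy Picard syntax throughout, matching REMOVE_DUPLICATES=true.
        "picard MarkDuplicates INPUT={input} OUTPUT={output.bam} "
        "METRICS_FILE={output.metrics} {params.extra}"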
workflow/rules/calling-bcftools-individual.smk (2 additions, 2 deletions)

@@ -101,7 +101,7 @@ rule combine_contig:
     log:
         "logs/calling/bcftools/combine-contig-{contig}.log",
     benchmark:
-        "benchmarks/calling/called/bcftools/combine-contig-{contig}.log"
+        "benchmarks/calling/combined/bcftools/combine-contig-{contig}.log"
     conda:
         "../envs/bcftools.yaml"
     shell:
@@ -137,7 +137,7 @@ def combined_contig_gvcfs(wildcards):
 # Also need the done files to make sure snakemake doesn't mess this up.
 def combined_contig_done(wildcards):
     fai = checkpoints.samtools_faidx.get().output[0]
-    return expand("calling/called/all.{contig}.g.vcf.gz.done", contig=get_contigs(fai))
+    return expand("calling/combined/all.{contig}.g.vcf.gz.done", contig=get_contigs(fai))


 # We also need a comma-separated list of the contigs, so that bcftools can output
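For context, a minimal sketch of how a done-file input function like combined_contig_done is typically consumed. The rule below is illustrative only; its name, output path, and shell command are assumptions, while the two input functions are the ones from the diff above:

rule combine_all_sketch:
    input:
        # The per-contig g.vcf files, via the checkpoint-driven function.
        gvcfs=combined_contig_gvcfs,
        # The matching done flags; requesting them forces Snakemake to
        # consider the per-contig jobs truly finished before this one runs.
        done=combined_contig_done,
    output:
        "calling/combined/all.g.vcf.gz",
    shell:
        "bcftools concat {input.gvcfs} -O z -o {output}"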
workflow/rules/frequency.smk (16 additions, 9 deletions)

@@ -174,7 +174,7 @@ rule hafpipe_snp_table:
         # in order to better inform the user about the situation and how to fix this.
         numeric=get_hafpipe_snp_table_dir() + "/{chrom}.csv.numeric",
         numericbgz=get_hafpipe_snp_table_dir() + "/{chrom}.csv.numeric.bgz",
-        done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+        done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
     params:
         tasks="1",
         chrom="{chrom}",
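All of the done-file renames in this file follow one pattern: the flag is now named after the exact file it marks, i.e. the file path plus a .done suffix, instead of a bare per-chromosome flag. A comment-only illustration, with directory and sample names assumed:

# Naming convention after this commit (paths are illustrative):
#   <snp table dir>/chr1.csv   ->  <snp table dir>/chr1.csv.done
#   hafpipe/samples/S1.csv     ->  hafpipe/samples/S1.csv.done
#   hafpipe/all.csv            ->  hafpipe/all.csv.done
# A bare "{chrom}.done" is ambiguous once a rule produces several files per
# chromosome; "<file>.done" lets any rule request the flag mechanically.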
@@ -197,7 +197,11 @@ def get_all_hafpipe_raw_snp_tables(wildcards):
     # We use a checkpoint to create the fai file from our ref genome, which gives us the chrom names.
     # Snakemake then needs an input function to work with the fai checkpoint here.
     fai = checkpoints.samtools_faidx.get().output[0]
-    return expand(get_hafpipe_snp_table_dir() + "/{chrom}.csv", chrom=get_hafpipe_chromosomes(fai))
+    return expand(
+        get_hafpipe_snp_table_dir() + "/{chrom}.csv{ext}",
+        chrom=get_hafpipe_chromosomes(fai),
+        ext=["", ".done"]
+    )


 # Rule that requests all HAFpipe SNP table files, so that users can impute them themselves.
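To see what the reworked expand() above returns, a short worked example; the chromosome names and directory are assumed for illustration:

# expand() takes the product of its wildcard lists, with the last one
# varying fastest, so each table is immediately followed by its flag:
#
#   expand("snp-tables/{chrom}.csv{ext}", chrom=["chr1", "chr2"], ext=["", ".done"])
#   == ["snp-tables/chr1.csv", "snp-tables/chr1.csv.done",
#       "snp-tables/chr2.csv", "snp-tables/chr2.csv.done"]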
@@ -245,7 +249,7 @@ if impmethod in ["simpute", "npute"]:
     rule hafpipe_impute_snp_table:
         input:
             snptable=get_hafpipe_snp_table_dir() + "/{chrom}.csv",
-            done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+            done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
             bins=get_hafpipe_bins(),
         output:
             csv=get_hafpipe_snp_table_dir() + "/{chrom}.csv." + impmethod,
@@ -277,7 +281,7 @@ elif impmethod != "":
     rule hafpipe_impute_snp_table:
         input:
             snptable=get_hafpipe_snp_table_dir() + "/{chrom}.csv",
-            done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+            done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
         output:
             csv=get_hafpipe_snp_table_dir() + "/{chrom}.csv." + impmethod,
             done=touch(get_hafpipe_snp_table_dir() + "/{chrom}.csv." + impmethod + ".done"),
@@ -318,7 +322,7 @@ if impmethod == "":
             snptable=get_hafpipe_snp_table_dir() + "/{chrom}.csv",
             alleleCts=get_hafpipe_snp_table_dir() + "/{chrom}.csv.alleleCts",
             numeric=get_hafpipe_snp_table_dir() + "/{chrom}.csv.numeric.bgz",
-            done=get_hafpipe_snp_table_dir() + "/{chrom}.done",
+            done=get_hafpipe_snp_table_dir() + "/{chrom}.csv.done",
         output:
             flag=get_hafpipe_snp_table_dir() + "/{chrom}.csv.flag",
         shell:
@@ -522,7 +526,7 @@ rule hafpipe_concat_sample_allele_frequencies:
         # This is the file name produced by the script. For now we do not allow to change this.
         table="hafpipe/samples/{sample}.csv"
         + (".gz" if config["params"]["hafpipe"].get("compress-sample-tables", False) else ""),
-        done="hafpipe/samples/{sample}.done",
+        done="hafpipe/samples/{sample}.csv.done",
     params:
         # The rule needs access to the list of chromosomes, and to the sample.
         sample="{sample}",
@@ -543,7 +547,10 @@ rule hafpipe_collect_concat_samples:
+ (".gz" if config["params"]["hafpipe"].get("compress-sample-tables", False) else ""),
sample=config["global"]["sample-names"],
),
done="hafpipe/samples/{sample}.done",
done=expand(
"hafpipe/samples/{sample}.csv.done",
sample=config["global"]["sample-names"],
),
output:
done=touch("hafpipe/samples.done"),

Expand Down Expand Up @@ -588,7 +595,7 @@ rule hafpipe_merge_allele_frequencies:
         # This is the file name produced by the script. For now we do not allow to change this.
         table="hafpipe/all.csv"
         + (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
-        done="hafpipe/all.done",
+        done="hafpipe/all.csv.done",
     params:
         # We are potentially dealing with tons of files, and cannot open all of them at the same
         # time, due to OS limitations, check `ulimit -n` for example. When this param is set to 0,
@@ -639,7 +646,7 @@ rule all_hafpipe:
         [
             "hafpipe/all.csv"
             + (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
-            "hafpipe/all.done"
+            "hafpipe/all.csv.done"
         ]
         if config["params"]["hafpipe"].get("make-merged-table", False)
         else [],
workflow/rules/mapping-bowtie2.smk (3 additions, 1 deletion)

@@ -58,6 +58,7 @@ rule map_reads:
             ext=["1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2", "done"],
         ),
     output:
+        # Piping the file, so no done file here
         pipe("mapping/mapped/{sample}-{unit}.bam"),
         # touch("mapping/mapped/{sample}-{unit}.bam.done"),
     params:
@@ -88,8 +89,9 @@ rule map_reads:
 # At least, we can pipe the files from above to here, so this should not slow us down.
 rule sort_reads:
     input:
+        # Piping the file, so no done file here
         "mapping/mapped/{sample}-{unit}.bam",
-        "mapping/mapped/{sample}-{unit}.bam.done",
+        # "mapping/mapped/{sample}-{unit}.bam.done",
     output:
         (
             "mapping/sorted/{sample}-{unit}.bam"
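Background on these two hunks: a Snakemake pipe() output is streamed through a FIFO to exactly one consuming job, and Snakemake schedules producer and consumer together, so the BAM never exists as a regular file and a done flag for it would never be meaningful. A minimal self-contained sketch of the mechanics, with illustrative names and commands:

rule producer_sketch:
    output:
        # Streamed to the consumer via a FIFO, not written as a regular file.
        pipe("tmp/{sample}.txt"),
    shell:
        "echo 'data for {wildcards.sample}' > {output}"

rule consumer_sketch:
    input:
        # Consumed while producer_sketch is still running.
        "tmp/{sample}.txt",
    output:
        "result/{sample}.txt",
    shell:
        "cat {input} > {output}"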
workflow/rules/mapping-bwa-aln.smk (1 addition, 1 deletion)

@@ -119,7 +119,7 @@ rule bwa_sai_to_bam:
     input:
         fastq=get_trimmed_reads,
         sai=get_sai,
-        done=get_sai_done
+        done=get_sai_done,
         ref=config["data"]["reference-genome"],
         # Somehow, the wrapper expects the index extensions to be given,
         # instead of the underlying fasta file... Well, so let's do that.
workflow/rules/mapping.smk (13 additions, 17 deletions)

@@ -134,13 +134,13 @@ def get_sorted_sample_bams_done(wildcards):
return [b + ".done" for b in bams]


def get_all_sorted_sample_bams():
res = list()
for sample in config["global"]["sample-names"]:
for unit in get_sample_units(sample):
bam = f"mapping/sorted/{sample}-{unit}.bam"
res.append(bam)
return res
# def get_all_sorted_sample_bams():
# res = list()
# for sample in config["global"]["sample-names"]:
# for unit in get_sample_units(sample):
# bam = f"mapping/sorted/{sample}-{unit}.bam"
# res.append(bam)
# return res


# This is where all units are merged together.
@@ -315,6 +315,7 @@ if not duplicates_tool_good:
 # Base Quality Score Recalibration
 # =================================================================================================

+
 if config["settings"]["recalibrate-base-qualities"]:

     include: "mapping-recalibrate.smk"
@@ -334,16 +335,11 @@ def get_bam_from_mappings_table(sample):
assert "mappings-table" in config["data"] and config["data"]["mappings-table"]
bams = config["global"]["samples"].loc[sample, ["bam"]].dropna()

# Check if we have touched the bam done files already
if not hasattr(get_bam_from_mappings_table, "done"):
get_bam_from_mappings_table.done = False

# If not, touch all files, then set the internal flag
# so that we do not do this every time this function is called.
if not get_bam_from_mappings_table.done:
for f in bams:
# Touch all non-existing files. If they already exist,
# we do nothing, to not mess with their time stamps.
for f in bams:
if not os.path.isfile(f):
Path(f + ".done").touch()
get_bam_from_mappings_table.done = True

# Now we can return the bam file list to the caller.
return bams
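Assembled from the diff for readability (indentation reconstructed), the simplified function now reads as follows; the memoization flag could go because touching only files that do not yet exist is already idempotent:

def get_bam_from_mappings_table(sample):
    assert "mappings-table" in config["data"] and config["data"]["mappings-table"]
    bams = config["global"]["samples"].loc[sample, ["bam"]].dropna()

    # Touch all non-existing files. If they already exist,
    # we do nothing, to not mess with their time stamps.
    for f in bams:
        if not os.path.isfile(f):
            Path(f + ".done").touch()

    # Now we can return the bam file list to the caller.
    return bams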
@@ -473,7 +469,7 @@ def get_all_bams_done():

 rule all_bams:
     input:
-        merged=get_all_sorted_sample_bams(),
+        # merged=get_all_sorted_sample_bams(),
         bams=get_all_bams(),
         done=get_all_bams_done(),
         qc="qc/multiqc.html",
workflow/rules/trimming-none.smk (4 additions, 9 deletions)

@@ -13,16 +13,11 @@ def get_trimmed_reads(wildcards):
 def get_trimmed_reads_done(wildcards):
     files = get_trimmed_reads(wildcards)

-    # Check if we have touched the fastq done files already
-    if not hasattr(get_trimmed_reads_done, "done"):
-        get_trimmed_reads_done.done = False
-
-    # If not, touch all files, then set the internal flag
-    # so that we do not do this every time this function is called.
-    if not get_trimmed_reads_done.done:
-        for f in files:
+    # Touch all non-existing files. If they already exist,
+    # we do nothing, to not mess with their time stamps.
+    for f in files:
         if not os.path.isfile(f):
             Path(f + ".done").touch()
-        get_trimmed_reads_done.done = True

     # Now we can return the fastq done file list to the caller.
     return [f + ".done" for f in files]
