snakefmt

moiexpositoalonsolab · Feb 12, 2025 · 6a75aa0 · 6a75aa0
1 parent e219df1
commit 6a75aa0
Show file tree

Hide file tree

Showing 14 changed files with 77 additions and 40 deletions.
diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -23,7 +23,9 @@ rule all:
         "calling/genotyped-all.vcf.gz",
         "calling/genotyped-all.vcf.gz.done",
         "calling/filtered-all.vcf.gz" if not config["settings"]["filter-variants"] == "none" else [],
-        "calling/filtered-all.vcf.gz.done" if not config["settings"]["filter-variants"] == "none" else [],
+        "calling/filtered-all.vcf.gz.done"
+        if not config["settings"]["filter-variants"] == "none"
+        else [],
         "annotation/snpeff.vcf.gz" if config["settings"]["snpeff"] else [],
         "annotation/snpeff.vcf.gz.done" if config["settings"]["snpeff"] else [],
         "annotation/vep.vcf.gz" if config["settings"]["vep"] else [],

diff --git a/workflow/rules/annotation.smk b/workflow/rules/annotation.smk
@@ -82,7 +82,7 @@ rule snpeff:
         stats=report("annotation/snpeff.html", category="Calls"),
         # summary statistics in CSV, optional
         csvstats="annotation/snpeff.csv",
-        done=touch("annotation/snpeff.vcf.gz.done")
+        done=touch("annotation/snpeff.vcf.gz.done"),
     log:
         "logs/annotation/snpeff.log",
     group:
@@ -220,7 +220,7 @@ rule vep:
             caption="../report/stats.rst",
             category="Calls",
         ),
-        done=touch("annotation/vep.vcf.gz.done")
+        done=touch("annotation/vep.vcf.gz.done"),
     params:
         # Pass a list of plugins to use,
         # see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html

diff --git a/workflow/rules/calling-bcftools.smk b/workflow/rules/calling-bcftools.smk
@@ -76,7 +76,10 @@ if config["settings"].get("contig-group-size"):
                 if platform.system() == "Darwin"
                 else ""
             ),
-            java_opts=config["params"]["picard"]["SortVcf-java-opts"] + " -Xmx" + str(config["params"]["picard"].get("SortVcf-mem-mb", 1024)) + "m",
+            java_opts=config["params"]["picard"]["SortVcf-java-opts"]
+            + " -Xmx"
+            + str(config["params"]["picard"].get("SortVcf-mem-mb", 1024))
+            + "m",
         log:
             "logs/calling/picard/sort-genotyped.log",
         benchmark:

diff --git a/workflow/rules/calling-freebayes.smk b/workflow/rules/calling-freebayes.smk
@@ -58,8 +58,8 @@ rule call_variants:
             )
         ),
     output:
-        pipe("calling/called/{contig}.vcf"),
         # touch("calling/called/{contig}.vcf.done"),
+        pipe("calling/called/{contig}.vcf"),
     log:
         "logs/calling/freebayes/{contig}.log",
     benchmark:
@@ -143,7 +143,9 @@ rule merge_variants:
         # See duplicates-picard.smk for the reason we need this on MacOS.
         java_opts=config["params"]["picard"]["MergeVcfs-java-opts"],
         extra=(
-            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
+            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
+            if platform.system() == "Darwin"
+            else ""
         ),
     resources:
         mem_mb=config["params"]["picard"].get("MergeVcfs-mem-mb", 1024),

diff --git a/workflow/rules/calling-haplotypecaller.smk b/workflow/rules/calling-haplotypecaller.smk
@@ -76,9 +76,9 @@ rule call_variants:
         extra=config["params"]["gatk"].get("HaplotypeCaller-extra", ""),
         java_opts=config["params"]["gatk"].get("HaplotypeCaller-java-opts", ""),
     resources:
-        mem_mb=config["params"]["gatk"].get("HaplotypeCaller-mem-mb", 1024),
         # Increase time limit in factors of 24h, if the job fails due to time limit.
         # time = lambda wildcards, input, threads, attempt: int(1440 * int(attempt))
+        mem_mb=config["params"]["gatk"].get("HaplotypeCaller-mem-mb", 1024),
     group:
         "call_variants"
     conda:
@@ -156,7 +156,12 @@ rule genomics_db_import:
         # Here, we actually use the intervals to provide them to the wrapper.
         intervals=get_gatk_intervals,
         db_action="create",
-        extra=" --reference " + config["data"]["reference-genome"] + " --sequence-dictionary " + genome_dict() + " " + config["params"]["gatk"].get("GenomicsDBImport-extra", ""),
+        extra=" --reference "
+        + config["data"]["reference-genome"]
+        + " --sequence-dictionary "
+        + genome_dict()
+        + " "
+        + config["params"]["gatk"].get("GenomicsDBImport-extra", ""),
         java_opts=config["params"]["gatk"].get("GenomicsDBImport-java-opts", ""),
     # threads: 2
     resources:
@@ -237,18 +242,26 @@ rule genotype_variants:
         ),
         refdict=genome_dict(),
         # Get the GVCF or GenomicsDB input, depending on which tool is requested in the config.
-        gvcf="calling/combined/all.{contig}.g.vcf.gz"
-        if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
-        else [],
-        gvcf_done="calling/combined/all.{contig}.g.vcf.gz.done"
-        if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
-        else [],
-        genomicsdb="calling/genomics_db/{contig}"
-        if config["params"]["gatk"].get("use-GenomicsDBImport", True)
-        else [],
-        genomicsdb_done="calling/genomics_db/{contig}.done"
-        if config["params"]["gatk"].get("use-GenomicsDBImport", True)
-        else [],
+        gvcf=(
+            "calling/combined/all.{contig}.g.vcf.gz"
+            if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
+            else []
+        ),
+        gvcf_done=(
+            "calling/combined/all.{contig}.g.vcf.gz.done"
+            if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
+            else []
+        ),
+        genomicsdb=(
+            "calling/genomics_db/{contig}"
+            if config["params"]["gatk"].get("use-GenomicsDBImport", True)
+            else []
+        ),
+        genomicsdb_done=(
+            "calling/genomics_db/{contig}.done"
+            if config["params"]["gatk"].get("use-GenomicsDBImport", True)
+            else []
+        ),
         # If known variants are set in the config, use them, and require the index file as well.
         known=config["data"]["known-variants"],
         knownidx=(
@@ -266,7 +279,10 @@ rule genotype_variants:
     params:
         # Again, we here use the intervals to provide them to the wrapper.
         intervals=get_gatk_intervals,
-        extra=" --sequence-dictionary " + genome_dict() + " " + config["params"]["gatk"]["GenotypeGVCFs-extra"],
+        extra=" --sequence-dictionary "
+        + genome_dict()
+        + " "
+        + config["params"]["gatk"]["GenotypeGVCFs-extra"],
         java_opts=config["params"]["gatk"]["GenotypeGVCFs-java-opts"],
     resources:
         mem_mb=config["params"]["gatk"].get("GenotypeGVCFs-mem-mb", 1024),
@@ -317,7 +333,9 @@ rule merge_variants:
         # See duplicates-picard.smk for the reason we need this on MacOS.
         java_opts=config["params"]["picard"].get("MergeVcfs-java-opts", ""),
         extra=(
-            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
+            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
+            if platform.system() == "Darwin"
+            else ""
         ),
     resources:
         mem_mb=config["params"]["picard"].get("MergeVcfs-mem-mb", 1024),

diff --git a/workflow/rules/duplicates-picard.smk b/workflow/rules/duplicates-picard.smk
@@ -34,8 +34,12 @@ rule mark_duplicates:
         # libgkl_compression, see https://github.com/broadinstitute/picard/issues/1329.
         # Hence, on MacOS, we add the two settings recommended by the github issue.
         extra=config["params"]["picard"]["MarkDuplicates"]
-        + (" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""),
-        java_opts=config["params"]["picard"]["MarkDuplicates-java-opts"]
+        + (
+            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
+            if platform.system() == "Darwin"
+            else ""
+        ),
+        java_opts=config["params"]["picard"]["MarkDuplicates-java-opts"],
     resources:
         mem_mb=config["params"]["picard"].get("MarkDuplicates-mem-mb", 5000),
     group:
@@ -44,4 +48,3 @@ rule mark_duplicates:
         "../envs/picard.yaml"
     wrapper:
         "v5.7.0/bio/picard/markduplicates"
-        # "0.51.3/bio/picard/markduplicates"
diff --git a/workflow/rules/filtering.smk b/workflow/rules/filtering.smk
@@ -110,7 +110,9 @@ rule merge_calls:
         # See duplicates-picard.smk for the reason we need this on MacOS.
         java_opts=config["params"]["picard"].get("MergeVcfs-java-opts", ""),
         extra=(
-            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
+            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
+            if platform.system() == "Darwin"
+            else ""
         ),
     resources:
         mem_mb=config["params"]["picard"].get("MergeVcfs-mem-mb", 1024),

diff --git a/workflow/rules/frequency.smk b/workflow/rules/frequency.smk
@@ -200,7 +200,7 @@ def get_all_hafpipe_raw_snp_tables(wildcards):
     return expand(
         get_hafpipe_snp_table_dir() + "/{chrom}.csv{ext}",
         chrom=get_hafpipe_chromosomes(fai),
-        ext=["", ".done"]
+        ext=["", ".done"],
     )
 
 
@@ -644,9 +644,9 @@ rule all_hafpipe:
     input:
         "hafpipe/afSite.done",
         [
-        "hafpipe/all.csv"
-        + (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
-        "hafpipe/all.csv.done"
+            "hafpipe/all.csv"
+            + (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
+            "hafpipe/all.csv.done",
         ]
         if config["params"]["hafpipe"].get("make-merged-table", False)
         else [],

diff --git a/workflow/rules/mapping-bowtie2.smk b/workflow/rules/mapping-bowtie2.smk
@@ -17,7 +17,7 @@ rule bowtie2_index:
             config["data"]["reference-genome"] + ".{ext}",
             ext=["1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2"],
         ),
-        done=touch(config["data"]["reference-genome"] + ".done")
+        done=touch(config["data"]["reference-genome"] + ".done"),
     params:
         # Bowtie expects the prefix, and creates the above output files automatically.
         # So, let's do some snakemake magic to make this work.
@@ -59,8 +59,8 @@ rule map_reads:
         ),
     output:
         # Piping the file, so no done file here
-        pipe("mapping/mapped/{sample}-{unit}.bam"),
         # touch("mapping/mapped/{sample}-{unit}.bam.done"),
+        pipe("mapping/mapped/{sample}-{unit}.bam"),
     params:
         # Prefix of reference genome index (built with bowtie2-build above)
         index=config["data"]["reference-genome"],
@@ -90,8 +90,8 @@ rule map_reads:
 rule sort_reads:
     input:
         # Piping the file, so no done file here
-        "mapping/mapped/{sample}-{unit}.bam",
         # "mapping/mapped/{sample}-{unit}.bam.done",
+        "mapping/mapped/{sample}-{unit}.bam",
     output:
         (
             "mapping/sorted/{sample}-{unit}.bam"

diff --git a/workflow/rules/mapping-bwa-aln.smk b/workflow/rules/mapping-bwa-aln.smk
@@ -178,7 +178,9 @@ rule bwa_bam_clean:
     params:
         # See duplicates-picard.smk for the reason we need this on MacOS.
         extra=(
-            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
+            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
+            if platform.system() == "Darwin"
+            else ""
         ),
     group:
         "mapping"

diff --git a/workflow/rules/mapping-recalibrate.smk b/workflow/rules/mapping-recalibrate.smk
@@ -99,8 +99,8 @@ rule recalibrate_base_qualities:
             if config["settings"]["keep-intermediate"]["mapping"]
             else temp("mapping/recal/{sample}.bam")
         ),
-        done=touch("mapping/recal/{sample}.bam.done"),
         # bam=protected("mapping/recal/{sample}.bam")
+        done=touch("mapping/recal/{sample}.bam.done"),
     params:
         extra=get_gatk_regions_param() + " " + config["params"]["gatk"]["BaseRecalibrator"],
     log:

diff --git a/workflow/rules/prepare-reference.smk b/workflow/rules/prepare-reference.smk
@@ -255,7 +255,9 @@ rule sequence_dictionary:
     params:
         # See duplicates-picard.smk for the reason we need this on MacOS.
         extra=(
-            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
+            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
+            if platform.system() == "Darwin"
+            else ""
         ),
     #     base= lambda wc: os.path.splitext(genome)[0],
     log:

diff --git a/workflow/rules/qc-bam.smk b/workflow/rules/qc-bam.smk
@@ -226,9 +226,13 @@ rule picard_collectmultiplemetrics:
     log:
         "logs/qc/picard-collectmultiplemetrics/{sample}.log",
     params:
-        java_opts = config["params"]["picard"].get("CollectMultipleMetrics-java-opts", ""),
-        extra = config["params"]["picard"].get("CollectMultipleMetrics-extra", "")
-        + (" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""),
+        java_opts=config["params"]["picard"].get("CollectMultipleMetrics-java-opts", ""),
+        extra=config["params"]["picard"].get("CollectMultipleMetrics-extra", "")
+        + (
+            " --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
+            if platform.system() == "Darwin"
+            else ""
+        ),
     resources:
         mem_mb=config["params"]["picard"].get("CollectMultipleMetrics-mem-mb", 1024),
     conda:

diff --git a/workflow/rules/trimming-skewer.smk b/workflow/rules/trimming-skewer.smk
@@ -99,7 +99,6 @@ def get_trimmed_reads_done(wildcards):
     return [f + ".done" for f in files]
 
 
-
 def get_trimming_report(sample, unit):
     """Get the report needed for MultiQC."""
     if is_single_end(sample, unit):