Skip to content

Commit

Permalink
snakefmt
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Feb 12, 2025
1 parent e219df1 commit 6a75aa0
Show file tree
Hide file tree
Showing 14 changed files with 77 additions and 40 deletions.
4 changes: 3 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ rule all:
"calling/genotyped-all.vcf.gz",
"calling/genotyped-all.vcf.gz.done",
"calling/filtered-all.vcf.gz" if not config["settings"]["filter-variants"] == "none" else [],
"calling/filtered-all.vcf.gz.done" if not config["settings"]["filter-variants"] == "none" else [],
"calling/filtered-all.vcf.gz.done"
if not config["settings"]["filter-variants"] == "none"
else [],
"annotation/snpeff.vcf.gz" if config["settings"]["snpeff"] else [],
"annotation/snpeff.vcf.gz.done" if config["settings"]["snpeff"] else [],
"annotation/vep.vcf.gz" if config["settings"]["vep"] else [],
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/annotation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ rule snpeff:
stats=report("annotation/snpeff.html", category="Calls"),
# summary statistics in CSV, optional
csvstats="annotation/snpeff.csv",
done=touch("annotation/snpeff.vcf.gz.done")
done=touch("annotation/snpeff.vcf.gz.done"),
log:
"logs/annotation/snpeff.log",
group:
Expand Down Expand Up @@ -220,7 +220,7 @@ rule vep:
caption="../report/stats.rst",
category="Calls",
),
done=touch("annotation/vep.vcf.gz.done")
done=touch("annotation/vep.vcf.gz.done"),
params:
# Pass a list of plugins to use,
# see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
Expand Down
5 changes: 4 additions & 1 deletion workflow/rules/calling-bcftools.smk
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ if config["settings"].get("contig-group-size"):
if platform.system() == "Darwin"
else ""
),
java_opts=config["params"]["picard"]["SortVcf-java-opts"] + " -Xmx" + str(config["params"]["picard"].get("SortVcf-mem-mb", 1024)) + "m",
java_opts=config["params"]["picard"]["SortVcf-java-opts"]
+ " -Xmx"
+ str(config["params"]["picard"].get("SortVcf-mem-mb", 1024))
+ "m",
log:
"logs/calling/picard/sort-genotyped.log",
benchmark:
Expand Down
6 changes: 4 additions & 2 deletions workflow/rules/calling-freebayes.smk
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ rule call_variants:
)
),
output:
pipe("calling/called/{contig}.vcf"),
# touch("calling/called/{contig}.vcf.done"),
pipe("calling/called/{contig}.vcf"),
log:
"logs/calling/freebayes/{contig}.log",
benchmark:
Expand Down Expand Up @@ -143,7 +143,9 @@ rule merge_variants:
# See duplicates-picard.smk for the reason we need this on MacOS.
java_opts=config["params"]["picard"]["MergeVcfs-java-opts"],
extra=(
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
if platform.system() == "Darwin"
else ""
),
resources:
mem_mb=config["params"]["picard"].get("MergeVcfs-mem-mb", 1024),
Expand Down
50 changes: 34 additions & 16 deletions workflow/rules/calling-haplotypecaller.smk
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ rule call_variants:
extra=config["params"]["gatk"].get("HaplotypeCaller-extra", ""),
java_opts=config["params"]["gatk"].get("HaplotypeCaller-java-opts", ""),
resources:
mem_mb=config["params"]["gatk"].get("HaplotypeCaller-mem-mb", 1024),
# Increase time limit in factors of 24h, if the job fails due to time limit.
# time = lambda wildcards, input, threads, attempt: int(1440 * int(attempt))
mem_mb=config["params"]["gatk"].get("HaplotypeCaller-mem-mb", 1024),
group:
"call_variants"
conda:
Expand Down Expand Up @@ -156,7 +156,12 @@ rule genomics_db_import:
# Here, we actually use the intervals to provide them to the wrapper.
intervals=get_gatk_intervals,
db_action="create",
extra=" --reference " + config["data"]["reference-genome"] + " --sequence-dictionary " + genome_dict() + " " + config["params"]["gatk"].get("GenomicsDBImport-extra", ""),
extra=" --reference "
+ config["data"]["reference-genome"]
+ " --sequence-dictionary "
+ genome_dict()
+ " "
+ config["params"]["gatk"].get("GenomicsDBImport-extra", ""),
java_opts=config["params"]["gatk"].get("GenomicsDBImport-java-opts", ""),
# threads: 2
resources:
Expand Down Expand Up @@ -237,18 +242,26 @@ rule genotype_variants:
),
refdict=genome_dict(),
# Get the GVCF or GenomicsDB input, depending on which tool is requested in the config.
gvcf="calling/combined/all.{contig}.g.vcf.gz"
if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
else [],
gvcf_done="calling/combined/all.{contig}.g.vcf.gz.done"
if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
else [],
genomicsdb="calling/genomics_db/{contig}"
if config["params"]["gatk"].get("use-GenomicsDBImport", True)
else [],
genomicsdb_done="calling/genomics_db/{contig}.done"
if config["params"]["gatk"].get("use-GenomicsDBImport", True)
else [],
gvcf=(
"calling/combined/all.{contig}.g.vcf.gz"
if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
else []
),
gvcf_done=(
"calling/combined/all.{contig}.g.vcf.gz.done"
if not config["params"]["gatk"].get("use-GenomicsDBImport", True)
else []
),
genomicsdb=(
"calling/genomics_db/{contig}"
if config["params"]["gatk"].get("use-GenomicsDBImport", True)
else []
),
genomicsdb_done=(
"calling/genomics_db/{contig}.done"
if config["params"]["gatk"].get("use-GenomicsDBImport", True)
else []
),
# If known variants are set in the config, use them, and require the index file as well.
known=config["data"]["known-variants"],
knownidx=(
Expand All @@ -266,7 +279,10 @@ rule genotype_variants:
params:
# Again, we here use the intervals to provide them to the wrapper.
intervals=get_gatk_intervals,
extra=" --sequence-dictionary " + genome_dict() + " " + config["params"]["gatk"]["GenotypeGVCFs-extra"],
extra=" --sequence-dictionary "
+ genome_dict()
+ " "
+ config["params"]["gatk"]["GenotypeGVCFs-extra"],
java_opts=config["params"]["gatk"]["GenotypeGVCFs-java-opts"],
resources:
mem_mb=config["params"]["gatk"].get("GenotypeGVCFs-mem-mb", 1024),
Expand Down Expand Up @@ -317,7 +333,9 @@ rule merge_variants:
# See duplicates-picard.smk for the reason we need this on MacOS.
java_opts=config["params"]["picard"].get("MergeVcfs-java-opts", ""),
extra=(
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
if platform.system() == "Darwin"
else ""
),
resources:
mem_mb=config["params"]["picard"].get("MergeVcfs-mem-mb", 1024),
Expand Down
9 changes: 6 additions & 3 deletions workflow/rules/duplicates-picard.smk
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ rule mark_duplicates:
# libgkl_compression, see https://github.com/broadinstitute/picard/issues/1329.
# Hence, on MacOS, we add the two settings recommended by the github issue.
extra=config["params"]["picard"]["MarkDuplicates"]
+ (" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""),
java_opts=config["params"]["picard"]["MarkDuplicates-java-opts"]
+ (
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
if platform.system() == "Darwin"
else ""
),
java_opts=config["params"]["picard"]["MarkDuplicates-java-opts"],
resources:
mem_mb=config["params"]["picard"].get("MarkDuplicates-mem-mb", 5000),
group:
Expand All @@ -44,4 +48,3 @@ rule mark_duplicates:
"../envs/picard.yaml"
wrapper:
"v5.7.0/bio/picard/markduplicates"
# "0.51.3/bio/picard/markduplicates"
4 changes: 3 additions & 1 deletion workflow/rules/filtering.smk
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ rule merge_calls:
# See duplicates-picard.smk for the reason we need this on MacOS.
java_opts=config["params"]["picard"].get("MergeVcfs-java-opts", ""),
extra=(
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
if platform.system() == "Darwin"
else ""
),
resources:
mem_mb=config["params"]["picard"].get("MergeVcfs-mem-mb", 1024),
Expand Down
8 changes: 4 additions & 4 deletions workflow/rules/frequency.smk
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def get_all_hafpipe_raw_snp_tables(wildcards):
return expand(
get_hafpipe_snp_table_dir() + "/{chrom}.csv{ext}",
chrom=get_hafpipe_chromosomes(fai),
ext=["", ".done"]
ext=["", ".done"],
)


Expand Down Expand Up @@ -644,9 +644,9 @@ rule all_hafpipe:
input:
"hafpipe/afSite.done",
[
"hafpipe/all.csv"
+ (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
"hafpipe/all.csv.done"
"hafpipe/all.csv"
+ (".gz" if config["params"]["hafpipe"].get("compress-merged-table", False) else ""),
"hafpipe/all.csv.done",
]
if config["params"]["hafpipe"].get("make-merged-table", False)
else [],
Expand Down
6 changes: 3 additions & 3 deletions workflow/rules/mapping-bowtie2.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ rule bowtie2_index:
config["data"]["reference-genome"] + ".{ext}",
ext=["1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2"],
),
done=touch(config["data"]["reference-genome"] + ".done")
done=touch(config["data"]["reference-genome"] + ".done"),
params:
# Bowtie expects the prefix, and creates the above output files automatically.
# So, let's do some snakemake magic to make this work.
Expand Down Expand Up @@ -59,8 +59,8 @@ rule map_reads:
),
output:
# Piping the file, so no done file here
pipe("mapping/mapped/{sample}-{unit}.bam"),
# touch("mapping/mapped/{sample}-{unit}.bam.done"),
pipe("mapping/mapped/{sample}-{unit}.bam"),
params:
# Prefix of reference genome index (built with bowtie2-build above)
index=config["data"]["reference-genome"],
Expand Down Expand Up @@ -90,8 +90,8 @@ rule map_reads:
rule sort_reads:
input:
# Piping the file, so no done file here
"mapping/mapped/{sample}-{unit}.bam",
# "mapping/mapped/{sample}-{unit}.bam.done",
"mapping/mapped/{sample}-{unit}.bam",
output:
(
"mapping/sorted/{sample}-{unit}.bam"
Expand Down
4 changes: 3 additions & 1 deletion workflow/rules/mapping-bwa-aln.smk
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,9 @@ rule bwa_bam_clean:
params:
# See duplicates-picard.smk for the reason we need this on MacOS.
extra=(
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
if platform.system() == "Darwin"
else ""
),
group:
"mapping"
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/mapping-recalibrate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ rule recalibrate_base_qualities:
if config["settings"]["keep-intermediate"]["mapping"]
else temp("mapping/recal/{sample}.bam")
),
done=touch("mapping/recal/{sample}.bam.done"),
# bam=protected("mapping/recal/{sample}.bam")
done=touch("mapping/recal/{sample}.bam.done"),
params:
extra=get_gatk_regions_param() + " " + config["params"]["gatk"]["BaseRecalibrator"],
log:
Expand Down
4 changes: 3 additions & 1 deletion workflow/rules/prepare-reference.smk
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,9 @@ rule sequence_dictionary:
params:
# See duplicates-picard.smk for the reason we need this on MacOS.
extra=(
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
if platform.system() == "Darwin"
else ""
),
# base= lambda wc: os.path.splitext(genome)[0],
log:
Expand Down
10 changes: 7 additions & 3 deletions workflow/rules/qc-bam.smk
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,13 @@ rule picard_collectmultiplemetrics:
log:
"logs/qc/picard-collectmultiplemetrics/{sample}.log",
params:
java_opts = config["params"]["picard"].get("CollectMultipleMetrics-java-opts", ""),
extra = config["params"]["picard"].get("CollectMultipleMetrics-extra", "")
+ (" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""),
java_opts=config["params"]["picard"].get("CollectMultipleMetrics-java-opts", ""),
extra=config["params"]["picard"].get("CollectMultipleMetrics-extra", "")
+ (
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true"
if platform.system() == "Darwin"
else ""
),
resources:
mem_mb=config["params"]["picard"].get("CollectMultipleMetrics-mem-mb", 1024),
conda:
Expand Down
1 change: 0 additions & 1 deletion workflow/rules/trimming-skewer.smk
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def get_trimmed_reads_done(wildcards):
return [f + ".done" for f in files]



def get_trimming_report(sample, unit):
"""Get the report needed for MultiQC."""
if is_single_end(sample, unit):
Expand Down

0 comments on commit 6a75aa0

Please sign in to comment.