Skip to content

Commit

Permalink
Split java opts from mem resources in config
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Feb 11, 2025
1 parent deffbb3 commit cacf59b
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 28 deletions.
38 changes: 31 additions & 7 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -671,11 +671,21 @@ params:
# system-provided tmp dir is too small (which can happen on clusters).
# Note that the Java memory options, such as `-Xmx10g` to increase the available memory within
# the Java virtual machine are provided via the Snakemake memory management directly,
# for instance, via the memory specified in slurm jobs. Change the memory there as needed.
# and hence cannot be specified here. Instead, use the below `*-mem-mb` options,
# or, if you are running grenepipe via slurm, use the slurm job configuration.
# The SortVcf-java-opts option is used by the bcftools calling rules when using contig-group-size > 0.
MarkDuplicates-java-opts: ""
CollectMultipleMetrics-java-opts: ""
SortVcf-java-opts: ""
MergeVcfs-java-opts: ""

# Memory for the Java virtual machine for the picard programs.
# Unfortunately, Java does not automatically use the available memory, and instead needs
# to be told that it is allowed to do that. Specify the memory here as needed, in MB.
MarkDuplicates-mem-mb: 5000
CollectMultipleMetrics-mem-mb: 1024
SortVcf-mem-mb: 1024
MergeVcfs-mem-mb: 1024

# ----------------------------------------------------------------------
# dedup
Expand Down Expand Up @@ -770,10 +780,21 @@ params:
GenotypeGVCFs-extra: ""

# For some specific error cases, it might be necessary to adjust java settings for the tools.
# Note that the Java memory options, such as `-Xmx10g` to increase the available memory within
# the Java virtual machine are provided via the Snakemake memory management directly,
# and hence cannot be specified here. Instead, use the below `*-mem-mb` options,
# or, if you are running grenepipe via slurm, use the slurm job configuration.
HaplotypeCaller-java-opts: ""
CombineGVCFs-java-opts: ""
GenotypeGVCFs-java-opts: ""

# Memory for the Java virtual machine for the GATK programs.
# Unfortunately, Java does not automatically use the available memory, and instead needs
# to be told that it is allowed to do that. Specify the memory here as needed, in MB.
HaplotypeCaller-mem-mb: 1024
CombineGVCFs-mem-mb: 1024
GenotypeGVCFs-mem-mb: 1024

# Number of threads to use for the HaplotypeCaller. We recommend to keep this at 2,
# as GATK does not seem to do a great job of parallelizing anyway.
HaplotypeCaller-threads: 2
Expand All @@ -797,6 +818,7 @@ params:
# We also offer extra settings that are used for both.
extra: ""
java-opts: ""
mem-mb: 1024

# ----------------------------------------------------------------------
# GATK VariantRecalibrator + ApplyVQSR
Expand Down Expand Up @@ -878,14 +900,16 @@ params:
# We here set the gaussians to a very low value, simply for our test case to work, see
# https://gatk.broadinstitute.org/hc/en-us/community/posts/4408833794587-Insufficient-variance-error-for-VariantRecalibrator
# for details. We recommend to change this according to your actual needs.
extra-variantrecalibrator-SNP: "--max-gaussians 1"
extra-variantrecalibrator-INDEL: "--max-gaussians 1"
java-variantrecalibrator: ""
variantrecalibrator-extra-SNP: "--max-gaussians 1"
variantrecalibrator-extra-INDEL: "--max-gaussians 1"
variantrecalibrator-java-opts: ""
variantrecalibrator-mem-mb: 1024

# Extra command line params, and optional Java runtime options to provide to GATK ApplyVQSR
extra-applyvqsr-SNP: "--truth-sensitivity-filter-level 99.0"
extra-applyvqsr-INDEL: "--truth-sensitivity-filter-level 99.0"
java-applyvqsr: ""
applyvqsr-extra-SNP: "--truth-sensitivity-filter-level 99.0"
applyvqsr-extra-INDEL: "--truth-sensitivity-filter-level 99.0"
applyvqsr-java-opts: ""
applyvqsr-mem-mb: 1024

# ----------------------------------------------------------------------
# bcftools filter
Expand Down
8 changes: 4 additions & 4 deletions workflow/rules/calling-bcftools.smk
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ if config["settings"].get("contig-group-size"):
if platform.system() == "Darwin"
else ""
),
java_opts=config["params"]["picard"]["SortVcf-java-opts"],
# Append the JVM heap limit to the user-provided Java options. The mem-mb value is an
# integer in the config (and in the default), so it has to be converted to a string
# before it can be concatenated into the `-Xmx<N>m` flag.
java_opts=(
    config["params"]["picard"]["SortVcf-java-opts"]
    + " -Xmx"
    + str(config["params"]["picard"].get("SortVcf-mem-mb", 1024))
    + "m"
),
log:
"logs/calling/picard/sort-genotyped.log",
benchmark:
Expand All @@ -87,8 +87,8 @@ if config["settings"].get("contig-group-size"):
# Weird new picard syntax...
"picard SortVcf "
"{params.java_opts} "
"INPUT={input.vcf} "
"OUTPUT={output.vcf} "
"SEQUENCE_DICTIONARY={input.refdict} "
"--INPUT {input.vcf} "
"--OUTPUT {output.vcf} "
"--SEQUENCE_DICTIONARY {input.refdict} "
"{params.extra} "
"> {log} 2>&1"
17 changes: 13 additions & 4 deletions workflow/rules/calling-haplotypecaller.smk
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,15 @@ rule call_variants:
# Contigs are used as long as no restrict-regions are given in the config file.
extra=get_gatk_call_variants_params,
java_opts=config["params"]["gatk"]["HaplotypeCaller-java-opts"],
resources:
mem_mb=config["params"]["gatk"].get("HaplotypeCaller-mem-mb", 1024),
group:
"call_variants"
conda:
# Need to specify, yet again...
"../envs/gatk.yaml"
wrapper:
"0.51.3/bio/gatk/haplotypecaller"
"v5.7.0/bio/gatk/haplotypecaller"


# Deactivated the below, as this was causing trouble. Got the warning
Expand Down Expand Up @@ -155,6 +157,8 @@ rule combine_calls:
else ""
),
java_opts=config["params"]["gatk"]["CombineGVCFs-java-opts"],
resources:
mem_mb=config["params"]["gatk"].get("CombineGVCFs-mem-mb", 1024),
log:
"logs/calling/gatk-combine-gvcfs/{contig}.log",
benchmark:
Expand All @@ -164,7 +168,7 @@ rule combine_calls:
conda:
"../envs/gatk.yaml"
wrapper:
"0.51.3/bio/gatk/combinegvcfs"
"v5.7.0/bio/gatk/combinegvcfs"


rule genotype_variants:
Expand Down Expand Up @@ -194,6 +198,8 @@ rule genotype_variants:
else ""
),
java_opts=config["params"]["gatk"]["GenotypeGVCFs-java-opts"],
resources:
mem_mb=config["params"]["gatk"].get("GenotypeGVCFs-mem-mb", 1024),
log:
"logs/calling/gatk-genotype-gvcfs/{contig}.log",
benchmark:
Expand All @@ -203,7 +209,7 @@ rule genotype_variants:
conda:
"../envs/gatk.yaml"
wrapper:
"0.51.3/bio/gatk/genotypegvcfs"
"v5.7.0/bio/gatk/genotypegvcfs"


# =================================================================================================
Expand Down Expand Up @@ -239,14 +245,17 @@ rule merge_variants:
done=touch("calling/genotyped-all.vcf.gz.done"),
params:
# See duplicates-picard.smk for the reason why we need this on MacOS.
java_opts=config["params"]["picard"]["MergeVcfs-java-opts"],
extra=(
" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""
),
resources:
mem_mb=config["params"]["picard"].get("MergeVcfs-mem-mb", 1024),
log:
"logs/calling/picard-merge-genotyped.log",
benchmark:
"benchmarks/calling/genotyped/picard/merge-genotyped.log"
conda:
"../envs/picard.yaml"
wrapper:
"0.51.3/bio/picard/mergevcfs"
"v5.7.0/bio/picard/mergevcfs"
4 changes: 2 additions & 2 deletions workflow/rules/duplicates-picard.smk
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ rule mark_duplicates:
extra=config["params"]["picard"]["MarkDuplicates"]
+ (" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""),
java_opts=config["params"]["picard"]["MarkDuplicates-java-opts"]
# resources:
# mem_mb=1024,
resources:
mem_mb=config["params"]["picard"].get("MarkDuplicates-mem-mb", 5000),
group:
"mapping_extra"
conda:
Expand Down
4 changes: 3 additions & 1 deletion workflow/rules/filtering-gatk-variantfiltration.smk
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ rule gatk_hard_filter_calls:
filters=get_filter,
extra=config["params"]["gatk-variantfiltration"]["extra"],
java_opts=config["params"]["gatk-variantfiltration"]["java-opts"],
resources:
mem_mb=config["params"]["gatk-variantfiltration"].get("mem-mb", 1024),
log:
"logs/calling/gatk-variantfiltration/{vartype}.log",
benchmark:
Expand All @@ -38,4 +40,4 @@ rule gatk_hard_filter_calls:
conda:
"../envs/gatk.yaml"
wrapper:
"0.85.0/bio/gatk/variantfiltration"
"v5.7.0/bio/gatk/variantfiltration"
8 changes: 4 additions & 4 deletions workflow/rules/filtering-gatk-vqsr.smk
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_variant_recalibrator_extra(wildcards):
# config file dict retrieval. We additionally set the Rscript file, so that we get some plots.
vartype = wildcards.vartype
return (
config["params"]["gatk-vqsr"]["extra-variantrecalibrator-" + vartype]
config["params"]["gatk-vqsr"]["variantrecalibrator-extra-" + vartype]
+ " --rscript-file calling/filtered/all."
+ vartype
+ ".vqsr-recal.plots.R"
Expand All @@ -66,7 +66,7 @@ def get_variant_recalibrator_extra(wildcards):

def get_apply_vqsr_extra(wildcards):
# Same as above, need a function here for wildcard replacement
return config["params"]["gatk-vqsr"]["extra-applyvqsr-" + wildcards.vartype]
return config["params"]["gatk-vqsr"]["applyvqsr-extra-" + wildcards.vartype]


# Use the GATK VQSR machine learning based recalibration of quality scores instead of hard filtering.
Expand Down Expand Up @@ -100,7 +100,7 @@ rule gatk_variant_recalibrator:
annotation=config["params"]["gatk-vqsr"]["annotation"],
# Extras
extra=get_variant_recalibrator_extra,
java_opts=config["params"]["gatk-vqsr"]["java-variantrecalibrator"],
# Append the JVM heap limit to the user-provided Java options. The mem-mb value is an
# integer in the config (and in the default), so it has to be converted to a string
# before it can be concatenated into the `-Xmx<N>m` flag.
java_opts=(
    config["params"]["gatk-vqsr"]["variantrecalibrator-java-opts"]
    + " -Xmx"
    + str(config["params"]["gatk-vqsr"].get("variantrecalibrator-mem-mb", 1024))
    + "m"
),
log:
"logs/calling/gatk-variantrecalibrator/{vartype}.log",
benchmark:
Expand Down Expand Up @@ -147,7 +147,7 @@ rule gatk_apply_vqsr:
# set mode, must be either SNP, INDEL or BOTH
mode="{vartype}",
extra=get_apply_vqsr_extra,
java_opts=config["params"]["gatk-vqsr"]["java-applyvqsr"],
# Append the JVM heap limit to the user-provided Java options. The mem-mb value is an
# integer in the config (and in the default), so it has to be converted to a string
# before it can be concatenated into the `-Xmx<N>m` flag.
java_opts=(
    config["params"]["gatk-vqsr"]["applyvqsr-java-opts"]
    + " -Xmx"
    + str(config["params"]["gatk-vqsr"].get("applyvqsr-mem-mb", 1024))
    + "m"
),
# resources:
# mem_mb=50
conda:
Expand Down
2 changes: 2 additions & 0 deletions workflow/rules/qc-bam.smk
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ rule picard_collectmultiplemetrics:
+ " "
+ config["params"]["picard"]["CollectMultipleMetrics-extra"]
+ (" --USE_JDK_DEFLATER true --USE_JDK_INFLATER true" if platform.system() == "Darwin" else ""),
resources:
mem_mb=config["params"]["picard"].get("CollectMultipleMetrics-mem-mb", 1024),
conda:
"../envs/picard.yaml"
# script:
Expand Down
12 changes: 6 additions & 6 deletions workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -321,17 +321,17 @@ properties:
type: [array, object]
annotation:
type: [array, object]
extra-variantrecalibrator-SNP:
variantrecalibrator-extra-SNP:
type: string
extra-variantrecalibrator-INDEL:
variantrecalibrator-extra-INDEL:
type: string
java-variantrecalibrator:
variantrecalibrator-java-opts:
type: string
extra-applyvqsr-SNP:
applyvqsr-extra-SNP:
type: string
extra-applyvqsr-INDEL:
applyvqsr-extra-INDEL:
type: string
java-applyvqsr:
applyvqsr-java-opts:
type: string
bcftools-filter:
type: object
Expand Down

0 comments on commit cacf59b

Please sign in to comment.