diff --git a/CHANGELOG.md b/CHANGELOG.md index 1717141cb..b5514b2ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## dev [unreleased] +## 3.3.1dev [unreleased] ### `Added` +- [#745](https://github.com/nf-core/mag/pull/745) - Added pipeline parameter `spades_downstreaminput` to use contigs instead of scaffolds (by @Pranjal-Bioinfo, @jfy133, @GallVp & @sateeshperi). +- [#745](https://github.com/nf-core/mag/pull/745) - Added `trimmomatic` as an additional pre-processing tool (by @Pranjal-Bioinfo, @jfy133, @GallVp & @sateeshperi). +- [#745](https://github.com/nf-core/mag/pull/745) - Added parameters for `concoct/cut_up_fasta.py` including `bin_concoct_chunksize`, `bin_concoct_overlap` and `bin_concoct_donotconcatlast` (by @Pranjal-Bioinfo, @jfy133, @GallVp & @sateeshperi). - [#777](https://github.com/nf-core/mag/pull/777) - Improved input validation through additional JSON keywords and error messages (by @agusinac) ### `Changed` diff --git a/README.md b/README.md index 9739eeff9..19757ebe7 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. -By default, the pipeline currently performs the following: it supports both short and long reads, quality trims the reads and adapters with [fastp](https://github.com/OpenGene/fastp) and [Porechop](https://github.com/rrwick/Porechop), and performs basic QC with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), and merge multiple sequencing runs. 
+By default, the pipeline currently performs the following: it supports both short and long reads, quality trims the reads and adapters with [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval](https://github.com/MikkelSchubert/adapterremoval), or [trimmomatic](https://github.com/usadellab/Trimmomatic) and [Porechop](https://github.com/rrwick/Porechop), performs basic QC with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), and merges multiple sequencing runs. The pipeline then: diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index a20422b35..c40aad3e2 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -28,6 +28,7 @@ run_modules: - prokka - porechop - filtlong + - trimmomatic ## Module order top_modules: @@ -37,6 +38,7 @@ top_modules: - "*trimmed*" - "fastp" - "adapterremoval" + - "trimmomatic" - "porechop" - "filtlong" - "fastqc": @@ -137,6 +139,8 @@ extra_fn_clean_exts: - "phix_removed" - "centrifuge_kreport" - "_fastp" + - "_trimmomatic_trim.log" + - "_trimmomatic_trim" ## Prettification custom_logo: "mag_logo_mascot_light.png" diff --git a/conf/modules.config b/conf/modules.config index e14432d63..b31fd10f2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -47,6 +47,25 @@ process { tag = { "${meta.id}_run${meta.run}" } } + withName: TRIMMOMATIC { + ext.args = "LEADING:30 TRAILING:30 SLIDINGWINDOW:4:20 MINLEN:35" + publishDir = [ + [ + path: { "${params.outdir}/QC_shortreads/trimmomatic/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*_trim.log", + ], + [ + path: { "${params.outdir}/QC_shortreads/trimmomatic/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_clipped_reads, + ], + ] + ext.prefix = { "${meta.id}_run${meta.run}_trimmomatic" } + tag = { "${meta.id}_run${meta.run}" } + } + withName: ADAPTERREMOVAL_PE { ext.args = [ "--minlength ${params.reads_minlength}", @@ -620,6 +639,14 @@ process { ] } + withName: 
CONCOCT_CUTUPFASTA { + ext.args = [ + "-c ${params.bin_concoct_chunksize}", + "-o ${params.bin_concoct_overlap}", + params.bin_concoct_donotconcatlast ? "" : "--merge_last", + ].join(' ').trim() + } + withName: 'CONCOCT_.*' { publishDir = [ [ diff --git a/conf/test_ancient_dna.config b/conf/test_ancient_dna.config index e8dab425e..4037b0527 100644 --- a/conf/test_ancient_dna.config +++ b/conf/test_ancient_dna.config @@ -15,7 +15,7 @@ process { resourceLimits = [ cpus: 4, memory: '15.GB', - time: '1.h' + time: '1.h', ] } @@ -28,6 +28,8 @@ params { input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.csv' centrifuge_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_cf.tar.gz' kraken2_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_kraken.tgz' + clip_tool = 'trimmomatic' + spades_downstreaminput = 'contigs' skip_krona = true min_length_unbinned_contigs = 1 max_unbinned_contigs = 2 diff --git a/docs/output.md b/docs/output.md index 60794d162..e1ff5b509 100644 --- a/docs/output.md +++ b/docs/output.md @@ -57,6 +57,7 @@ FastQC is run for visualising the general quality metrics of the sequencing runs - `QC_shortreads/fastp/[sample]/` - `fastp.html`: Interactive report - `fastp.json`: Report in json format + - `[sample_id]*.fastq.gz`: Clipped reads in FASTQ format (if `--save_clipped_reads` provided) @@ -69,6 +70,20 @@ FastQC is run for visualising the general quality metrics of the sequencing runs - `QC_shortreads/adapterremoval/[sample]/` - `[sample]_ar2.settings`: AdapterRemoval log file. + - `[sample_id]*.fastq.gz`: Clipped reads in FASTQ format (if `--save_clipped_reads` provided) + + + +### Trimmomatic + +[Trimmomatic](http://www.usadellab.org/cms/?page=trimmomatic) performs a variety of useful trimming tasks for illumina paired-end and single ended data. + +
+Output files + +- `QC_shortreads/trimmomatic/[sample]/` + - `[sample]_trimmomatic_trim.log`: Trimmomatic log file. + - `[sample_id]*.fastq.gz`: Clipped reads in FASTQ format (if `--save_clipped_reads` provided)
diff --git a/modules.json b/modules.json index 678b23ed0..198252f39 100644 --- a/modules.json +++ b/modules.json @@ -282,6 +282,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "trimmomatic": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", diff --git a/modules/nf-core/trimmomatic/environment.yml b/modules/nf-core/trimmomatic/environment.yml new file mode 100644 index 000000000..ab4b72b77 --- /dev/null +++ b/modules/nf-core/trimmomatic/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::trimmomatic=0.39 diff --git a/modules/nf-core/trimmomatic/main.nf b/modules/nf-core/trimmomatic/main.nf new file mode 100644 index 000000000..ee500d852 --- /dev/null +++ b/modules/nf-core/trimmomatic/main.nf @@ -0,0 +1,71 @@ +process TRIMMOMATIC { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/trimmomatic:0.39--hdfd78af_2': + 'biocontainers/trimmomatic:0.39--hdfd78af_2' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.paired.trim*.fastq.gz") , emit: trimmed_reads + tuple val(meta), path("*.unpaired.trim_*.fastq.gz"), emit: unpaired_reads, optional:true + tuple val(meta), path("*_trim.log") , emit: trim_log + tuple val(meta), path("*_out.log") , emit: out_log + tuple val(meta), path("*.summary") , emit: summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "SE" : "PE" + def output = meta.single_end ? 
+ "${prefix}.SE.paired.trim.fastq.gz" // HACK to avoid unpaired and paired in the trimmed_reads output + : "${prefix}.paired.trim_1.fastq.gz ${prefix}.unpaired.trim_1.fastq.gz ${prefix}.paired.trim_2.fastq.gz ${prefix}.unpaired.trim_2.fastq.gz" + def qual_trim = task.ext.args2 ?: '' + """ + trimmomatic \\ + $trimmed \\ + -threads $task.cpus \\ + -trimlog ${prefix}_trim.log \\ + -summary ${prefix}.summary \\ + $reads \\ + $output \\ + $qual_trim \\ + $args 2> >(tee ${prefix}_out.log >&2) + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimmomatic: \$(trimmomatic -version) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + if (meta.single_end) { + output_command = "echo '' | gzip > ${prefix}.SE.paired.trim.fastq.gz" + } else { + output_command = "echo '' | gzip > ${prefix}.paired.trim_1.fastq.gz\n" // one command per line so every paired-end stub file is created + output_command += "echo '' | gzip > ${prefix}.paired.trim_2.fastq.gz\n" + output_command += "echo '' | gzip > ${prefix}.unpaired.trim_1.fastq.gz\n" + output_command += "echo '' | gzip > ${prefix}.unpaired.trim_2.fastq.gz" + } + + """ + $output_command + touch ${prefix}.summary + touch ${prefix}_trim.log + touch ${prefix}_out.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimmomatic: \$(trimmomatic -version) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/trimmomatic/meta.yml b/modules/nf-core/trimmomatic/meta.yml new file mode 100644 index 000000000..e74014d5d --- /dev/null +++ b/modules/nf-core/trimmomatic/meta.yml @@ -0,0 +1,85 @@ +name: "trimmomatic" +description: Performs quality and adapter trimming on paired end and single end reads +keywords: + - trimming + - adapter trimming + - quality trimming +tools: + - "trimmomatic": + description: "A flexible read trimming tool for Illumina NGS data" + homepage: "http://www.usadellab.org/cms/?page=trimmomatic" + documentation: "https://github.com/usadellab/Trimmomatic" + doi: "10.1093/bioinformatics/btu170" + licence: ["GPL v3"] + identifier: biotools:trimmomatic 
+input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + Input FastQ files of size 1 or 2 for single-end and paired-end data, respectively. + pattern: "*.fastq.gz" +output: + - trimmed_reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paired.trim*.fastq.gz": + type: file + description: The trimmed/modified paired end fastq reads + pattern: "*.paired.trim*.fastq.gz" + - unpaired_reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.unpaired.trim_*.fastq.gz": + type: file + description: The trimmed/modified unpaired end fastq reads + pattern: "*.unpaired.trim_*.fastq.gz" + - trim_log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_trim.log": + type: file + description: trimmomatic log file, from the trim_log parameter + pattern: "*.log" + - out_log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_out.log": + type: file + description: log of output from the standard out + pattern: "*.log" + - summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.summary": + type: file + description: trimmomatic summary file of surviving and dropped reads + pattern: "*.summary" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@alyssa-ab" +maintainers: + - "@alyssa-ab" diff --git a/modules/nf-core/trimmomatic/tests/main.nf.test b/modules/nf-core/trimmomatic/tests/main.nf.test new file mode 100644 index 000000000..fee08848c --- /dev/null +++ b/modules/nf-core/trimmomatic/tests/main.nf.test @@ -0,0 +1,118 @@ +nextflow_process { + + name "Test Process TRIMMOMATIC" + script "../main.nf" + process "TRIMMOMATIC" + tag "modules" + tag "modules_nfcore" + tag "trimmomatic" + + test("Single-Read") { + config "./nextflow_SE.config" + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.trimmed_reads != null }, + { assert process.out.trimmed_reads.get(0).get(1) ==~ ".*.SE.paired.trim.fastq.gz" }, + { assert process.out.out_log.get(0).get(1) ==~ ".*.log" }, + { assert snapshot(process.out.trimmed_reads, + process.out.trim_log, + process.out.summary, + process.out.versions + ).match() } + ) + } + } + + test("Paired-Reads") { + config "./nextflow_PE.config" + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.trimmed_reads != null }, + { assert process.out.trimmed_reads.get(0).get(1).get(0) ==~ ".*.paired.trim_1.fastq.gz" }, 
+ { assert process.out.trimmed_reads.get(0).get(1).get(1) ==~ ".*.paired.trim_2.fastq.gz" }, + { assert process.out.out_log.get(0).get(1) ==~ ".*.log" }, + { assert snapshot(process.out.trimmed_reads, + process.out.trim_log, + process.out.summary, + process.out.versions + ).match() } + ) + } + } + + test("No Adaptors") { + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.failed } + ) + } + } + + test("Single-Read - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/trimmomatic/tests/main.nf.test.snap b/modules/nf-core/trimmomatic/tests/main.nf.test.snap new file mode 100644 index 000000000..6b51bc4e2 --- /dev/null +++ b/modules/nf-core/trimmomatic/tests/main.nf.test.snap @@ -0,0 +1,178 @@ +{ + "Single-Read - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SE.paired.trim.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trim.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test_out.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + 
"versions.yml:md5,14413a048f088a147fb04f3d59c6c604" + ], + "out_log": [ + [ + { + "id": "test", + "single_end": true + }, + "test_out.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trim.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trimmed_reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SE.paired.trim.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "unpaired_reads": [ + + ], + "versions": [ + "versions.yml:md5,14413a048f088a147fb04f3d59c6c604" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-26T21:50:12.135007267" + }, + "Single-Read": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SE.paired.trim.fastq.gz:md5,e68abbd3b88f7ec12940a4f5c2b8bfb9" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_trim.log:md5,e4c3f619e9b0e26847f8f3e3d9af319b" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary:md5,24c973237557a1439c775ca19a5deaa5" + ] + ], + [ + "versions.yml:md5,14413a048f088a147fb04f3d59c6c604" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-26T21:56:07.085100348" + }, + "Paired-Reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.trim_1.fastq.gz:md5,a866e64f451745f176316d0df7d52b30", + "test.paired.trim_2.fastq.gz:md5,725d4ab909b39291ab56b090cab32075" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_trim.log:md5,9629761761a34576b3484bf4174f681f" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.summary:md5,9698e5e5c060bbe64588998fe35f8d71" + ] + ], + [ + "versions.yml:md5,14413a048f088a147fb04f3d59c6c604" + ] + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.04.2" + }, + "timestamp": "2024-06-26T21:56:18.779303181" + } +} diff --git a/modules/nf-core/trimmomatic/tests/nextflow_PE.config b/modules/nf-core/trimmomatic/tests/nextflow_PE.config new file mode 100644 index 000000000..3f8fdfe11 --- /dev/null +++ b/modules/nf-core/trimmomatic/tests/nextflow_PE.config @@ -0,0 +1,6 @@ +process { + + withName: TRIMMOMATIC { + ext.args = 'ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36' + } +} diff --git a/modules/nf-core/trimmomatic/tests/nextflow_SE.config b/modules/nf-core/trimmomatic/tests/nextflow_SE.config new file mode 100644 index 000000000..af777fda0 --- /dev/null +++ b/modules/nf-core/trimmomatic/tests/nextflow_SE.config @@ -0,0 +1,6 @@ +process { + + withName: TRIMMOMATIC { + ext.args = 'ILLUMINACLIP:TruSeq3-SE:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36' + } +} diff --git a/modules/nf-core/trimmomatic/tests/tags.yml b/modules/nf-core/trimmomatic/tests/tags.yml new file mode 100644 index 000000000..68c5af6a6 --- /dev/null +++ b/modules/nf-core/trimmomatic/tests/tags.yml @@ -0,0 +1,2 @@ +trimmomatic: + - modules/nf-core/trimmomatic/** diff --git a/nextflow.config b/nextflow.config index 67f4a1dd1..e32c73563 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,6 +59,7 @@ params { // assembly options coassemble_group = false spades_options = null + spades_downstreaminput = 'scaffolds' megahit_options = null skip_spades = false skip_spadeshybrid = false @@ -119,6 +120,9 @@ params { skip_metabat2 = false skip_maxbin2 = false skip_concoct = false + bin_concoct_chunksize = 10000 + bin_concoct_overlap = 0 + bin_concoct_donotconcatlast = false bin_domain_classification = false bin_domain_classification_tool = 'tiara' tiara_min_length = 3000 @@ -382,11 +386,11 @@ env { // Set bash options process.shell = [ "bash", - "-C", // No clobber - prevent output redirection from overwriting files. 
- "-e", // Exit if a tool returns a non-zero status/exit code - "-u", // Treat unset variables and parameters as an error - "-o", // Returns the status of the last command to exit.. - "pipefail" // ..with a non-zero status or zero if all successfully execute + "-C", + "-e", + "-u", + "-o", + "pipefail", ] // Disable process selector warnings by default. Use debug profile to enable warnings. diff --git a/nextflow_schema.json b/nextflow_schema.json index 274752c45..cbf408dca 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -72,7 +72,6 @@ "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, - "default": false, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." }, "igenomes_base": { @@ -145,8 +144,7 @@ "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "hidden": true, - "default": false + "hidden": true }, "publish_dir_mode": { "type": "string", @@ -215,7 +213,8 @@ "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", "fa_icon": "fas fa-check-square", - "hidden": true + "hidden": true, + "default": true }, "pipelines_testdata_base_path": { "type": "string", @@ -276,7 +275,7 @@ "type": "string", "default": "fastp", "description": "Specify which adapter clipping tool to use.", - "enum": ["fastp", "adapterremoval"] + "enum": ["fastp", "adapterremoval", "trimmomatic"] }, "save_clipped_reads": { "type": "boolean", @@ -591,6 +590,13 @@ "description": "Additional custom options for SPAdes and SPAdesHybrid. Do not specify `--meta` as this will be added for you!", "help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). 
 But not --meta, -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options \"-k 21,33,55,77\")" }, + "spades_downstreaminput": { + "type": "string", + "description": "Specify whether to use contigs or scaffolds assembled by SPAdes", + "help_text": "By default, the SPAdes documentation recommends using scaffolds rather than contigs, as these are longer as they combine multiple contigs. However there is an increased risk of misassemblies or chimeras. Use this flag to specify to just use contigs for downstream processes such as binning.", + "enum": ["scaffolds", "contigs"], + "default": "scaffolds" + }, "megahit_options": { "type": "string", "description": "Additional custom options for MEGAHIT.", @@ -758,6 +764,20 @@ "description": "Maximal number of contigs that are not part of any bin but treated as individual genome.", "help_text": "Contigs that do not fulfill the thresholds of `--min_length_unbinned_contigs` and `--max_unbinned_contigs` are pooled for downstream analysis and reporting, except contigs that also do not fullfill `--min_contig_size` are not considered further." 
}, + "bin_concoct_chunksize": { + "type": "integer", + "default": 10000, + "description": "Specify length of sub-contigs cut up prior to CONCOCT binning" + }, + "bin_concoct_overlap": { + "type": "integer", + "default": 0, + "description": "Specify the overlap between each sub-contig prior to CONCOCT binning" + }, + "bin_concoct_donotconcatlast": { + "type": "boolean", + "description": "Specify to not append the last contig less than sub-contig length to the last correct length contig" + }, "bowtie2_mode": { "type": "string", "description": "Bowtie2 alignment mode", diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 388caa498..95470d1ac 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -5,6 +5,7 @@ include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' include { FASTP } from '../../modules/nf-core/fastp/main' +include { TRIMMOMATIC } from '../../modules/nf-core/trimmomatic/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' @@ -63,6 +64,16 @@ workflow SHORTREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) } + else if (params.clip_tool == 'trimmomatic') { + + TRIMMOMATIC(ch_raw_short_reads) + + ch_short_reads_prepped = Channel.empty() + ch_short_reads_prepped = TRIMMOMATIC.out.trimmed_reads + + ch_versions = ch_versions.mix(TRIMMOMATIC.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(TRIMMOMATIC.out.out_log) + } } else { ch_short_reads_prepped = ch_raw_short_reads 
diff --git a/workflows/mag.nf b/workflows/mag.nf index e980375c0..f663f01ec 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -391,7 +391,7 @@ workflow MAG { if (!params.single_end && !params.skip_spades) { METASPADES(ch_short_reads_spades.map { meta, reads -> [meta, reads, [], []] }, [], []) - ch_spades_assemblies = METASPADES.out.scaffolds.map { meta, assembly -> + ch_spades_assemblies = (params.spades_downstreaminput == 'contigs' ? METASPADES.out.contigs : METASPADES.out.scaffolds).map { meta, assembly -> def meta_new = meta + [assembler: 'SPAdes'] [meta_new, assembly] }