diff --git a/CHANGELOG.md b/CHANGELOG.md
index 004375b5..a80ea5db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,14 +3,20 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## dev [unreleased]
+## 3.3.1dev [unreleased]
 
 ### `Added`
 
+- [#745](https://github.com/nf-core/mag/pull/745) - Added `trimmomatic` as an additional pre-processing tool (added by @Pranjal-Bioinfo & @GallVp).
+
 ### `Changed`
 
+- [#745](https://github.com/nf-core/mag/pull/745) - Added pipeline parameter `spades_use_contigs_not_scaffolds` to use contigs instead of scaffolds (added by @Pranjal-Bioinfo & @GallVp).
+
 ### `Fixed`
 
+- [#745](https://github.com/nf-core/mag/pull/745) - Set `concoct/cut_up_fasta.py` default params in `modules.config` (added by @Pranjal-Bioinfo & @GallVp).
+
 ### `Dependencies`
 
 ### `Dependencies`
diff --git a/conf/modules.config b/conf/modules.config
index 701598db..8e913744 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -47,6 +47,25 @@ process {
         tag = { "${meta.id}_run${meta.run}" }
     }
 
+    withName: TRIMMOMATIC {
+        ext.args = "LEADING:30 TRAILING:30 SLIDINGWINDOW:4:20 MINLEN:35"
+        publishDir = [
+            [
+                path: { "${params.outdir}/QC_shortreads/trimmomatic/${meta.id}" },
+                mode: params.publish_dir_mode,
+                pattern: "*_trim.log"
+            ],
+            [
+                path: { "${params.outdir}/QC_shortreads/trimmomatic/${meta.id}" },
+                mode: params.publish_dir_mode,
+                pattern: "*.fastq.gz",
+                enabled: params.save_clipped_reads
+            ]
+        ]
+        ext.prefix = { "${meta.id}_run${meta.run}_trimmomatic" }
+        tag = { "${meta.id}_run${meta.run}" }
+    }
+
     withName: ADAPTERREMOVAL_PE {
         ext.args = [
             "--minlength ${params.reads_minlength}",
@@ -620,6 +639,9 @@ process {
         ]
     }
 
+    withName: 'CONCOCT_CUTUPFASTA' {
+        ext.args = '-c 10000 -o 0 --merge_last'
+    }
     withName: 'CONCOCT_.*' {
         publishDir = [
             [
diff --git a/modules.json b/modules.json
index 05e3b3dd..a83e743a 100644
--- a/modules.json
+++ b/modules.json
@@ -282,6 +282,11 @@
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
                         "installed_by": ["modules"]
                     },
+                    "trimmomatic": {
+                        "branch": "master",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
+                        "installed_by": ["modules"]
+                    },
                     "untar": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
diff --git a/modules/nf-core/trimmomatic/environment.yml b/modules/nf-core/trimmomatic/environment.yml
new file mode 100644
index 00000000..ab4b72b7
--- /dev/null
+++ b/modules/nf-core/trimmomatic/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::trimmomatic=0.39
diff --git a/modules/nf-core/trimmomatic/main.nf b/modules/nf-core/trimmomatic/main.nf
new file mode 100644
index 00000000..ee500d85
--- /dev/null
+++ b/modules/nf-core/trimmomatic/main.nf
@@ -0,0 +1,71 @@
+process TRIMMOMATIC {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trimmomatic:0.39--hdfd78af_2':
+        'biocontainers/trimmomatic:0.39--hdfd78af_2' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.paired.trim*.fastq.gz")   , emit: trimmed_reads
+    tuple val(meta), path("*.unpaired.trim_*.fastq.gz"), emit: unpaired_reads, optional:true
+    tuple val(meta), path("*_trim.log")                , emit: trim_log
+    tuple val(meta), path("*_out.log")                 , emit: out_log
+    tuple val(meta), path("*.summary")                 , emit: summary
+    path "versions.yml"                                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def trimmed = meta.single_end ? "SE" : "PE"
+    def output = meta.single_end ?
+        "${prefix}.SE.paired.trim.fastq.gz" // HACK to avoid unpaired and paired in the trimmed_reads output
+        : "${prefix}.paired.trim_1.fastq.gz ${prefix}.unpaired.trim_1.fastq.gz ${prefix}.paired.trim_2.fastq.gz ${prefix}.unpaired.trim_2.fastq.gz"
+    def qual_trim = task.ext.args2 ?: ''
+    """
+    trimmomatic \\
+        $trimmed \\
+        -threads $task.cpus \\
+        -trimlog ${prefix}_trim.log \\
+        -summary ${prefix}.summary \\
+        $reads \\
+        $output \\
+        $qual_trim \\
+        $args 2> >(tee ${prefix}_out.log >&2)
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trimmomatic: \$(trimmomatic -version)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    if (meta.single_end) {
+        output_command = "echo '' | gzip > ${prefix}.SE.paired.trim.fastq.gz"
+    } else { // paired-end: one gzip command per line so that all four stub outputs are created
+        output_command  = "echo '' | gzip > ${prefix}.paired.trim_1.fastq.gz\n"
+        output_command += "echo '' | gzip > ${prefix}.paired.trim_2.fastq.gz\n"
+        output_command += "echo '' | gzip > ${prefix}.unpaired.trim_1.fastq.gz\n"
+        output_command += "echo '' | gzip > ${prefix}.unpaired.trim_2.fastq.gz"
+    }
+
+    """
+    $output_command
+    touch ${prefix}.summary
+    touch ${prefix}_trim.log
+    touch ${prefix}_out.log
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trimmomatic: \$(trimmomatic -version)
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/nf-core/trimmomatic/meta.yml b/modules/nf-core/trimmomatic/meta.yml
new file mode 100644
index 00000000..e74014d5
--- /dev/null
+++ b/modules/nf-core/trimmomatic/meta.yml
@@ -0,0 +1,85 @@
+name: "trimmomatic"
+description: Performs quality and adapter trimming on paired end and single end reads
+keywords:
+  - trimming
+  - adapter trimming
+  - quality trimming
+tools:
+  - "trimmomatic":
+      description: "A flexible read trimming tool for Illumina NGS data"
+      homepage: "http://www.usadellab.org/cms/?page=trimmomatic"
+      documentation: "https://github.com/usadellab/Trimmomatic"
+      doi: "10.1093/bioinformatics/btu170"
+      licence: ["GPL v3"]
+      identifier: biotools:trimmomatic
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - reads:
+        type: file
+        description: |
+          Input FastQ files of size 1 or 2 for single-end and paired-end data, respectively.
+        pattern: "*.fastq.gz"
+output:
+  - trimmed_reads:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.paired.trim*.fastq.gz":
+          type: file
+          description: The trimmed/modified paired end fastq reads
+          pattern: "*.paired.trim*.fastq.gz"
+  - unpaired_reads:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.unpaired.trim_*.fastq.gz":
+          type: file
+          description: The trimmed/modified unpaired end fastq reads
+          pattern: "*.unpaired.trim_*.fastq.gz"
+  - trim_log:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*_trim.log":
+          type: file
+          description: trimmomatic log file, from the trim_log parameter
+          pattern: "*.log"
+  - out_log:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*_out.log":
+          type: file
+          description: log of output from the standard out
+          pattern: "*.log"
+  - summary:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.summary":
+          type: file
+          description: trimmomatic summary file of surviving and dropped reads
+          pattern: "*.summary"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@alyssa-ab"
+maintainers:
+  - "@alyssa-ab"
diff --git a/modules/nf-core/trimmomatic/tests/main.nf.test b/modules/nf-core/trimmomatic/tests/main.nf.test
new file mode 100644
index 00000000..fee08848
--- /dev/null
+++ b/modules/nf-core/trimmomatic/tests/main.nf.test
@@ -0,0 +1,118 @@
+nextflow_process {
+
+    name "Test Process TRIMMOMATIC"
+    script "../main.nf"
+    process "TRIMMOMATIC"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "trimmomatic"
+
+    test("Single-Read") {
+        config "./nextflow_SE.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', single_end:true ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert process.out.trimmed_reads != null },
+                { assert process.out.trimmed_reads.get(0).get(1) ==~ ".*.SE.paired.trim.fastq.gz" },
+                { assert process.out.out_log.get(0).get(1) ==~ ".*.log" },
+                { assert snapshot(process.out.trimmed_reads,
+                    process.out.trim_log,
+                    process.out.summary,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+    }
+
+    test("Paired-Reads") {
+        config "./nextflow_PE.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert process.out.trimmed_reads != null },
+                { assert process.out.trimmed_reads.get(0).get(1).get(0) ==~ ".*.paired.trim_1.fastq.gz" },
+                { assert process.out.trimmed_reads.get(0).get(1).get(1) ==~ ".*.paired.trim_2.fastq.gz" },
+                { assert process.out.out_log.get(0).get(1) ==~ ".*.log" },
+                { assert snapshot(process.out.trimmed_reads,
+                    process.out.trim_log,
+                    process.out.summary,
+                    process.out.versions
+                    ).match() }
+            )
+        }
+    }
+
+    test("No Adaptors") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.failed }
+            )
+        }
+    }
+
+    test("Single-Read - stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', single_end:true ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/modules/nf-core/trimmomatic/tests/main.nf.test.snap b/modules/nf-core/trimmomatic/tests/main.nf.test.snap
new file mode 100644
index 00000000..6b51bc4e
--- /dev/null
+++ b/modules/nf-core/trimmomatic/tests/main.nf.test.snap
@@ -0,0 +1,178 @@
+{
+    "Single-Read - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.SE.paired.trim.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_trim.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_out.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.summary:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "5": [
+                    "versions.yml:md5,14413a048f088a147fb04f3d59c6c604"
+                ],
+                "out_log": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_out.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "summary": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.summary:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "trim_log": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_trim.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "trimmed_reads": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.SE.paired.trim.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "unpaired_reads": [
+
+                ],
+                "versions": [
+                    "versions.yml:md5,14413a048f088a147fb04f3d59c6c604"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-26T21:50:12.135007267"
+    },
+    "Single-Read": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.SE.paired.trim.fastq.gz:md5,e68abbd3b88f7ec12940a4f5c2b8bfb9"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test_trim.log:md5,e4c3f619e9b0e26847f8f3e3d9af319b"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.summary:md5,24c973237557a1439c775ca19a5deaa5"
+                ]
+            ],
+            [
+                "versions.yml:md5,14413a048f088a147fb04f3d59c6c604"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-26T21:56:07.085100348"
+    },
+    "Paired-Reads": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test.paired.trim_1.fastq.gz:md5,a866e64f451745f176316d0df7d52b30",
+                        "test.paired.trim_2.fastq.gz:md5,725d4ab909b39291ab56b090cab32075"
+                    ]
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test_trim.log:md5,9629761761a34576b3484bf4174f681f"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.summary:md5,9698e5e5c060bbe64588998fe35f8d71"
+                ]
+            ],
+            [
+                "versions.yml:md5,14413a048f088a147fb04f3d59c6c604"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-26T21:56:18.779303181"
+    }
+}
diff --git a/modules/nf-core/trimmomatic/tests/nextflow_PE.config b/modules/nf-core/trimmomatic/tests/nextflow_PE.config
new file mode 100644
index 00000000..3f8fdfe1
--- /dev/null
+++ b/modules/nf-core/trimmomatic/tests/nextflow_PE.config
@@ -0,0 +1,6 @@
+process {
+
+    withName: TRIMMOMATIC {
+        ext.args = 'ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36'
+    }
+}
diff --git a/modules/nf-core/trimmomatic/tests/nextflow_SE.config b/modules/nf-core/trimmomatic/tests/nextflow_SE.config
new file mode 100644
index 00000000..af777fda
--- /dev/null
+++ b/modules/nf-core/trimmomatic/tests/nextflow_SE.config
@@ -0,0 +1,6 @@
+process {
+
+    withName: TRIMMOMATIC {
+        ext.args = 'ILLUMINACLIP:TruSeq3-SE:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36'
+    }
+}
diff --git a/modules/nf-core/trimmomatic/tests/tags.yml b/modules/nf-core/trimmomatic/tests/tags.yml
new file mode 100644
index 00000000..68c5af6a
--- /dev/null
+++ b/modules/nf-core/trimmomatic/tests/tags.yml
@@ -0,0 +1,2 @@
+trimmomatic:
+  - modules/nf-core/trimmomatic/**
diff --git a/nextflow.config b/nextflow.config
index 387fee34..b7ac54e8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -59,6 +59,7 @@ params {
     // assembly options
     coassemble_group                     = false
     spades_options                       = null
+    spades_use_contigs_not_scaffolds     = false
     megahit_options                      = null
     skip_spades                          = false
     skip_spadeshybrid                    = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 18307b05..a3fd6a35 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -269,7 +269,7 @@
                     "type": "string",
                     "default": "fastp",
                     "description": "Specify which adapter clipping tool to use.",
-                    "enum": ["fastp", "adapterremoval"]
+                    "enum": ["fastp", "adapterremoval", "trimmomatic"]
                 },
                 "save_clipped_reads": {
                     "type": "boolean",
@@ -570,6 +570,10 @@
                     "description": "Additional custom options for SPAdes and SPAdesHybrid. Do not specify `--meta` as this will be added for you!",
                     "help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). But not --meta, -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options \"-k 21,33,55,77\")"
                 },
+                "spades_use_contigs_not_scaffolds": {
+                    "type": "boolean",
+                    "description": "Specify whether to use contigs or scaffolds assembled by SPAdes"
+                },
                 "megahit_options": {
                     "type": "string",
                     "description": "Additional custom options for MEGAHIT.",
diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf
index ad33b56f..b71c47c2 100644
--- a/subworkflows/local/shortread_preprocessing.nf
+++ b/subworkflows/local/shortread_preprocessing.nf
@@ -5,6 +5,7 @@
 include { FASTQC as FASTQC_RAW                                } from '../../modules/nf-core/fastqc/main'
 include { FASTQC as FASTQC_TRIMMED                            } from '../../modules/nf-core/fastqc/main'
 include { FASTP                                               } from '../../modules/nf-core/fastp/main'
+include { TRIMMOMATIC                                         } from '../../modules/nf-core/trimmomatic/main'
 include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE                 } from '../../modules/nf-core/adapterremoval/main'
 include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE                 } from '../../modules/nf-core/adapterremoval/main'
 include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build'
@@ -64,6 +65,16 @@ workflow SHORTREAD_PREPROCESSING {
             ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings)
             ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings)
         }
+        else if (params.clip_tool == 'trimmomatic') {
+
+            TRIMMOMATIC(ch_raw_short_reads)
+
+            ch_short_reads_prepped = Channel.empty()
+            ch_short_reads_prepped = TRIMMOMATIC.out.trimmed_reads
+
+            ch_versions = ch_versions.mix(TRIMMOMATIC.out.versions.first())
+            ch_multiqc_files = ch_multiqc_files.mix(TRIMMOMATIC.out.summary)
+        }
     }
     else {
         ch_short_reads_prepped = ch_raw_short_reads
diff --git a/workflows/mag.nf b/workflows/mag.nf
index de353a40..709d1a6b 100644
--- a/workflows/mag.nf
+++ b/workflows/mag.nf
@@ -380,10 +380,11 @@ workflow MAG {
 
         if (!params.single_end && !params.skip_spades) {
             METASPADES(ch_short_reads_spades.map { meta, reads -> [meta, reads, [], []] }, [], [])
-            ch_spades_assemblies = METASPADES.out.scaffolds.map { meta, assembly ->
-                def meta_new = meta + [assembler: 'SPAdes']
-                [meta_new, assembly]
-            }
+            ch_spades_assemblies = ( params.spades_use_contigs_not_scaffolds ? METASPADES.out.contigs : METASPADES.out.scaffolds )
+                .map { meta, assembly ->
+                    def meta_new = meta + [assembler: 'SPAdes']
+                    [meta_new, assembly]
+                }
             ch_assembled_contigs = ch_assembled_contigs.mix(ch_spades_assemblies)
             ch_versions = ch_versions.mix(METASPADES.out.versions.first())
         }