diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a8bfa6ba1..c276df3d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,13 +34,13 @@ jobs: - name: Build new docker image if: env.MATCHED_FILES - run: docker build --no-cache . -t nfcore/eager:2.3.3 + run: docker build --no-cache . -t nfcore/eager:2.3.4 - name: Pull docker image if: ${{ !env.MATCHED_FILES }} run: | docker pull nfcore/eager:dev - docker tag nfcore/eager:dev nfcore/eager:2.3.3 + docker tag nfcore/eager:dev nfcore/eager:2.3.4 - name: Install Nextflow env: diff --git a/CHANGELOG.md b/CHANGELOG.md index e289edece..79977f787 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,28 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## v2.3.3 - 2021-01-06 +## v2.3.4 - 2021-05-05 + +### `Added` + +- [#729](https://github.com/nf-core/eager/issues/729) Added Bowtie2 flag `--maxins` for PE mapping in modern DNA mapping contexts + +### `Fixed` + +- Corrected explanation of the "--min_adap_overlap" parameter for AdapterRemoval in the docs +- [#725](https://github.com/nf-core/eager/pull/725) `bwa_index` doc update +- Re-added gzip piping to AdapterRemovalFixPrefix to speed up process after reports of being very slow +- Updated DamageProfiler citation from bioRxiv to publication + +### `Dependencies` + +- Removed pinning of `tbb` (upstream bug in bioconda fixed) +- Bumped `pigz` to 2.6 to fix rare stall bug when compressing data after AdapterRemoval +- Bumped Bowtie2 to 2.4.2 to fix issues with `tbb` version + +### `Deprecated` + +## v2.3.3 - 2021-04-08 ### `Added` diff --git a/Dockerfile b/Dockerfile index 88e0429a8..12e7f7ec2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,10 +7,10 @@ COPY environment.yml / RUN conda env create --quiet -f /environment.yml && conda clean -a # Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH 
/opt/conda/envs/nf-core-eager-2.3.3/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-eager-2.3.4/bin:$PATH # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-eager-2.3.3 > nf-core-eager-2.3.3.yml +RUN conda env export --name nf-core-eager-2.3.4 > nf-core-eager-2.3.4.yml # Instruct R processes to use these empty files instead of clashing with a local version RUN touch .Rprofile diff --git a/README.md b/README.md index ac9e19a4e..eae0a3761 100644 --- a/README.md +++ b/README.md @@ -207,7 +207,7 @@ In addition, references of tools and data used in this pipeline are as follows: * **AdapterRemoval v2** Schubert, M., Lindgreen, S., & Orlando, L. (2016). AdapterRemoval v2: rapid adapter trimming, identification, and read merging. BMC Research Notes, 9, 88. [https://doi.org/10.1186/s13104-016-1900-2](https://doi.org/10.1186/s13104-016-1900-2). Download: [https://github.com/MikkelSchubert/adapterremoval](https://github.com/MikkelSchubert/adapterremoval) * **bwa** Li, H., & Durbin, R. (2009). Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics , 25(14), 1754–1760. [https://doi.org/10.1093/bioinformatics/btp324](https://doi.org/10.1093/bioinformatics/btp324). Download: [http://bio-bwa.sourceforge.net/bwa.shtml](http://bio-bwa.sourceforge.net/bwa.shtml) * **SAMtools** Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., … 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics , 25(16), 2078–2079. [https://doi.org/10.1093/bioinformatics/btp352](https://doi.org/10.1093/bioinformatics/btp352). Download: [http://www.htslib.org/](http://www.htslib.org/) -* **DamageProfiler** Neukamm, J., Peltzer, A., & Nieselt, K. (2020). DamageProfiler: Fast damage pattern calculation for ancient DNA. In biorXiv (p. 2020.10.01.322206). [https://doi.org/10.1101/2020.10.01.322206](https://doi.org/10.1101/2020.10.01.322206). 
Download: [https://github.com/Integrative-Transcriptomics/DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler) +* **DamageProfiler** Neukamm, J., Peltzer, A., & Nieselt, K. (2021). DamageProfiler: Fast damage pattern calculation for ancient DNA. In Bioinformatics (btab190). [https://doi.org/10.1093/bioinformatics/btab190](https://doi.org/10.1093/bioinformatics/btab190). Download: [https://github.com/Integrative-Transcriptomics/DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler) * **QualiMap** Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics , 32(2), 292–294. [https://doi.org/10.1093/bioinformatics/btv566](https://doi.org/10.1093/bioinformatics/btv566). Download: [http://qualimap.bioinfo.cipf.es/](http://qualimap.bioinfo.cipf.es/) * **preseq** Daley, T., & Smith, A. D. (2013). Predicting the molecular complexity of sequencing libraries. Nature Methods, 10(4), 325–327. [https://doi.org/10.1038/nmeth.2375](https://doi.org/10.1038/nmeth.2375). Download: [http://smithlabresearch.org/software/preseq/](http://smithlabresearch.org/software/preseq/) * **PMDTools** Skoglund, P., Northoff, B. H., Shunkov, M. V., Derevianko, A. P., Pääbo, S., Krause, J., & Jakobsson, M. (2014). Separating endogenous ancient DNA from modern day contamination in a Siberian Neandertal. Proceedings of the National Academy of Sciences of the United States of America, 111(6), 2229–2234. [https://doi.org/10.1073/pnas.1318934111](https://doi.org/10.1073/pnas.1318934111). 
Download: [https://github.com/pontussk/PMDtools](https://github.com/pontussk/PMDtools) diff --git a/assets/nf-core-eager_social_preview.png b/assets/nf-core-eager_social_preview.png deleted file mode 100644 index db7ac98c8..000000000 Binary files a/assets/nf-core-eager_social_preview.png and /dev/null differ diff --git a/assets/nf-core-eager_social_preview.svg b/assets/nf-core-eager_social_preview.svg deleted file mode 100644 index 49c2120cd..000000000 --- a/assets/nf-core-eager_social_preview.svg +++ /dev/null @@ -1,662 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - A fully reproducible and state-of-the-artancient DNA analysis pipeline - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - eager - - - - diff --git a/assets/social_preview_image.png b/assets/social_preview_image.png deleted file mode 100644 index d699980e7..000000000 Binary files a/assets/social_preview_image.png and /dev/null differ diff --git a/environment.yml b/environment.yml index a55929d30..45b750d2f 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-eager-2.3.3 +name: nf-core-eager-2.3.4 channels: - conda-forge - bioconda @@ -30,7 +30,7 @@ dependencies: - bioconda::pmdtools=0.60 - bioconda::bedtools=2.29.2 - conda-forge::libiconv=1.15 - - conda-forge::pigz=2.3.4 + - conda-forge::pigz=2.6 - bioconda::sequencetools=1.4.0.6 - bioconda::preseq=2.0.3 - bioconda::fastp=0.20.1 @@ -45,9 +45,8 @@ dependencies: - bioconda::hops=0.35 - conda-forge::biopython=1.76 - conda-forge::xopen=0.9.0 - - bioconda::bowtie2=2.4.1 + - bioconda::bowtie2=2.4.2 - 
bioconda::eigenstratdatabasetools=1.0.2 - bioconda::mapdamage2=2.2.0 - bioconda::bbmap=38.87 - - conda-forge::tbb=2020.2 # temp for bioconda broken bowtie2, remove once patched in bioconda diff --git a/main.nf b/main.nf index 6033028c6..274e96fad 100644 --- a/main.nf +++ b/main.nf @@ -791,9 +791,7 @@ process adapter_removal { mv *.settings output/ ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz > output/${base}.pe.combined.fq - - pigz -p ${task.cpus} output/${base}.pe.combined.fq + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz """ //PE mode, collapse and trim, outputting all reads, preserving 5p } else if (seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && !params.mergedonly && params.preserve5p) { @@ -807,9 +805,7 @@ process adapter_removal { mv *.settings output/ ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz > output/${base}.pe.combined.fq - - pigz -p ${task.cpus} output/${base}.pe.combined.fq + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz """ // PE mode, collapse and trim but only output collapsed reads } else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && !params.preserve5p ) { @@ -820,9 +816,7 @@ process adapter_removal { cat *.collapsed.gz *.collapsed.truncated.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz > output/${base}.pe.combined.fq - - pigz -p ${task.cpus} output/${base}.pe.combined.fq + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g 
output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -835,9 +829,7 @@ process adapter_removal { cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz > output/${base}.pe.combined.fq - - pigz -p ${task.cpus} output/${base}.pe.combined.fq + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -851,9 +843,7 @@ process adapter_removal { cat *.collapsed.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz > output/${base}.pe.combined.fq - - pigz -p ${task.cpus} output/${base}.pe.combined.fq + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -867,9 +857,7 @@ process adapter_removal { cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz ## Add R_ and L_ for unmerged reads for DeDup compatibility - AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz > output/${base}.pe.combined.fq - - pigz -p ${task.cpus} output/${base}.pe.combined.fq + AdapterRemovalFixPrefix -Xmx${task.memory.toGiga()}g output/${base}.pe.combined.tmp.fq.gz | pigz -p ${task.cpus} > output/${base}.pe.combined.fq.gz mv *.settings output/ """ @@ -1357,7 +1345,7 @@ process bowtie2 { //PE data without merging, PE data without any AR applied if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ """ - bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} 
${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --maxins ${params.bt2_maxins} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { diff --git a/nextflow.config b/nextflow.config index 2ac079dad..2533ea38b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -89,6 +89,7 @@ params { bt2l = 0 bt2_trim5 = 0 bt2_trim3 = 0 + bt2_maxins = 500 //Mapped read removal from input FASTQ hostremoval_input_fastq = false @@ -263,7 +264,7 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/eager:2.3.3' +process.container = 'nfcore/eager:2.3.4' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -392,7 +393,7 @@ manifest { description = 'A fully reproducible and state-of-the-art ancient DNA analysis pipeline' mainScript = 'main.nf' nextflowVersion = '!>=20.07.1' - version = '2.3.3' + version = '2.3.4' } // Function to ensure that resource requirements don't go beyond diff --git a/nextflow_schema.json b/nextflow_schema.json index 0e7a9e623..26a2fbf0f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -115,7 +115,7 @@ }, "bwa_index": { "type": "string", - "description": "Path to directory containing pre-made BWA indices (i.e. everything before the endings '.amb' '.ann' '.bwt'. Most likely the same path as --fasta). If not supplied will be made for you.", + "description": "Path to directory containing pre-made BWA indices (i.e. 
the directory before the files ending in '.amb' '.ann' '.bwt'. Do not include the files themselves. Most likely the same directory as the file provided with --fasta). If not supplied will be made for you.", "fa_icon": "fas fa-address-book", "help_text": "If you want to use pre-existing `bwa index` indices, please supply the **directory** to the FASTA you also specified in `--fasta` nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa` index file suffixes.\n\nFor example:\n\n```bash\nnextflow run nf-core/eager \\\n-profile test,docker \\\n--input '*{R1,R2}*.fq.gz'\n--fasta 'results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta' \\\n--bwa_index 'results/reference_genome/bwa_index/BWAIndex/'\n```\n\n> `bwa index` does not give you an option to supply alternative suffixes/names for these indices. Thus, the file names generated by this command _must not_ be changed, otherwise nf-core/eager will not be able to find them." }, @@ -484,7 +484,7 @@ "default": 1, "description": "Specify minimum adapter overlap required for clipping.", "fa_icon": "fas fa-hands-helping", - "help_text": "Sets the minimum overlap between two reads when read merging is performed. Default is set to `1` base overlap.\n\n> Modifies AdapterRemoval parameter: `--minadapteroverlap`" + "help_text": "Specifies a minimum number of bases that overlap with the adapter sequence before adapters are trimmed from reads. Default is set to `1` base overlap.\n\n> Modifies AdapterRemoval parameter: `--minadapteroverlap`" }, "skip_collapse": { "type": "boolean", @@ -634,6 +634,13 @@ "description": "Specify number of bases to trim off from 3' (right) end of read before alignment.", "fa_icon": "fas fa-cut", "help_text": "Number of bases to trim at the 3' (right) end of read prior alignment. 
Maybe useful when left-over sequencing artefacts of in-line barcodes present Default: 0.\n\n> Modifies Bowtie2 parameters: `-bt2_trim3`" + }, + "bt2_maxins": { + "type": "integer", + "default": 500, + "fa_icon": "fas fa-exchange-alt", + "description": "Specify the maximum fragment length for Bowtie2 paired-end mapping mode only.", + "help_text": "The maximum fragment length for valid paired-end alignments. Only for paired-end mapping (i.e. unmerged), and therefore typically only useful for modern data.\n\nSee [Bowtie2 documentation](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml) for more information.\n\n> Modifies Bowtie2 parameters: `--maxins`" } }, "fa_icon": "fas fa-layer-group", @@ -1642,4 +1649,4 @@ "$ref": "#/definitions/metagenomic_authentication" } ] -} +} \ No newline at end of file