diff --git a/CHANGELOG.md b/CHANGELOG.md index cffca7b5..e6bdbc4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#125](https://github.com/nf-core/riboseq/pull/125) - Add rRNA removal tool selection with support for SortMeRNA (default), Bowtie2, and RiboDetector ([@pinin4fjords](https://github.com/pinin4fjords)) - [#131](https://github.com/nf-core/riboseq/pull/131) - Add ribotish quality output routing to MultiQC ([@pinin4fjords](https://github.com/pinin4fjords)) +- Add RiboCode ORF detection with P-site analysis and metaplots ([@JackCurragh](https://github.com/JackCurragh)) ### `Changed` @@ -16,9 +17,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Parameters` -| Old parameter | New parameter | -| ------------- | --------------------- | -| | `--ribo_removal_tool` | +| Old parameter | New parameter | +| ------------- | ---------------------------------- | +| | `--ribo_removal_tool` | +| | `--skip_ribocode` | +| | `--extra_ribocode_gtfupdate_args` | +| | `--extra_ribocode_prepare_args` | +| | `--extra_ribocode_metaplots_args` | +| | `--extra_ribocode_ribocode_args` | ### `Dependencies` diff --git a/CITATIONS.md b/CITATIONS.md index 3ec1a9cb..fc295d3f 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,6 +40,10 @@ > Choudhary S, Li W, D Smith A. Accurate detection of short and long active ORFs using Ribo-seq data. Bioinformatics. 2020 Apr 1;36(7):2053-2059. doi: 10.1093/bioinformatics/btz878. PMID: 31750902; PMCID: PMC7141849. +- [RiboCode](https://pubmed.ncbi.nlm.nih.gov/29538776/) + + > Zhengtao Xiao, Rongyao Huang, Xudong Xing, Yuling Chen, Haiteng Deng, Xuerui Yang. De novo annotation and characterization of the translatome with ribosome profiling data Nucleic Acids Res. 2018 Jun 1;46(10):e61. doi: 10.1093/nar/gky179. 
PMID: 29538776; PMCID: PMC6007384. + - [riboWaltz](https://pubmed.ncbi.nlm.nih.gov/30102689/) > Lauria F, Tebaldi T, Bernabò P, Groen EJN, Gillingwater TH, Viero G. riboWaltz: Optimization of ribosome P-site positioning in ribosome profiling data. PLoS Comput Biol. 2018 Aug 13;14(8):e1006169. doi: 10.1371/journal.pcbi.1006169. PMID: 30102689; PMCID: PMC6112680. diff --git a/README.md b/README.md index 5ae566f0..00a2d436 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,9 @@ Differences occur in the downstream analysis steps. Currently these specialist s 1. Check reads distribution around annotated protein coding regions on user provided transcripts, show frame bias and estimate P-site offset for different group of reads ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish)) 2. (default, optional) Predict translated open reading frames and/ or translation initiation sites _de novo_ from alignment data ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish)) 3. (default, optional) Derive candidate ORFs from reference data and detect translated ORFs from that list ([`Ribotricer`](https://github.com/smithlabcode/ribotricer)) -4. (default, optional) Derive P-sites and QC from transcriptome alignments ([`riboWaltz`](https://github.com/LabTranslationalArchitectomics/riboWaltz)) -5. (optional) Use a translational efficiency approach to study the dynamics of transcription and translation, with [anota2seq](https://bioconductor.org/packages/release/bioc/html/anota2seq.html). **requires matched RNA-seq and Ribo-seq data** +4. (default, optional) Identify translated ORFs using P-site periodicity and read density ([`RiboCode`](https://github.com/xryanglab/RiboCode)) +5. (default, optional) Derive P-sites and QC from transcriptome alignments ([`riboWaltz`](https://github.com/LabTranslationalArchitectomics/riboWaltz)) +6. 
(optional) Use a translational efficiency approach to study the dynamics of transcription and translation, with [anota2seq](https://bioconductor.org/packages/release/bioc/html/anota2seq.html). **requires matched RNA-seq and Ribo-seq data** ## Usage diff --git a/conf/modules.config b/conf/modules.config index 00732be4..d5f79df5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -869,6 +869,43 @@ if (!params.skip_ribotricer) { } } +if (!params.skip_ribocode) { + process { + withName: 'RIBOCODE_GTFUPDATE' { + ext.args = { params.extra_ribocode_gtfupdate_args ?: '' } + } + withName: 'RIBOCODE_PREPARE' { + ext.args = { params.extra_ribocode_prepare_args ?: '' } + } + withName: 'RIBOCODE_METAPLOTS' { + ext.args = { params.extra_ribocode_metaplots_args ?: '' } + publishDir = [ + path: { "${params.outdir}/riboseq_qc/ribocode" }, + mode: params.publish_dir_mode, + pattern: "*.{pdf,txt}", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'RIBOCODE_RIBOCODE' { + ext.args = { params.extra_ribocode_ribocode_args ?: '' } + publishDir = [ + [ + path: { "${params.outdir}/orf_predictions/ribocode" }, + mode: params.publish_dir_mode, + pattern: "*.txt", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: { "${params.outdir}/riboseq_qc/ribocode" }, + mode: params.publish_dir_mode, + pattern: "*.{pdf,hd5}", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ] + } + } +} + if (!params.skip_ribowaltz) { process { withName: 'RIBOWALTZ' { diff --git a/conf/test.config b/conf/test.config index 6236c511..ca42cacd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -31,5 +31,6 @@ params { min_trimmed_reads = 1000 skip_ribotricer = true + skip_ribocode = true extra_fqlint_args = '--disable-validator P001 --disable-validator S007' } diff --git a/docs/output.md b/docs/output.md index 7693713e..6c2c402d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -31,9 +31,11 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Riboseq-specific QC](#riboseq-specific-qc) - [Ribo-TISH quality](#ribo-tish-quality) - [Ribotricer detect-orfs QC outputs](#ribotricer-detect-orfs-qc-outputs) + - [RiboCode metaplots](#ribocode-metaplots) - [ORF predictions](#orf-predictions) - [Ribo-TISH predict](#ribo-tish-predict) - [Ribotricer detect-orfs](#ribotricer-detect-orfs) + - [RiboCode](#ribocode) - [P-site identification](#p-site-identification) - [riboWaltz](#ribowaltz) - [Quantification](#quantification) @@ -314,6 +316,16 @@ Read distribution metrics around annotated protein coding regions or based on al - `*_metagene_profiles_3p.tsv`: Metagene profile aligning with the stop codon +### RiboCode metaplots + +
+Output files + +- `riboseq_qc/ribocode/` + - `*_config.txt`: Configuration file containing P-site offsets for different read lengths + - `*.pdf`: Metaplot showing read density around start and stop codons +
+ ## ORF predictions ### Ribo-TISH predict @@ -341,6 +353,16 @@ Read distribution metrics around annotated protein coding regions or based on al - `*_psite_offsets.txt`: If the P-site offsets are not provided, txt file containing the derived relative offsets +### RiboCode + +
+Output files + +- `orf_predictions/ribocode/` + - `*.txt`: ORF predictions with coordinates, read counts, and translation scores + - `*_collapsed.txt`: ORF predictions collapsed across transcript isoforms (ORFs sharing the same stop codon are merged into a single entry) +
+ ## P-site identification ### riboWaltz diff --git a/modules.json b/modules.json index 912b2366..8804af7b 100644 --- a/modules.json +++ b/modules.json @@ -95,6 +95,26 @@ "git_sha": "9656d955b700a8707c4a67821ab056f8c1095675", "installed_by": ["modules"] }, + "ribocode/gtfupdate": { + "branch": "master", + "git_sha": "3b13c6d5817ba9611477a7aa37120d9a9e50fff0", + "installed_by": ["modules"] + }, + "ribocode/metaplots": { + "branch": "master", + "git_sha": "3b13c6d5817ba9611477a7aa37120d9a9e50fff0", + "installed_by": ["modules"] + }, + "ribocode/prepare": { + "branch": "master", + "git_sha": "3b13c6d5817ba9611477a7aa37120d9a9e50fff0", + "installed_by": ["modules"] + }, + "ribocode/ribocode": { + "branch": "master", + "git_sha": "3b13c6d5817ba9611477a7aa37120d9a9e50fff0", + "installed_by": ["modules"] + }, "ribodetector": { "branch": "master", "git_sha": "61ce58ac3e4e4627d9c1cdc958ee6bfa742a0a75", diff --git a/modules/nf-core/ribocode/gtfupdate/environment.yml b/modules/nf-core/ribocode/gtfupdate/environment.yml new file mode 100644 index 00000000..f4311180 --- /dev/null +++ b/modules/nf-core/ribocode/gtfupdate/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ribocode=1.2.15 diff --git a/modules/nf-core/ribocode/gtfupdate/main.nf b/modules/nf-core/ribocode/gtfupdate/main.nf new file mode 100644 index 00000000..5f3cade8 --- /dev/null +++ b/modules/nf-core/ribocode/gtfupdate/main.nf @@ -0,0 +1,38 @@ +process RIBOCODE_GTFUPDATE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe815db0864b45b91afc7bc84c55cb60acb0035e7248dda7f480a55c4cb105d7/data': + 'community.wave.seqera.io/library/ribocode:1.2.15--5530b252f5433a62' }" + + input: + tuple val(meta), path(gtf) + + output: + tuple val(meta), path("*.gtf") , emit: gtf + tuple val("${task.process}"), val('ribocode'), eval('RiboCode --version 2>&1') , emit: versions_ribocode, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + GTFupdate \\ + ${gtf} \\ + $args \\ + > ${prefix}_updated.gtf + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}_updated.gtf + """ +} diff --git a/modules/nf-core/ribocode/gtfupdate/meta.yml b/modules/nf-core/ribocode/gtfupdate/meta.yml new file mode 100644 index 00000000..a3586360 --- /dev/null +++ b/modules/nf-core/ribocode/gtfupdate/meta.yml @@ -0,0 +1,66 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "ribocode_gtfupdate" +description: Update GTF annotation file for RiboCode compatibility +keywords: + - ribo-seq + - ribosome profiling + - gtf + - annotation +tools: + - "ribocode": + description: "A package for detecting the actively translated ORFs using ribosome-profiling + data" + homepage: "https://github.com/xryanglab/RiboCode" + documentation: "https://github.com/xryanglab/RiboCode" + tool_dev_url: "https://github.com/xryanglab/RiboCode" + doi: "10.1093/nar/gky179" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - gtf: + type: file + description: GTF annotation file to update (uncompressed) + pattern: "*.{gtf}" + ontologies: [] +output: + gtf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.gtf": + type: file + description: Updated GTF annotation file + pattern: "*_updated.gtf" + versions_ribocode: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@JackCurragh" diff --git a/modules/nf-core/ribocode/gtfupdate/tests/main.nf.test b/modules/nf-core/ribocode/gtfupdate/tests/main.nf.test new file mode 100644 index 00000000..2748caef --- /dev/null +++ b/modules/nf-core/ribocode/gtfupdate/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process RIBOCODE_GTFUPDATE" + script "../main.nf" + process "RIBOCODE_GTFUPDATE" + + tag "modules" + tag "modules_nfcore" + tag "ribocode" + tag "ribocode/gtfupdate" + + test("test_ribocode_gtfupdate") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("process_outputs") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("versions") } + ) + } + } + + test("test_ribocode_gtfupdate - stub") { + + options "-stub" + + when { + process { + """ + 
input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.gtf[0][1].toString().endsWith('.gtf') } + ) + } + } +} diff --git a/modules/nf-core/ribocode/gtfupdate/tests/main.nf.test.snap b/modules/nf-core/ribocode/gtfupdate/tests/main.nf.test.snap new file mode 100644 index 00000000..616d4255 --- /dev/null +++ b/modules/nf-core/ribocode/gtfupdate/tests/main.nf.test.snap @@ -0,0 +1,61 @@ +{ + "versions": { + "content": [ + { + "versions_ribocode": [ + [ + "RIBOCODE_GTFUPDATE", + "ribocode", + "1.2.15" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T13:58:20.352527" + }, + "process_outputs": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_updated.gtf:md5,4aee3722f1d061395e2fdce7277708b3" + ] + ], + "1": [ + [ + "RIBOCODE_GTFUPDATE", + "ribocode", + "1.2.15" + ] + ], + "gtf": [ + [ + { + "id": "test" + }, + "test_updated.gtf:md5,4aee3722f1d061395e2fdce7277708b3" + ] + ], + "versions_ribocode": [ + [ + "RIBOCODE_GTFUPDATE", + "ribocode", + "1.2.15" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T13:58:20.062134" + } +} \ No newline at end of file diff --git a/modules/nf-core/ribocode/metaplots/environment.yml b/modules/nf-core/ribocode/metaplots/environment.yml new file mode 100644 index 00000000..f4311180 --- /dev/null +++ b/modules/nf-core/ribocode/metaplots/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ribocode=1.2.15 diff --git a/modules/nf-core/ribocode/metaplots/main.nf b/modules/nf-core/ribocode/metaplots/main.nf new file mode 100644 index 00000000..dad139c1 --- /dev/null +++ b/modules/nf-core/ribocode/metaplots/main.nf @@ -0,0 +1,40 @@ +process RIBOCODE_METAPLOTS { + tag 
"$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe815db0864b45b91afc7bc84c55cb60acb0035e7248dda7f480a55c4cb105d7/data': + 'community.wave.seqera.io/library/ribocode:1.2.15--5530b252f5433a62' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(annotation) + + output: + tuple val(meta), path("*config.txt") , emit: config + tuple val(meta), path("*.pdf") , emit: pdf + tuple val("${task.process}"), val('ribocode'), eval('RiboCode --version 2>&1') , emit: versions_ribocode, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + metaplots \\ + -a $annotation \\ + -r $bam \\ + -o ${prefix} \\ + $args + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_config.txt + touch ${prefix}_report.pdf + """ +} diff --git a/modules/nf-core/ribocode/metaplots/meta.yml b/modules/nf-core/ribocode/metaplots/meta.yml new file mode 100644 index 00000000..34a4099d --- /dev/null +++ b/modules/nf-core/ribocode/metaplots/meta.yml @@ -0,0 +1,87 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "ribocode_metaplots" +description: Set up RiboCode ORF calling with metaplots +keywords: + - ribo-seq + - ribosome profiling + - orf calling +tools: + - "ribocode": + description: "A package for detecting the actively translated ORFs using ribosome-profiling + data" + homepage: "https://github.com/xryanglab/RiboCode" + documentation: "https://github.com/xryanglab/RiboCode" + tool_dev_url: "https://github.com/xryanglab/RiboCode" + doi: "10.1093/nar/gky179" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + 
description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing annotation information + e.g. `[ id:'genome' ]` + - annotation: + type: directory + description: Directory containing annotation files from ribocode/prepare + pattern: "annotation" + +output: + config: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*config.txt": + type: file + description: RiboCode configuration file containing P-site offsets + pattern: "*_config.txt" + ontologies: [] + pdf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.pdf": + type: file + description: PDF file containing P-site metaplots for quality control + pattern: "*_report.pdf" + ontologies: [] + versions_ribocode: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@JackCurragh" diff --git a/modules/nf-core/ribocode/metaplots/tests/main.nf.test b/modules/nf-core/ribocode/metaplots/tests/main.nf.test new file mode 100644 index 00000000..97d17741 --- /dev/null +++ b/modules/nf-core/ribocode/metaplots/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process RIBOCODE_METAPLOTS" + script "../main.nf" + process "RIBOCODE_METAPLOTS" + + tag "modules" + tag "modules_nfcore" + tag 
"ribocode" + tag "ribocode/metaplots" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'annotation' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/ribocode/annotation.tar.gz", checkIfExists: true) + ] + """ + } + } + } + + test("test_ribocode_metaplots") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780887.Aligned.toTranscriptome.out.bam", checkIfExists: true) + ] + input[1] = UNTAR.out.untar + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.config).match("config") }, + { assert process.out.pdf[0][1].toString().endsWith('.pdf') }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("versions") } + ) + } + } + + test("test_ribocode_metaplots - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780887.Aligned.toTranscriptome.out.bam", checkIfExists: true) + ] + input[1] = [ + [ id:'annotation' ], + file("annotation") + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.config[0][1].toString().endsWith('_config.txt') }, + { assert process.out.pdf[0][1].toString().endsWith('.pdf') } + ) + } + } +} diff --git a/modules/nf-core/ribocode/metaplots/tests/main.nf.test.snap b/modules/nf-core/ribocode/metaplots/tests/main.nf.test.snap new file mode 100644 index 00000000..e8888fdc --- /dev/null +++ b/modules/nf-core/ribocode/metaplots/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "versions": { + "content": [ + { + "versions_ribocode": [ + [ + "RIBOCODE_METAPLOTS", + "ribocode", + "1.2.15" 
+ ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T14:02:16.711086" + }, + "config": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_pre_config.txt:md5,6da5a4583c1f94ec908c9556bf7064be" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T14:02:16.699246" + } +} \ No newline at end of file diff --git a/modules/nf-core/ribocode/metaplots/tests/nextflow.config b/modules/nf-core/ribocode/metaplots/tests/nextflow.config new file mode 100644 index 00000000..3ccf4a70 --- /dev/null +++ b/modules/nf-core/ribocode/metaplots/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: RIBOCODE_METAPLOTS { + ext.args = '-f0_percent 0.1 -pv1 1 -pv2 1' + } +} diff --git a/modules/nf-core/ribocode/prepare/environment.yml b/modules/nf-core/ribocode/prepare/environment.yml new file mode 100644 index 00000000..f4311180 --- /dev/null +++ b/modules/nf-core/ribocode/prepare/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ribocode=1.2.15 diff --git a/modules/nf-core/ribocode/prepare/main.nf b/modules/nf-core/ribocode/prepare/main.nf new file mode 100644 index 00000000..d9397c96 --- /dev/null +++ b/modules/nf-core/ribocode/prepare/main.nf @@ -0,0 +1,42 @@ +process RIBOCODE_PREPARE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe815db0864b45b91afc7bc84c55cb60acb0035e7248dda7f480a55c4cb105d7/data': + 'community.wave.seqera.io/library/ribocode:1.2.15--5530b252f5433a62' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + + output: + tuple val(meta), path("annotation") , emit: annotation + tuple val("${task.process}"), val('ribocode'), eval('RiboCode --version 2>&1') , emit: versions_ribocode, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + prepare_transcripts \\ + -g ${gtf} \\ + -f ${fasta} \\ + -o annotation \\ + $args + """ + + stub: + def args = task.ext.args ?: '' + + """ + mkdir annotation + + touch annotation/transcripts_cds.txt + touch annotation/transcripts_sequence.fa + touch annotation/transcripts.pickle + """ +} diff --git a/modules/nf-core/ribocode/prepare/meta.yml b/modules/nf-core/ribocode/prepare/meta.yml new file mode 100644 index 00000000..73004355 --- /dev/null +++ b/modules/nf-core/ribocode/prepare/meta.yml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "ribocode_prepare" +description: Prepare the annotation files for RiboCode ORF calling +keywords: + - ribo-seq + - ribosome profiling + - orf calling +tools: + - "ribocode": + description: "A package for detecting the actively translated ORFs using ribosome-profiling + data" + homepage: "https://github.com/xryanglab/RiboCode" + documentation: "https://github.com/xryanglab/RiboCode" + tool_dev_url: "https://github.com/xryanglab/RiboCode" + doi: "10.1093/nar/gky179" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - fasta: + type: file + description: Reference genome FASTA file (uncompressed) + pattern: "*.{fasta,fa,fna}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'genome' ]` + - gtf: + type: file + description: Reference genome GTF annotation file (uncompressed, updated with + ribocode/gtfupdate) + pattern: "*.{gtf}" + ontologies: [] +output: + annotation: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - annotation: + type: directory + description: Directory containing RiboCode annotation files (transcripts_cds.txt, + transcripts_sequence.fa, transcripts.pickle) + pattern: "annotation/" + versions_ribocode: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@JackCurragh" diff --git a/modules/nf-core/ribocode/prepare/tests/main.nf.test b/modules/nf-core/ribocode/prepare/tests/main.nf.test new file mode 100644 index 00000000..b8a846c2 --- /dev/null +++ b/modules/nf-core/ribocode/prepare/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process RIBOCODE_PREPARE" + script "../main.nf" + process "RIBOCODE_PREPARE" + + tag "modules" + tag "modules_nfcore" + tag "ribocode" + tag "ribocode/prepare" + tag "gunzip" + + setup { + run("GUNZIP", alias: "GUNZIP_FASTA") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz", checkIfExists: true) + ] + """ + } + } + run("GUNZIP", alias: "GUNZIP_GTF") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/ribocode/genome_updated.gtf.gz", checkIfExists: true) + ] + """ + } + } + } + + test("test_ribocode_prepare") { + + when { + process { + """ + input[0] = GUNZIP_FASTA.out.gunzip + input[1] = GUNZIP_GTF.out.gunzip + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("process_outputs") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("versions") } + ) + } + } + + test("test_ribocode_prepare - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.dna.chromosome.20.fa.gz", checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.annotation[0][1].toString().endsWith('annotation') } + ) + } + } +} diff --git a/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap b/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap new file mode 100644 index 00000000..0919d104 --- /dev/null +++ b/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap @@ -0,0 +1,69 @@ +{ + "versions": { + "content": [ + { + "versions_ribocode": [ + [ + "RIBOCODE_PREPARE", + "ribocode", + "1.2.15" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T14:01:21.822415" + }, + "process_outputs": { + "content": [ + { + "0": [ + [ + { + "id": 
"test" + }, + [ + "transcripts.pickle:md5,b83be7910166b56d09c4879d38223883", + "transcripts_cds.txt:md5,6fae20439cbe378eb4db60a8bdf6a6af", + "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33" + ] + ] + ], + "1": [ + [ + "RIBOCODE_PREPARE", + "ribocode", + "1.2.15" + ] + ], + "annotation": [ + [ + { + "id": "test" + }, + [ + "transcripts.pickle:md5,b83be7910166b56d09c4879d38223883", + "transcripts_cds.txt:md5,6fae20439cbe378eb4db60a8bdf6a6af", + "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33" + ] + ] + ], + "versions_ribocode": [ + [ + "RIBOCODE_PREPARE", + "ribocode", + "1.2.15" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T14:01:21.661508" + } +} \ No newline at end of file diff --git a/modules/nf-core/ribocode/ribocode/environment.yml b/modules/nf-core/ribocode/ribocode/environment.yml new file mode 100644 index 00000000..f4311180 --- /dev/null +++ b/modules/nf-core/ribocode/ribocode/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ribocode=1.2.15 diff --git a/modules/nf-core/ribocode/ribocode/main.nf b/modules/nf-core/ribocode/ribocode/main.nf new file mode 100644 index 00000000..46fb22ee --- /dev/null +++ b/modules/nf-core/ribocode/ribocode/main.nf @@ -0,0 +1,47 @@ +process RIBOCODE_RIBOCODE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe815db0864b45b91afc7bc84c55cb60acb0035e7248dda7f480a55c4cb105d7/data': + 'community.wave.seqera.io/library/ribocode:1.2.15--5530b252f5433a62' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(annotation) + tuple val(meta3), path(config) + + output: + + tuple val(meta), path("*.txt") , emit: orf_txt + tuple val(meta), path("*_collapsed.txt") , emit: orf_txt_collapsed + tuple val(meta), path("*_ORFs_category.pdf") , emit: orf_pdf, optional: true + tuple val(meta), path("*_psites.hd5") , emit: psites_hd5, optional: true + tuple val("${task.process}"), val('ribocode'), eval('RiboCode --version 2>&1') , emit: versions_ribocode, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + RiboCode \\ + -a $annotation \\ + -c $config \\ + -o ${prefix} \\ + $args 2>&1 || test -s ${prefix}.txt + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.txt + touch ${prefix}_collapsed.txt + touch ${prefix}_ORFs_category.pdf + touch ${prefix}_psites.hd5 + """ +} diff --git a/modules/nf-core/ribocode/ribocode/meta.yml b/modules/nf-core/ribocode/ribocode/meta.yml new file mode 100644 index 00000000..a6a38276 --- /dev/null +++ b/modules/nf-core/ribocode/ribocode/meta.yml @@ -0,0 +1,120 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "ribocode_ribocode" +description: Call ORFs with RiboCode from Ribo-Seq data +keywords: + - ribo-seq + - ribosome profiling + - orf calling +tools: + - "ribocode": + description: "A package for detecting the actively translated ORFs using ribosome-profiling + data" + homepage: "https://github.com/xryanglab/RiboCode" + documentation: "https://github.com/xryanglab/RiboCode" + tool_dev_url: 
"https://github.com/xryanglab/RiboCode" + doi: "10.1093/nar/gky179" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing annotation information + e.g. [ id:'genome' ] + - annotation: + type: directory + description: Directory containing RiboCode annotation files from ribocode/prepare + pattern: "annotation" + - - meta3: + type: map + description: | + Groovy Map containing config information + e.g. [ id:'config' ] + - config: + type: file + description: RiboCode configuration file containing P-site offsets from ribocode/metaplots + pattern: "*_config.txt" + + ontologies: [] +output: + orf_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Text file containing all detected ORFs with detailed information + pattern: "*.txt" + ontologies: [] + orf_txt_collapsed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_collapsed.txt": + type: file + description: Text file containing collapsed ORFs (merged isoforms) + pattern: "*_collapsed.txt" + ontologies: [] + orf_pdf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_ORFs_category.pdf": + type: file + description: PDF file with ORF category distribution plots + pattern: "*_ORFs_category.pdf" + ontologies: [] + psites_hd5: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*_psites.hd5": + type: file + description: HDF5 file containing P-site positions + pattern: "*_psites.hd5" + ontologies: [] + versions_ribocode: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - ribocode: + type: string + description: Name of the tool + - RiboCode --version 2>&1: + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@JackCurragh" diff --git a/modules/nf-core/ribocode/ribocode/tests/main.nf.test b/modules/nf-core/ribocode/ribocode/tests/main.nf.test new file mode 100644 index 00000000..b3adc533 --- /dev/null +++ b/modules/nf-core/ribocode/ribocode/tests/main.nf.test @@ -0,0 +1,95 @@ +nextflow_process { + + name "Test Process RIBOCODE_RIBOCODE" + script "../main.nf" + process "RIBOCODE_RIBOCODE" + + tag "modules" + tag "modules_nfcore" + tag "ribocode" + tag "ribocode/ribocode" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'annotation' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/ribocode/annotation.tar.gz", checkIfExists: true) + ] + """ + } + } + } + + test("test_ribocode_ribocode") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780887.Aligned.toTranscriptome.out.bam", checkIfExists: true) + ] + input[1] = UNTAR.out.untar + input[2] = [ + [ id:'config' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/ribocode/test_pre_config.txt", checkIfExists: true) + ] + """ + } + } + + 
then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.orf_txt, + process.out.orf_txt_collapsed + ).match("orf_outputs") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("versions") }, + { assert process.out.orf_pdf[0][1].toString().endsWith('.pdf') }, + { assert process.out.psites_hd5[0][1].toString().endsWith('.hd5') } + ) + } + } + + test("test_ribocode_ribocode - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/aligned_reads/SRX11780887.Aligned.toTranscriptome.out.bam", checkIfExists: true) + ] + input[1] = [ + [ id:'annotation' ], + file("annotation") + ] + input[2] = [ + [ id:'config' ], + file("config.txt") + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.orf_txt[0][1][0].toString().endsWith('.txt') }, + { assert process.out.orf_txt_collapsed[0][1].toString().endsWith('_collapsed.txt') }, + { assert process.out.orf_pdf[0][1].toString().endsWith('.pdf') }, + { assert process.out.psites_hd5[0][1].toString().endsWith('.hd5') } + ) + } + } +} diff --git a/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap b/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap new file mode 100644 index 00000000..9b4ce343 --- /dev/null +++ b/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "orf_outputs": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.txt:md5,3c6c1f3ffff5f9c4f4e59fd4f52c56f4", + "test_collapsed.txt:md5,d1e13bb728ad0b0e79b9326c75c6e47a" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_collapsed.txt:md5,d1e13bb728ad0b0e79b9326c75c6e47a" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-15T15:07:54.349354" + }, + "versions": 
{ + "content": [ + { + "versions_ribocode": [ + [ + "RIBOCODE_RIBOCODE", + "ribocode", + "1.2.15" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T14:03:21.494462" + } +} \ No newline at end of file diff --git a/modules/nf-core/ribocode/ribocode/tests/nextflow.config b/modules/nf-core/ribocode/ribocode/tests/nextflow.config new file mode 100644 index 00000000..3ccf4a70 --- /dev/null +++ b/modules/nf-core/ribocode/ribocode/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: RIBOCODE_METAPLOTS { + ext.args = '-f0_percent 0.1 -pv1 1 -pv2 1' + } +} diff --git a/nextflow.config b/nextflow.config index 724d5de3..a40a6636 100644 --- a/nextflow.config +++ b/nextflow.config @@ -96,6 +96,11 @@ params { skip_ribotricer = false extra_ribotricer_prepareorfs_args = null extra_ribotricer_detectorfs_args = null + skip_ribocode = false + extra_ribocode_gtfupdate_args = null + extra_ribocode_prepare_args = null + extra_ribocode_metaplots_args = null + extra_ribocode_ribocode_args = null skip_ribowaltz = false extra_ribowaltz_args = null extra_anota2seq_run_args = null diff --git a/nextflow_schema.json b/nextflow_schema.json index fb2f7c04..8d3b2782 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -451,6 +451,22 @@ "type": "string", "description": "Extra arguments to pass to the ribotricer detect-orfs command in addition to defaults defined by the pipeline." }, + "extra_ribocode_gtfupdate_args": { + "type": "string", + "description": "Extra arguments to pass to the RiboCode GTFupdate command in addition to defaults defined by the pipeline." + }, + "extra_ribocode_prepare_args": { + "type": "string", + "description": "Extra arguments to pass to the RiboCode prepare_transcripts command in addition to defaults defined by the pipeline." + }, + "extra_ribocode_metaplots_args": { + "type": "string", + "description": "Extra arguments to pass to the RiboCode metaplots command in addition to defaults defined by the pipeline." + }, + "extra_ribocode_ribocode_args": { + "type": "string", + "description": "Extra arguments to pass to the RiboCode command in addition to defaults defined by the pipeline." + }, "extra_ribowaltz_args": { "type": "string", "description": "Extra arguments to pass to the riboWaltz command in addition to defaults defined by the pipeline." @@ -588,6 +604,11 @@ "description": "Skip Riboricer.", "fa_icon": "fas fa-fast-forward" }, + "skip_ribocode": { + "type": "boolean", + "description": "Skip RiboCode.", + "fa_icon": "fas fa-fast-forward" + }, "skip_ribowaltz": { "type": "boolean", "description": "Skip riboWaltz.", diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index e9bfb1c2..5622469f 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-12-03T16:26:32+00:00", - "description": "

\n \n \n \"nf-core/riboseq\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/riboseq)\n[![GitHub Actions CI Status](https://github.com/nf-core/riboseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/riboseq/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/riboseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/riboseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/riboseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10966364-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10966364)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.8-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/riboseq)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23riboseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/riboseq)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/riboseq** is a bioinformatics pipeline for analysis of Ribo-seq data. It borrows heavily from nf-core/rnaseq in the preprocessing stages:\n\n![nf-core/riboseq metro map](docs/images/nf-core-riboseq_metro_map.png)\n\n1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html))\n2. Sub-sample FastQ files and auto-infer strandedness ([`fq`](https://github.com/stjude-rust-labs/fq), [`Salmon`](https://combine-lab.github.io/salmon/))\n3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n4. UMI extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n5. Adapter and quality trimming ([`Trim Galore!`](https://github.com/FelixKrueger/TrimGalore))\n6. Removal of genome contaminants ([`BBSplit`](http://seqanswers.com/forums/showthread.php?t=41288))\n7. Removal of ribosomal RNA ([`SortMeRNA`](https://github.com/biocore/sortmerna))\n8. Genome alignment of reads, outputting both genome and transcriptome alignments with [`STAR`](https://github.com/alexdobin/STAR)\n9. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n10. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n\nDifferences occur in the downstream analysis steps. Currently these specialist steps are:\n\n1. 
Check reads distribution around annotated protein coding regions on user provided transcripts, show frame bias and estimate P-site offset for different group of reads ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish))\n2. (default, optional) Predict translated open reading frames and/ or translation initiation sites _de novo_ from alignment data ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish))\n3. (default, optional) Derive candidate ORFs from reference data and detect translated ORFs from that list ([`Ribotricer`](https://github.com/smithlabcode/ribotricer))\n4. (default, optional) Derive P-sites and QC from transcriptome alignments ([`riboWaltz`](https://github.com/LabTranslationalArchitectomics/riboWaltz))\n5. (optional) Use a translational efficiency approach to study the dynamics of transcription and translation, with [anota2seq](https://bioconductor.org/packages/release/bioc/html/anota2seq.html). **requires matched RNA-seq and Ribo-seq data**\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,strandedness,type\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward,riboseq\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end). Each row should have a 'type' value of `riboseq`, `tiseq` or `rnaseq`. 
Future iterations of the workflow will conduct paired analysis of matched riboseq and rnaseq samples to accomplish analysis types such as 'translational efficiency, but in the current version you should set this to `riboseq` or `tiseq` for reglar Ribo-seq or TI-seq data respectively.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/riboseq \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n### Including a translational efficiency analysis\n\n![anota2seq - fold change plot](docs/images/fc.png)\n\nIn the translational efficiency analysis provided by [anota2seq](https://bioconductor.org/packages/release/bioc/html/anota2seq.html), we use matched pairs of Ribo-seq and RNA-seq data to study the relationship between transcription and translation as they differ between two treatment groups. For example the test data for this workflow has a contrasts file like:\n\n```csv\nid,variable,reference,target,batch,pair\ntreated_vs_control,treatment,control,treated,,pair\n```\n\nThis describes how to compare groups of samples between treament groups, and between RNA-seq and Ribo-seq. In order the columns are:\n\n- `id`: a unique identifier to use for the contrast\n- 'variable`: which vaiable (column) of the sample sheet should be used to separate the treatment groups?\n- `reference`: which value of the variable column should be used to select samples to be used as the reference/ base group?\n- `target`: which value of the variable column should be used to select samples to be used as the target/treated group?\n- `batch`: (optional) specify a variable in the sample sheet that defines sample batches\n- `pair`: (optional) specify a variable in the sample sheet that defines sample pairing between RNA-seq and Ribo-seq samples. If not specified, it is assumed that the two types of sample are ordered the same.\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/riboseq/usage) and the [parameter documentation](https://nf-co.re/riboseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/riboseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/riboseq/output).\n\n## Credits\n\nnf-core/riboseq was originally written by [Jonathan Manning](https://github.com/pinin4fjords) (Bioinformatics Engineer at Seqera) with support from [Altos Labs](https://www.altoslabs.com/) and in discussion with [Felix Krueger](https://github.com/FelixKrueger) and [Christel Krueger](https://github.com/ChristelKrueger). 
We thank the following people for their input:\n\n- Anne Bresciani (ZS)\n- [Felipe Almeida](https://github.com/fmalmeida) (ZS)\n- [Mikhail Osipovitch](https://github.com/mosi223) (ZS)\n- [Edward Wallace](https://github.com/ewallace) (University of Edinburgh)\n- [Jack Tierney](https://github.com/JackCurragh) (University College Cork)\n- [Maxime U Garcia](https://github.com/maxulysse) (Seqera)\n- [Ira A Iosub](https://github.com/iraiosub) (The Francis Crick Institute)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#riboseq` channel](https://nfcore.slack.com/channels/riboseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/riboseq for your analysis, please cite it using the following doi: [10.5281/zenodo.10966364](https://doi.org/10.5281/zenodo.10966364)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/riboseq\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/riboseq)\n[![GitHub Actions CI Status](https://github.com/nf-core/riboseq/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/riboseq/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/riboseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/riboseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/riboseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10966364-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10966364)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.8-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/riboseq)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23riboseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/riboseq)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/riboseq** is a bioinformatics pipeline for analysis of Ribo-seq data. It borrows heavily from nf-core/rnaseq in the preprocessing stages:\n\n![nf-core/riboseq metro map](docs/images/nf-core-riboseq_metro_map.png)\n\n1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html))\n2. Sub-sample FastQ files and auto-infer strandedness ([`fq`](https://github.com/stjude-rust-labs/fq), [`Salmon`](https://combine-lab.github.io/salmon/))\n3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n4. UMI extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n5. Adapter and quality trimming ([`Trim Galore!`](https://github.com/FelixKrueger/TrimGalore))\n6. Removal of genome contaminants ([`BBSplit`](http://seqanswers.com/forums/showthread.php?t=41288))\n7. Removal of ribosomal RNA ([`SortMeRNA`](https://github.com/biocore/sortmerna))\n8. Genome alignment of reads, outputting both genome and transcriptome alignments with [`STAR`](https://github.com/alexdobin/STAR)\n9. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))\n10. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))\n\nDifferences occur in the downstream analysis steps. Currently these specialist steps are:\n\n1. 
Check reads distribution around annotated protein coding regions on user provided transcripts, show frame bias and estimate P-site offset for different group of reads ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish))\n2. (default, optional) Predict translated open reading frames and/ or translation initiation sites _de novo_ from alignment data ([`Ribo-TISH`](https://github.com/zhpn1024/ribotish))\n3. (default, optional) Derive candidate ORFs from reference data and detect translated ORFs from that list ([`Ribotricer`](https://github.com/smithlabcode/ribotricer))\n4. (default, optional) Identify translated ORFs using P-site periodicity and read density ([`RiboCode`](https://github.com/zhengtaoxiao/RiboCode))\n5. (default, optional) Derive P-sites and QC from transcriptome alignments ([`riboWaltz`](https://github.com/LabTranslationalArchitectomics/riboWaltz))\n6. (optional) Use a translational efficiency approach to study the dynamics of transcription and translation, with [anota2seq](https://bioconductor.org/packages/release/bioc/html/anota2seq.html). **requires matched RNA-seq and Ribo-seq data**\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,strandedness,type\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward,riboseq\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end). Each row should have a 'type' value of `riboseq`, `tiseq` or `rnaseq`. 
Future iterations of the workflow will conduct paired analysis of matched riboseq and rnaseq samples to accomplish analysis types such as 'translational efficiency, but in the current version you should set this to `riboseq` or `tiseq` for reglar Ribo-seq or TI-seq data respectively.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/riboseq \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n### Including a translational efficiency analysis\n\n![anota2seq - fold change plot](docs/images/fc.png)\n\nIn the translational efficiency analysis provided by [anota2seq](https://bioconductor.org/packages/release/bioc/html/anota2seq.html), we use matched pairs of Ribo-seq and RNA-seq data to study the relationship between transcription and translation as they differ between two treatment groups. For example the test data for this workflow has a contrasts file like:\n\n```csv\nid,variable,reference,target,batch,pair\ntreated_vs_control,treatment,control,treated,,pair\n```\n\nThis describes how to compare groups of samples between treament groups, and between RNA-seq and Ribo-seq. In order the columns are:\n\n- `id`: a unique identifier to use for the contrast\n- 'variable`: which vaiable (column) of the sample sheet should be used to separate the treatment groups?\n- `reference`: which value of the variable column should be used to select samples to be used as the reference/ base group?\n- `target`: which value of the variable column should be used to select samples to be used as the target/treated group?\n- `batch`: (optional) specify a variable in the sample sheet that defines sample batches\n- `pair`: (optional) specify a variable in the sample sheet that defines sample pairing between RNA-seq and Ribo-seq samples. If not specified, it is assumed that the two types of sample are ordered the same.\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/riboseq/usage) and the [parameter documentation](https://nf-co.re/riboseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/riboseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/riboseq/output).\n\n## Credits\n\nnf-core/riboseq was originally written by [Jonathan Manning](https://github.com/pinin4fjords) (Bioinformatics Engineer at Seqera) with support from [Altos Labs](https://www.altoslabs.com/) and in discussion with [Felix Krueger](https://github.com/FelixKrueger) and [Christel Krueger](https://github.com/ChristelKrueger). 
We thank the following people for their input:\n\n- Anne Bresciani (ZS)\n- [Felipe Almeida](https://github.com/fmalmeida) (ZS)\n- [Mikhail Osipovitch](https://github.com/mosi223) (ZS)\n- [Edward Wallace](https://github.com/ewallace) (University of Edinburgh)\n- [Jack Tierney](https://github.com/JackCurragh) (University College Cork)\n- [Maxime U Garcia](https://github.com/maxulysse) (Seqera)\n- [Ira A Iosub](https://github.com/iraiosub) (The Francis Crick Institute)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#riboseq` channel](https://nfcore.slack.com/channels/riboseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/riboseq for your analysis, please cite it using the following doi: [10.5281/zenodo.10966364](https://doi.org/10.5281/zenodo.10966364)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/workflows/riboseq/main.nf b/workflows/riboseq/main.nf index 1eefacae..0eb4addf 100644 --- a/workflows/riboseq/main.nf +++ b/workflows/riboseq/main.nf @@ -31,6 +31,10 @@ include { RIBOTISH_PREDICT as RIBOTISH_PREDICT_INDIVIDUAL } from '../../mod include { RIBOTISH_PREDICT as RIBOTISH_PREDICT_ALL } from '../../modules/nf-core/ribotish/predict' include { RIBOTRICER_PREPAREORFS } from '../../modules/nf-core/ribotricer/prepareorfs' include { RIBOTRICER_DETECTORFS } from '../../modules/nf-core/ribotricer/detectorfs' +include { RIBOCODE_GTFUPDATE } from '../../modules/nf-core/ribocode/gtfupdate' +include { RIBOCODE_PREPARE } from '../../modules/nf-core/ribocode/prepare' +include { RIBOCODE_METAPLOTS } from '../../modules/nf-core/ribocode/metaplots' +include { RIBOCODE_RIBOCODE } from '../../modules/nf-core/ribocode/ribocode' include { ANOTA2SEQ_ANOTA2SEQRUN } from '../../modules/nf-core/anota2seq/anota2seqrun' include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON } from '../../subworkflows/nf-core/quantify_pseudo_alignment' include { RIBOWALTZ } from '../../modules/nf-core/ribowaltz/main' @@ -289,6 +293,51 @@ workflow RIBOSEQ { ch_versions = ch_versions.mix(RIBOTRICER_DETECTORFS.out.versions) } + if (!params.skip_ribocode){ + // RiboCode requires transcriptome BAMs + ch_transcriptome_bams_for_ribocode = ch_transcriptome_bam + .branch { meta, bam -> + riboseq: meta.sample_type == 'riboseq' + return [ meta, bam ] + } + .riboseq + + // Step 1: Update GTF annotation + RIBOCODE_GTFUPDATE( + ch_gtf.map { [ [:], it ] }.first() + ) + + // Step 2: Prepare annotation files + RIBOCODE_PREPARE( + ch_fasta.map { [ [:], it ] }.first(), + RIBOCODE_GTFUPDATE.out.gtf + ) + + // Step 3: Generate metaplots and config for each sample + RIBOCODE_METAPLOTS( + ch_transcriptome_bams_for_ribocode, + RIBOCODE_PREPARE.out.annotation + ) + + // Step 4: Run RiboCode 
ORF detection + // Pair each BAM with its own P-site config by joining on meta, then split + // the joined records into two channels that stay in the same order. The + // annotation is shared by every sample, so it is passed to the process + // directly rather than being embedded in each joined record. + ch_ribocode_inputs = ch_transcriptome_bams_for_ribocode + .join(RIBOCODE_METAPLOTS.out.config) + .multiMap { meta, bam, config -> + bam: [ meta, bam ] + config: [ meta, config ] + } + + RIBOCODE_RIBOCODE( + ch_ribocode_inputs.bam, + RIBOCODE_PREPARE.out.annotation, + ch_ribocode_inputs.config + ) + } + // // Get P-sites and P-site diagnostics with riboWaltz