Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#131](https://github.com/nf-core/riboseq/pull/131) - Add ribotish quality output routing to MultiQC ([@pinin4fjords](https://github.com/pinin4fjords))
- [#135](https://github.com/nf-core/riboseq/pull/135) - Add optional read length equalisation to trim RNA-seq reads to match Ribo-seq lengths before quantification ([@pinin4fjords](https://github.com/pinin4fjords))
- [#138](https://github.com/nf-core/riboseq/pull/138) - Add MultiQC configuration for BBSplit, Bowtie2 rRNA removal, UMItools, and UMIcollapse ([@pinin4fjords](https://github.com/pinin4fjords))
- [#139](https://github.com/nf-core/riboseq/pull/139) - Add riboWaltz QC plots to MultiQC report (P-site regions, reading frames, metaprofiles) ([@pinin4fjords](https://github.com/pinin4fjords))

### `Fixed`

Expand Down
67 changes: 54 additions & 13 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,57 @@
report_comment: >
This report has been generated by the <a href="https://github.com/nf-core/riboseq/tree/dev" target="_blank">nf-core/riboseq</a> analysis pipeline. For information about how to interpret these results, please see the <a href="https://nf-co.re/riboseq/dev/docs/output" target="_blank">documentation</a>.
report_section_order:
# Important checks and failures
fail_trimmed_samples-module:
order: 5003
fail_mapped_samples-module:
order: 5002
fail_strand_check-module:
order: 5001
# Preprocessing and pre-alignment QC
fastqc_raw:
order: 4004
cutadapt:
order: 4003
fastp:
order: 4003
fastqc_trimmed:
order: 4002
sortmerna:
order: 4001
# Alignment
star:
order: 3001
hisat2:
order: 3001
# Post-alignment QC
samtools:
order: 3000
picard:
order: 3000
rseqc:
order: 3000
qualimap:
order: 3000
# Ribo-seq specific QC
ribowaltz:
order: 2500
ribotish:
order: 2500
# Post-quantification QC
star_rsem_deseq2_pca:
order: 1005
star_rsem_deseq2_clustering:
order: 1004
star_salmon_deseq2_pca:
order: 1003
star_salmon_deseq2_clustering:
order: 1002
salmon_deseq2_pca:
order: 1001
salmon_deseq2_clustering:
order: 1000
# Summaries
"nf-core-riboseq-methods-description":
order: -1000
software_versions:
Expand Down Expand Up @@ -47,21 +98,9 @@ run_modules:
- qualimap
- ribotish

# Order of modules
top_modules:
- "fail_trimmed_samples"
- "fail_mapped_samples"
- "fail_strand_check"
- "star_rsem_deseq2_pca"
- "star_rsem_deseq2_clustering"
- "star_salmon_deseq2_pca"
- "star_salmon_deseq2_clustering"
- "salmon_deseq2_pca"
- "salmon_deseq2_clustering"
- "biotype_counts"

module_order:
- fastqc:
anchor: "fastqc_raw"
name: "FastQC (raw)"
info: "This section of the report shows FastQC results before adapter trimming."
path_filters:
Expand All @@ -79,6 +118,7 @@ module_order:
path_filters:
- "*.riboseq_readlength_stats.tsv"
- fastqc:
anchor: "fastqc_trimmed"
name: "FastQC (trimmed)"
info: "This section of the report shows FastQC results after adapter trimming."
path_filters:
Expand All @@ -90,6 +130,7 @@ module_order:
info: "Bowtie2 alignment statistics for rRNA removal. Reads were aligned against an rRNA reference to filter out ribosomal RNA contamination."
- star
- samtools
- ribowaltz
- umitools
- umicollapse
- ribotish
Expand Down
171 changes: 171 additions & 0 deletions bin/ribowaltz_to_mqc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env python3
"""
Transform ribowaltz TSV output to MultiQC-compatible format.

Usage:
ribowaltz_to_mqc.py psite_region <input_files...> > output.tsv
ribowaltz_to_mqc.py frames <input_files...> > output.tsv
ribowaltz_to_mqc.py metaprofile_start <input_files...> > output.json
ribowaltz_to_mqc.py metaprofile_stop <input_files...> > output.json
"""

import sys
import csv
import json
from collections import defaultdict

# Common constants
PARENT_ID = "ribowaltz"
PARENT_NAME = "riboWaltz"
PARENT_DESC = 'Quality control metrics from <a href="https://github.com/LabTranslationalArchitectomics/riboWaltz">riboWaltz</a> for assessing Ribo-seq data quality.'

REGIONS = ["5' UTR", "CDS", "3' UTR"]
REGION_MAP = {"5' UTR": "5' UTR", "5utr": "5' UTR", "CDS": "CDS", "cds": "CDS", "3' UTR": "3' UTR", "3utr": "3' UTR"}


def read_tsv_files(files):
"""Yield rows from multiple TSV files."""
for filepath in files:
with open(filepath, 'r') as f:
yield from csv.DictReader(f, delimiter='\t')


def print_bargraph_header(plot_id, section_name, description, ylab="% of P-sites"):
"""Print common YAML header for bargraph plots."""
print(f"# id: '{plot_id}'")
print(f"# section_name: '{section_name}'")
print(f"# description: '{description}'")
print(f"# parent_id: '{PARENT_ID}'")
print(f"# parent_name: '{PARENT_NAME}'")
print(f"# parent_description: '{PARENT_DESC}'")
print("# plot_type: 'bargraph'")
print("# pconfig:")
print(f"# id: '{plot_id}_plot'")
print(f"# title: '{PARENT_NAME}: {section_name}'")
print(f"# ylab: '{ylab}'")
print("# cpswitch: false")


def transform_psite_region(files):
"""Transform psite_region.tsv files to wide format for MultiQC bargraph."""
data = defaultdict(dict)
rna_ref = {}

for row in read_tsv_files(files):
sample = row['sample']
region = REGION_MAP.get(row['region'], row['region'])

if sample == 'RNAs':
if region in REGIONS and region not in rna_ref:
rna_ref[region] = float(row['scaled_count'])
elif region in REGIONS:
data[sample][region] = float(row['scaled_count'])

print_bargraph_header(
"ribowaltz_1_psite_regions",
"P-site Region Distribution",
"Distribution of P-sites across transcript regions. Good Ribo-seq data shows strong CDS enrichment (>70%). The RNA-seq reference shows expected distribution from uniform transcript coverage."
)

print("Sample\t" + "\t".join(REGIONS))
if rna_ref:
values = [str(round(rna_ref.get(r, 0), 1)) for r in REGIONS]
print(f"<b>RNA-seq reference</b>\t" + "\t".join(values))
for sample in sorted(data.keys()):
values = [str(round(data[sample].get(r, 0), 1)) for r in REGIONS]
print(f"{sample}\t" + "\t".join(values))


def transform_frames(files):
"""Transform frames.tsv files to MultiQC custom content."""
data = defaultdict(lambda: defaultdict(dict))
frames = ["Frame 0", "Frame 1", "Frame 2"]
region_keys = ["5utr", "cds", "3utr"]
region_labels = {"5utr": "5' UTR", "cds": "CDS", "3utr": "3' UTR"}
to_key = {"5' UTR": "5utr", "5utr": "5utr", "CDS": "cds", "cds": "cds", "3' UTR": "3utr", "3utr": "3utr"}

for row in read_tsv_files(files):
sample = row['sample']
region = to_key.get(row['region'], row['region'])
frame = f"Frame {row['frame']}"
if region in region_keys and frame in frames:
data[sample][region][frame] = float(row['scaled_count'])

samples = sorted(data.keys())

print_bargraph_header(
"ribowaltz_2_frames",
"Reading Frame Distribution",
"Distribution of P-sites across reading frames for each transcript region. Good Ribo-seq data shows Frame 0 enrichment (>50%) in the CDS but not in UTRs."
)

if samples:
print("# sample_groups:")
for region in region_keys:
label = region_labels[region]
print(f'# "{label}":')
for sample in samples:
print(f'# - ["{sample}_{label}", "{sample}"]')

print("Sample\t" + "\t".join(frames))
for sample in samples:
for region in region_keys:
label = region_labels[region]
values = [str(round(data[sample][region].get(f, 0), 1)) for f in frames]
print(f"{sample}_{label}\t" + "\t".join(values))


def transform_metaprofile(files, region_filter):
"""Transform metaprofile_psite.tsv files to MultiQC linegraph JSON format."""
data = defaultdict(list)
target_region = f"Distance from {region_filter} (nt)"

for row in read_tsv_files(files):
if row['region'] == target_region:
data[row['sample']].append([int(float(row['x'])), round(float(row['y']), 2)])

for sample in data:
data[sample].sort(key=lambda p: p[0])

section_name = f"Metaprofile ({region_filter.title()} Codon)"
plot_id = f"ribowaltz_{'3' if region_filter == 'start' else '4'}_metaprofile_{region_filter}"

output = {
"id": plot_id,
"section_name": section_name,
"description": f"P-site frequency around the {region_filter} codon. Good Ribo-seq data shows trinucleotide periodicity with peaks at frame 0 positions.",
"parent_id": PARENT_ID,
"parent_name": PARENT_NAME,
"parent_description": PARENT_DESC,
"plot_type": "linegraph",
"pconfig": {
"id": f"{plot_id}_plot",
"title": f"{PARENT_NAME}: {section_name}",
"xlab": f"Distance from {region_filter} codon (nt)",
"ylab": "P-site frequency",
"x_decimals": False
},
"data": dict(sorted(data.items()))
}
print(json.dumps(output, indent=2))


if __name__ == "__main__":
if len(sys.argv) < 3:
print(__doc__, file=sys.stderr)
sys.exit(1)

mode, files = sys.argv[1], sys.argv[2:]
modes = {
"psite_region": lambda: transform_psite_region(files),
"frames": lambda: transform_frames(files),
"metaprofile_start": lambda: transform_metaprofile(files, "start"),
"metaprofile_stop": lambda: transform_metaprofile(files, "stop"),
}

if mode in modes:
modes[mode]()
else:
print(f"Unknown mode: {mode}", file=sys.stderr)
print(__doc__, file=sys.stderr)
sys.exit(1)
20 changes: 20 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,26 @@ if (!params.skip_ribowaltz) {
process {
withName: 'RIBOWALTZ' {
ext.args = { params.extra_ribowaltz_args ?: '' }
publishDir = [
[
path: { "${params.outdir}/riboseq_qc/ribowaltz" },
mode: params.publish_dir_mode,
pattern: "{ribowaltz_qc,offset_plot}/*",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
],
[
path: { "${params.outdir}/riboseq_qc/ribowaltz" },
mode: params.publish_dir_mode,
pattern: "*.{best_offset.txt,psite_offset.tsv,psite_offset.tsv.gz}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
],
[
path: { "${params.outdir}/other/ribowaltz" },
mode: params.publish_dir_mode,
pattern: "*.{psite,codon_coverage_rpf,codon_coverage_psite,cds_coverage_psite,nt_coverage_psite}.tsv{,.gz}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
]
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@
},
"ribowaltz": {
"branch": "master",
"git_sha": "5111d7a79aa8071bf2758cf64d6f5688879cc2be",
"git_sha": "b59f74e059a49fce82f19fbf684e2876da85ee39",
"installed_by": ["modules"]
},
"rsem/preparereference": {
Expand Down
31 changes: 31 additions & 0 deletions modules/local/ribowaltz_mqc/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
process RIBOWALTZ_MQC {
tag "ribowaltz_mqc"
label 'process_single'

conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
path psite_region_files
path frames_files
path metaprofile_files

output:
path "ribowaltz_psite_regions_mqc.tsv" , emit: psite_regions_mqc , optional: true
path "ribowaltz_frames_mqc.tsv" , emit: frames_mqc , optional: true
path "ribowaltz_metaprofile_start_mqc.json" , emit: metaprofile_start_mqc, optional: true
path "ribowaltz_metaprofile_stop_mqc.json" , emit: metaprofile_stop_mqc , optional: true

when:
task.ext.when == null || task.ext.when

script:
"""
ribowaltz_to_mqc.py psite_region ${psite_region_files} > ribowaltz_psite_regions_mqc.tsv
ribowaltz_to_mqc.py frames ${frames_files} > ribowaltz_frames_mqc.tsv
ribowaltz_to_mqc.py metaprofile_start ${metaprofile_files} > ribowaltz_metaprofile_start_mqc.json
ribowaltz_to_mqc.py metaprofile_stop ${metaprofile_files} > ribowaltz_metaprofile_stop_mqc.json
"""
}
4 changes: 4 additions & 0 deletions modules/nf-core/ribowaltz/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions modules/nf-core/ribowaltz/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading