nf-core · pinin4fjords · Dec 19, 2025 · Dec 19, 2025 · Dec 19, 2025 · Dec 19, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#131](https://github.com/nf-core/riboseq/pull/131) - Add ribotish quality output routing to MultiQC ([@pinin4fjords](https://github.com/pinin4fjords))
 - [#135](https://github.com/nf-core/riboseq/pull/135) - Add optional read length equalisation to trim RNA-seq reads to match Ribo-seq lengths before quantification ([@pinin4fjords](https://github.com/pinin4fjords))
 - [#138](https://github.com/nf-core/riboseq/pull/138) - Add MultiQC configuration for BBSplit, Bowtie2 rRNA removal, UMItools, and UMIcollapse ([@pinin4fjords](https://github.com/pinin4fjords))
+- [#139](https://github.com/nf-core/riboseq/pull/139) - Add riboWaltz QC plots to MultiQC report (P-site regions, reading frames, metaprofiles) ([@pinin4fjords](https://github.com/pinin4fjords))
 
 ### `Fixed`
 

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -1,6 +1,57 @@
 report_comment: >
   This report has been generated by the <a href="https://github.com/nf-core/riboseq/tree/dev" target="_blank">nf-core/riboseq</a> analysis pipeline. For information about how to interpret these results, please see the <a href="https://nf-co.re/riboseq/dev/docs/output" target="_blank">documentation</a>.
 report_section_order:
+  # Important checks and failures
+  fail_trimmed_samples-module:
+    order: 5003
+  fail_mapped_samples-module:
+    order: 5002
+  fail_strand_check-module:
+    order: 5001
+  # Preprocessing and pre-alignment QC
+  fastqc_raw:
+    order: 4004
+  cutadapt:
+    order: 4003
+  fastp:
+    order: 4003
+  fastqc_trimmed:
+    order: 4002
+  sortmerna:
+    order: 4001
+  # Alignment
+  star:
+    order: 3001
+  hisat2:
+    order: 3001
+  # Post-alignment QC
+  samtools:
+    order: 3000
+  picard:
+    order: 3000
+  rseqc:
+    order: 3000
+  qualimap:
+    order: 3000
+  # Ribo-seq specific QC
+  ribowaltz:
+    order: 2500
+  ribotish:
+    order: 2500
+  # Post-quantification QC
+  star_rsem_deseq2_pca:
+    order: 1005
+  star_rsem_deseq2_clustering:
+    order: 1004
+  star_salmon_deseq2_pca:
+    order: 1003
+  star_salmon_deseq2_clustering:
+    order: 1002
+  salmon_deseq2_pca:
+    order: 1001
+  salmon_deseq2_clustering:
+    order: 1000
+  # Summaries
   "nf-core-riboseq-methods-description":
     order: -1000
   software_versions:
@@ -47,21 +98,9 @@ run_modules:
   - qualimap
   - ribotish
 
-# Order of modules
-top_modules:
-  - "fail_trimmed_samples"
-  - "fail_mapped_samples"
-  - "fail_strand_check"
-  - "star_rsem_deseq2_pca"
-  - "star_rsem_deseq2_clustering"
-  - "star_salmon_deseq2_pca"
-  - "star_salmon_deseq2_clustering"
-  - "salmon_deseq2_pca"
-  - "salmon_deseq2_clustering"
-  - "biotype_counts"
-
 module_order:
   - fastqc:
+      anchor: "fastqc_raw"
       name: "FastQC (raw)"
       info: "This section of the report shows FastQC results before adapter trimming."
       path_filters:
@@ -79,6 +118,7 @@ module_order:
       path_filters:
         - "*.riboseq_readlength_stats.tsv"
   - fastqc:
+      anchor: "fastqc_trimmed"
       name: "FastQC (trimmed)"
       info: "This section of the report shows FastQC results after adapter trimming."
       path_filters:
@@ -90,6 +130,7 @@ module_order:
       info: "Bowtie2 alignment statistics for rRNA removal. Reads were aligned against an rRNA reference to filter out ribosomal RNA contamination."
   - star
   - samtools
+  - ribowaltz
   - umitools
   - umicollapse
   - ribotish

diff --git a/bin/ribowaltz_to_mqc.py b/bin/ribowaltz_to_mqc.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+Transform ribowaltz TSV output to MultiQC-compatible format.
+
+Usage:
+    ribowaltz_to_mqc.py psite_region <input_files...> > output.tsv
+    ribowaltz_to_mqc.py frames <input_files...> > output.tsv
+    ribowaltz_to_mqc.py metaprofile_start <input_files...> > output.json
+    ribowaltz_to_mqc.py metaprofile_stop <input_files...> > output.json
+"""
+
+import sys
+import csv
+import json
+from collections import defaultdict
+
+# Common constants
+PARENT_ID = "ribowaltz"
+PARENT_NAME = "riboWaltz"
+PARENT_DESC = 'Quality control metrics from <a href="https://github.com/LabTranslationalArchitectomics/riboWaltz">riboWaltz</a> for assessing Ribo-seq data quality.'
+
+REGIONS = ["5' UTR", "CDS", "3' UTR"]
+REGION_MAP = {"5' UTR": "5' UTR", "5utr": "5' UTR", "CDS": "CDS", "cds": "CDS", "3' UTR": "3' UTR", "3utr": "3' UTR"}
+
+
+def read_tsv_files(files):
+    """Yield rows from multiple TSV files."""
+    for filepath in files:
+        with open(filepath, 'r') as f:
+            yield from csv.DictReader(f, delimiter='\t')
+
+
+def print_bargraph_header(plot_id, section_name, description, ylab="% of P-sites"):
+    """Print common YAML header for bargraph plots."""
+    print(f"# id: '{plot_id}'")
+    print(f"# section_name: '{section_name}'")
+    print(f"# description: '{description}'")
+    print(f"# parent_id: '{PARENT_ID}'")
+    print(f"# parent_name: '{PARENT_NAME}'")
+    print(f"# parent_description: '{PARENT_DESC}'")
+    print("# plot_type: 'bargraph'")
+    print("# pconfig:")
+    print(f"#     id: '{plot_id}_plot'")
+    print(f"#     title: '{PARENT_NAME}: {section_name}'")
+    print(f"#     ylab: '{ylab}'")
+    print("#     cpswitch: false")
+
+
+def transform_psite_region(files):
+    """Transform psite_region.tsv files to wide format for MultiQC bargraph."""
+    data = defaultdict(dict)
+    rna_ref = {}
+
+    for row in read_tsv_files(files):
+        sample = row['sample']
+        region = REGION_MAP.get(row['region'], row['region'])
+
+        if sample == 'RNAs':
+            if region in REGIONS and region not in rna_ref:
+                rna_ref[region] = float(row['scaled_count'])
+        elif region in REGIONS:
+            data[sample][region] = float(row['scaled_count'])
+
+    print_bargraph_header(
+        "ribowaltz_1_psite_regions",
+        "P-site Region Distribution",
+        "Distribution of P-sites across transcript regions. Good Ribo-seq data shows strong CDS enrichment (>70%). The RNA-seq reference shows expected distribution from uniform transcript coverage."
+    )
+
+    print("Sample\t" + "\t".join(REGIONS))
+    if rna_ref:
+        values = [str(round(rna_ref.get(r, 0), 1)) for r in REGIONS]
+        print(f"<b>RNA-seq reference</b>\t" + "\t".join(values))
+    for sample in sorted(data.keys()):
+        values = [str(round(data[sample].get(r, 0), 1)) for r in REGIONS]
+        print(f"{sample}\t" + "\t".join(values))
+
+
+def transform_frames(files):
+    """Transform frames.tsv files to MultiQC custom content."""
+    data = defaultdict(lambda: defaultdict(dict))
+    frames = ["Frame 0", "Frame 1", "Frame 2"]
+    region_keys = ["5utr", "cds", "3utr"]
+    region_labels = {"5utr": "5' UTR", "cds": "CDS", "3utr": "3' UTR"}
+    to_key = {"5' UTR": "5utr", "5utr": "5utr", "CDS": "cds", "cds": "cds", "3' UTR": "3utr", "3utr": "3utr"}
+
+    for row in read_tsv_files(files):
+        sample = row['sample']
+        region = to_key.get(row['region'], row['region'])
+        frame = f"Frame {row['frame']}"
+        if region in region_keys and frame in frames:
+            data[sample][region][frame] = float(row['scaled_count'])
+
+    samples = sorted(data.keys())
+
+    print_bargraph_header(
+        "ribowaltz_2_frames",
+        "Reading Frame Distribution",
+        "Distribution of P-sites across reading frames for each transcript region. Good Ribo-seq data shows Frame 0 enrichment (>50%) in the CDS but not in UTRs."
+    )
+
+    if samples:
+        print("#     sample_groups:")
+        for region in region_keys:
+            label = region_labels[region]
+            print(f'#         "{label}":')
+            for sample in samples:
+                print(f'#             - ["{sample}_{label}", "{sample}"]')
+
+    print("Sample\t" + "\t".join(frames))
+    for sample in samples:
+        for region in region_keys:
+            label = region_labels[region]
+            values = [str(round(data[sample][region].get(f, 0), 1)) for f in frames]
+            print(f"{sample}_{label}\t" + "\t".join(values))
+
+
+def transform_metaprofile(files, region_filter):
+    """Transform metaprofile_psite.tsv files to MultiQC linegraph JSON format."""
+    data = defaultdict(list)
+    target_region = f"Distance from {region_filter} (nt)"
+
+    for row in read_tsv_files(files):
+        if row['region'] == target_region:
+            data[row['sample']].append([int(float(row['x'])), round(float(row['y']), 2)])
+
+    for sample in data:
+        data[sample].sort(key=lambda p: p[0])
+
+    section_name = f"Metaprofile ({region_filter.title()} Codon)"
+    plot_id = f"ribowaltz_{'3' if region_filter == 'start' else '4'}_metaprofile_{region_filter}"
+
+    output = {
+        "id": plot_id,
+        "section_name": section_name,
+        "description": f"P-site frequency around the {region_filter} codon. Good Ribo-seq data shows trinucleotide periodicity with peaks at frame 0 positions.",
+        "parent_id": PARENT_ID,
+        "parent_name": PARENT_NAME,
+        "parent_description": PARENT_DESC,
+        "plot_type": "linegraph",
+        "pconfig": {
+            "id": f"{plot_id}_plot",
+            "title": f"{PARENT_NAME}: {section_name}",
+            "xlab": f"Distance from {region_filter} codon (nt)",
+            "ylab": "P-site frequency",
+            "x_decimals": False
+        },
+        "data": dict(sorted(data.items()))
+    }
+    print(json.dumps(output, indent=2))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print(__doc__, file=sys.stderr)
+        sys.exit(1)
+
+    mode, files = sys.argv[1], sys.argv[2:]
+    modes = {
+        "psite_region": lambda: transform_psite_region(files),
+        "frames": lambda: transform_frames(files),
+        "metaprofile_start": lambda: transform_metaprofile(files, "start"),
+        "metaprofile_stop": lambda: transform_metaprofile(files, "stop"),
+    }
+
+    if mode in modes:
+        modes[mode]()
+    else:
+        print(f"Unknown mode: {mode}", file=sys.stderr)
+        print(__doc__, file=sys.stderr)
+        sys.exit(1)
diff --git a/conf/modules.config b/conf/modules.config
@@ -920,6 +920,26 @@ if (!params.skip_ribowaltz) {
     process {
         withName: 'RIBOWALTZ' {
             ext.args   = { params.extra_ribowaltz_args ?: '' }
+            publishDir = [
+                [
+                    path: { "${params.outdir}/riboseq_qc/ribowaltz" },
+                    mode: params.publish_dir_mode,
+                    pattern: "{ribowaltz_qc,offset_plot}/*",
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                ],
+                [
+                    path: { "${params.outdir}/riboseq_qc/ribowaltz" },
+                    mode: params.publish_dir_mode,
+                    pattern: "*.{best_offset.txt,psite_offset.tsv,psite_offset.tsv.gz}",
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                ],
+                [
+                    path: { "${params.outdir}/other/ribowaltz" },
+                    mode: params.publish_dir_mode,
+                    pattern: "*.{psite,codon_coverage_rpf,codon_coverage_psite,cds_coverage_psite,nt_coverage_psite}.tsv{,.gz}",
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                ]
+            ]
         }
     }
 }

diff --git a/modules.json b/modules.json
@@ -122,7 +122,7 @@
                     },
                     "ribowaltz": {
                         "branch": "master",
-                        "git_sha": "5111d7a79aa8071bf2758cf64d6f5688879cc2be",
+                        "git_sha": "b59f74e059a49fce82f19fbf684e2876da85ee39",
                         "installed_by": ["modules"]
                     },
                     "rsem/preparereference": {

diff --git a/modules/local/ribowaltz_mqc/main.nf b/modules/local/ribowaltz_mqc/main.nf
@@ -0,0 +1,31 @@
+process RIBOWALTZ_MQC {
+    tag "ribowaltz_mqc"
+    label 'process_single'
+
+    conda "conda-forge::python=3.9"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.9' :
+        'biocontainers/python:3.9' }"
+
+    input:
+    path psite_region_files
+    path frames_files
+    path metaprofile_files
+
+    output:
+    path "ribowaltz_psite_regions_mqc.tsv"      , emit: psite_regions_mqc   , optional: true
+    path "ribowaltz_frames_mqc.tsv"             , emit: frames_mqc          , optional: true
+    path "ribowaltz_metaprofile_start_mqc.json" , emit: metaprofile_start_mqc, optional: true
+    path "ribowaltz_metaprofile_stop_mqc.json"  , emit: metaprofile_stop_mqc , optional: true
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    ribowaltz_to_mqc.py psite_region ${psite_region_files} > ribowaltz_psite_regions_mqc.tsv
+    ribowaltz_to_mqc.py frames ${frames_files} > ribowaltz_frames_mqc.tsv
+    ribowaltz_to_mqc.py metaprofile_start ${metaprofile_files} > ribowaltz_metaprofile_start_mqc.json
+    ribowaltz_to_mqc.py metaprofile_stop ${metaprofile_files} > ribowaltz_metaprofile_stop_mqc.json
+    """
+}
diff --git a/modules/nf-core/ribowaltz/main.nf b/modules/nf-core/ribowaltz/main.nf
diff --git a/modules/nf-core/ribowaltz/meta.yml b/modules/nf-core/ribowaltz/meta.yml