diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e2003fe..a21e2f4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,8 @@ on: env: NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -24,23 +26,29 @@ jobs: strategy: matrix: NXF_VER: - - "23.04.0" + - "24.04.0" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Download Tiny test data + # Download A fungal test data set that is full enough to show some real output. + # Needs a kmer db for merqury + run: | + curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - + cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa + cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix + # Skip BTK and CPRETEXT as they are already tested on their repos. 
run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext,merquryfk diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe8..8c9f4e6 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,7 +14,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + - uses: actions/checkout@v4 - name: Set up Python 3.12 uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 @@ -44,7 +44,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==2.8.0 - name: Run nf-core lint env: diff --git a/.nf-core.yml b/.nf-core.yml index 9a35f55..b3bc23b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -4,21 +4,36 @@ lint: - assets/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_dark.png + - lib/nfcore_external_java_deps.jar + - lib/NfcoreSchema.groovy + - lib/NfcoreTemplate.groovy + - lib/Utils.groovy + - lib/WorkflowMain.groovy - .github/ISSUE_TEMPLATE/config.yml - .github/workflows/awstest.yml - .github/workflows/awsfulltest.yml - conf/igenomes.config files_unchanged: + - LICENSE - CODE_OF_CONDUCT.md - assets/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/CONTRIBUTING.md + - .github/PULL_REQUEST_TEMPLATE.md + - .github/workflows/branch.yml + - .github/workflows/linting_comment.yml + - .github/workflows/linting.yml + - assets/email_template.html 
multiqc_config: - report_comment nextflow_config: - manifest.name - manifest.homePage + - params.show_hidden_params + - params.schema_ignore_params + - params.validationSchemaIgnoreParams nf_core_version: 2.14.1 repository_type: pipeline template: diff --git a/CHANGELOG.md b/CHANGELOG.md index 3173f7c..f8847d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,39 +2,63 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type). +Naming based on: [Audiologists](https://en.wikipedia.org/wiki/Category:Audiologists). -## v1.0.0 - Aquatic Bahamut [21/08/2024] +## v0.6.2 - Robert Beiny H2 [09/01/2025] + +- Modules have been updated to remove conda defaults. + +### Software dependencies + +| Dependency | Old version | New version | +| ---------------------------- | ------------------- | ------------------- | +| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | +| sanger-tol/curationpretext\* | 1.0.0 (UNSC Cradle) | 1.1.0 (UNSC Delphi) | +| GFASTATS | | 1.3.6--hdcf5f25_3 | +| MERQURY_FK | | 1.2 | +| MINIMAP2_ALIGN | | 2.28 | +| SAMTOOLS_MERGE | 1.20--h50ea8bc_0 | 1.21--h50ea8bc_0 | +| SAMTOOLS_SORT | 1.20--h50ea8bc_0 | 1.21--h50ea8bc_0 | + +## v0.6.1 - Robert Beiny H1 [08/10/2024] + +- Blobtoolkit version was specified in the wrong location, so it defaulted to the development branch "draft_assemblies"; this has now been updated to v0.6.0. +- Zenodo DOI has now been added to the repo. + +## v0.6.0 - Robert Beiny [20/09/2024] Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template. The current pipeline means the MVP for ear. ### Added + GFASTATS to generate statistics on the input primary genome. MERQURY_FK to generate kmer graphs and analyses of the primary, haplotype and merged assembly.
+MAIN_MAPPING which is a small mapping subworkflow, that can work with single and paired reads. BLOBTOOLKIT to generate busco files and blobtoolkit dataset/plots. CURATIONPRETEXT to generate pretext plots and pngs. ### Parameters -| Old parameter | New parameter | -| --------------- | ------------- | -| | --mapped | +| Old parameter | New parameter | +| ------------- | ------------- | +| | --mapped | +| | --steps | ### Software dependencies -| Dependency | Old version | New version | -| ----------- | ------------- | ------------- | -| sanger-tol/blobtoolkit* | | draft_assemblies | -| sanger-tol/curationpretext* | | 1.0.0 (UNSC Cradle) | -| GFASTATS | | 1.3.6--hdcf5f25_3 | -| MERQUERY_FK | | 1.2 | -| MINIMAP2_ALIGN | | 2.28 | -| SAMTOOLS_MERGE | | 1.20--h50ea8bc_0 | -| SAMTOOLS_SORT | | 1.20--h50ea8bc_0 | -| +| Dependency | Old version | New version | +| ---------------------------- | ----------- | ------------------- | +| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | +| sanger-tol/curationpretext\* | | 1.0.0 (UNSC Cradle) | +| GFASTATS | | 1.3.6--hdcf5f25_3 | +| MERQUERY_FK | | 1.2 | +| MINIMAP2_ALIGN | | 2.28 | +| SAMTOOLS_MERGE | | 1.20--h50ea8bc_0 | +| SAMTOOLS_SORT | | 1.20--h50ea8bc_0 | -- Note: for pipelines, please check their own CHANGELOG file for a full list of software dependencies. +\* for pipelines, please check their own CHANGELOG file for a full list of software dependencies. ### Dependencies -The pipeline depends on a number of databases which are noted in [README](README.md) and [USAGE](docs/usage.md). + +The pipeline depends on a number of databases which are noted in [README](README.md) and [USAGE](docs/usage.md). diff --git a/CITATIONS.md b/CITATIONS.md index c0cf948..2973402 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,19 +10,35 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [GFastar/GFastats](https://www.biorxiv.org/content/10.1101/2022.03.24.485682v1) - > Andrews, S. (2010). 
FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + > Formenti, G., Abueg, L., Brajuka, N., Gallardo, C., Giani, A., Fedrigo, O., Jarvis, ED. (2022). Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs. bioRxiv. doi: https://doi.org/10.1101/2022.03.24.485682 -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) +- [Merqury_FK](https://github.com/thegenemyers/MERQURY.FK) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Myers, G., Rhie, A. (2024). MerquryFK & KatFK. [online]. https://github.com/thegenemyers/MERQURY.FK. (Accessed on 20 September 2024). + +- [Minimap2](https://pubmed.ncbi.nlm.nih.gov/34623391/) + + > Li, H. 2021. ‘New strategies to improve MINIMAP2 alignment accuracy’, Bioinformatics, 37(23), pp. 4572–4574. doi:10.1093/bioinformatics/btab705. + +- [Samtools](https://pubmed.ncbi.nlm.nih.gov/33590861/) + + > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819. + +- [sanger-tol/blobtoolkit](https://zenodo.org/records/13758882) + + > Muffato, M., Butt, Z., Challis, R., Kumar, S., Qi, G., Ramos Díaz, A., Surana, P., & Yates, B. (2024). sanger-tol/blobtoolkit: v0.6.0 – Bellsprout (0.6.0). Zenodo. https://doi.org/10.5281/zenodo.13758882 + +- [sanger-tol/curationpretext](https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0) + + > Pointon, DLB. (2024). sanger-tol/curationpretext: v1.0.0 (UNSC Cradle). [online]. https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0. (Accessed on 20 September 2024).
## Software packaging/containerisation tools -- [Anaconda](https://anaconda.com) +- [Conda](https://conda.org/) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + > conda contributors. conda: A system-level, binary package and environment manager running on all major operating systems and platforms. Computer software. https://github.com/conda/conda - [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) diff --git a/LICENSE b/LICENSE index 967fdcd..ac4a5f3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) DLBPointon +Copyright (c) 2022 - 2023 Genome Research Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 652eba6..6197506 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,7 @@ [![GitHub Actions CI Status](https://github.com/sanger-tol/ear/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![DOI](https://zenodo.org/badge/833605808.svg)](https://doi.org/10.5281/zenodo.13819520) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) -[![run with 
conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/ear) @@ -15,7 +13,7 @@ 1. Read the input yaml file (YAML_INPUT) 2. Run GFASTATS (GFASTARS) 3. Run MERQURYFK_MERQURYFK (MERQURYFK) -4. Run MAIN_MAPPING, longread single-end/paired-end mapping +4. Run MAIN_MAPPING, longread single-end/paired-end mapping 5. Run GENERATE_SAMPLESHEET, generate a csv file required for SANGER_TOL_BTK. 6. Run SANGER_TOL_BTK, also known as SANGER-TOL/BLOBTOOLKIT a subpipline for SANGER-TOL/EAR 7. Run SANGER_TOL_CPRETEXT, also known as SANGER-TOL/CURATIONPRETEXT a subpipeline for SANGER-TOL/EAR. @@ -27,11 +25,12 @@ The sanger-tol/ear pipeline requires a number of databases in place in order to run the blobtoolkit pipeline. 
These include: - - A blast nt database - - A Diamond blast uniprot database - - A Diamond blast nr database - - An NCBI taxdump - - An NCBI rankedlineage.dmp + +- A blast nt database +- A Diamond blast uniprot database +- A Diamond blast nr database +- An NCBI taxdump +- An NCBI rankedlineage.dmp Next, a yaml file containing the following should then be completed: @@ -59,8 +58,9 @@ curationpretext: hic_dir: btk: taxid: 1464561 - lineages: - gca_accession: GCA_0001 + lineages: < CSV LIST OF DATABASES TO USE: "insecta_odb10,diptera_odb10"> + gca_accession: GCA_0001 + nt_database: nt_database_prefix: diamond_uniprot_database_path: @@ -70,14 +70,14 @@ btk: config: ``` - Now, you can run the pipeline using: ```bash nextflow run sanger-tol/ear -profile \\ --input assets/idCulLati1.yaml \\ --mapped TRUE \\ # OPTIONAL - --outdir test-truth + --steps ["", "btk", "cpretext", "merquryfk"] # OPTIONAL CSV LIST OF STEPS TO EXCLUDE FROM EXECUTION + --outdir test ``` > [!WARNING] diff --git a/assets/idCulLati1.yaml b/assets/idCulLati1.yaml index ea48cc2..404f4a5 100644 --- a/assets/idCulLati1.yaml +++ b/assets/idCulLati1.yaml @@ -2,7 +2,7 @@ assembly_id: idCulLati1_ear reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/primary.fa reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/hap2.fa -reference_haplotigs: / +reference_haplotigs: /nfs/treeoflife-01/teams/tola/users/dp24/ear/haplotigs.fa # If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore. 
mapped_bam: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/mapped_bam.bam diff --git a/assets/real_pdf.yaml b/assets/real_pdf.yaml index 8f8d4a0..19c4c35 100644 --- a/assets/real_pdf.yaml +++ b/assets/real_pdf.yaml @@ -20,14 +20,14 @@ PROFILING: # ASSEMBLY DATA ASSEMBLIES: Pre-curation: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] pri: gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/ Curated: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] pri: gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.fa.gfastats busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.insecta_odb10.busco/short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index 5f653ab..0000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,fastq_1,fastq_2 
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index 8012bf6..61d2b74 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,21 +13,137 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "reference_hap1": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.f(ast)?a$", + "errorMessage": "Primary assembly input file, decompressed" }, - "fastq_2": { + "reference_hap2": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.f(ast)?a$", + "errorMessage": "Haplotype assembly input file, decompressed" + }, + "reference_haplotigs": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?a$", + "errorMessage": "Haplotigs removed from Primary Assembly input file during curation, decompressed" + }, + "mapped_bam": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bam$", + "errorMessage": "Optional mapped bam file used to skip mapping of pacbio files" + }, + "merquryfk": { + "type": "object", + "properties": { + "fastk_hist": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.hist$", + "errorMessage": "Path to hist file" + }, + "fastk_ktab": { + "type": "string", + "errorMessage": "Directory containing ktab files" + } + } + }, + "longread": { + "type": "object", + "properties": { + "dir": { + "type":
"string", + "errorMessage": "Path to folder containing fasta.gz files" + }, + "type": { + "type": "string", + "errorMessage": "type of longread data" + } + } + }, + "curationpretext": { + "type": "object", + "properties": { + "aligner": { + "type": "string", + "errorMessage": "Aligner" + }, + "telomere_motif": { + "type": "string", + "errorMessage": "Telomere motif for telomere search" + }, + "hic_dir": { + "type": "string", + "errorMessage": "Directory of the cram data" + } + } + }, + "btk": { + "type": "object", + "properties": { + "taxid": { + "type": "string", + "errorMessage": "NCBI Taxid of organism" + }, + "lineages": { + "type": "string", + "errorMessage": "CSV list of BUSCO lineages to run against" + }, + "gca_accession": { + "type": "string", + "errorMessage": "gca_accession if applicable" + }, + "nt_database": { + "type": "string", + "errorMessage": "nt database directory" + }, + "nt_database_prefix": { + "type": "string", + "errorMessage": "Prefix for nt database" + }, + "diamond_uniprot_database_path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.dmnd$", + "errorMessage": "Diamond protein database" + }, + "diamond_nr_database_path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.dmnd$", + "errorMessage": "Nuclear diamond database" + }, + "ncbi_taxonomy_path": { + "type": "string", + "errorMessage": "Directory for tax2taxid" + }, + "ncbi_rankedlineage_path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.dmp$", + "errorMessage": "Taxonomy dump" + }, + "config": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.config$", + "errorMessage": "Extra configuration file for Blobtoolkit pipeline" + } + } } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "reference_hap1", "reference_hap2"] } } diff --git a/assets/template_pdf.yaml b/assets/template_pdf.yaml index 3779c19..5688f4e 100644 --- 
a/assets/template_pdf.yaml +++ b/assets/template_pdf.yaml @@ -20,14 +20,14 @@ PROFILING: # ASSEMBLY DATA ASSEMBLIES: Pre-curation: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] pri: gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/ Curated: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] pri: gfastats--nstar-report_txt: idCulLati1.1.primary.curated.fa.gfastats busco_short_summary_txt: short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt diff --git a/assets/test.yaml b/assets/test.yaml index 6a5299a..0f5f6ed 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,25 +1,33 @@ -assembly_id: Oscheius_DF5033 -reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa -reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa +# General values for all subpipelines and modules +assembly_id: grTriPseu1 +reference_hap1: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa +reference_hap2:
/home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa +reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa + +# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore. +mapped_bam: [] + +merquryfk: + fastk_hist: "./" + fastk_ktab: "./" + +# Used by both subpipelines longread: type: hifi - dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/ -mapped_bam: idCulLati1/mapped_bam.bam + dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/pacbio/ + curationpretext: aligner: minimap2 - telomere_motif: TTAGG - hic_dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/ -merquryfk: - fastk_hist: "./" - fastk_ktab: "./" + telomere_motif: TTAGGG + hic_dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/hic-arima/ btk: - nt_database: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_blast_tiny_testdb/blastdb/ - nt_database_prefix: tiny_plasmodium_blastdb.fa - diamond_uniprot_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_diamond_tiny_testdb/ascc_tinytest_diamond_db.dmnd - diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd - ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump - ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp - btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml taxid: 352914 gca_accession: GCA_0001 - lineages: "diptera_odb10,insecta_odb10" + lineages: "fungi_odb10" + nt_database: /home/runner/work/ascc/ascc/NT_database/ + nt_database_prefix: 18S_fungal_sequences + diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd + diamond_nr_database_path: 
/home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd + ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/ + ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp + config: /home/runner/work/ear/ear/conf/sanger-tol-btk.config diff --git a/conf/base.config b/conf/base.config index e609a9e..4b330f4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,16 +11,22 @@ process { // TODO nf-core: Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" { - time = { check_max( 70.h * task.attempt, 'time' ) } + time = { check_max( 70.h * task.attempt, 'time' ) } + } + + withName: "MINIMAP2_ALIGN_SE" { + cpus = { check_max( 16 , 'cpus' ) } + memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 60 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') } + time = { check_max( 1.h * ( reference.size() < 1e9 ? 15 : reference.size() < 10e9 ? 30 : 48), 'time' ) } } // Process-specific resource requirements @@ -30,37 +36,45 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } + withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } + withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } + withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } + withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } + withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + withLabel:error_ignore { - errorStrategy = 'ignore' + errorStrategy = 'ignore' } + withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 + errorStrategy = 'retry' + maxRetries = 2 } } diff --git 
a/conf/modules.config b/conf/modules.config index 137b892..ac33898 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,6 +20,10 @@ process { ] } + withName: CAT_CAT { + ext.prefix = 'combined_haplos.fa' + } + withName: GFASTATS { ext.args = '--nstar-report' } @@ -38,7 +42,7 @@ process { ext.executor = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'" ext.profiles = "singularity,sanger" ext.get_versions = "lsid | head -n1 | cut -d ',' -f 1" - ext.version = "draft_assemblies" + ext.version = "0.6.0" } withName: SANGER_TOL_CPRETEXT { @@ -47,7 +51,7 @@ process { ext.executor = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'" ext.profiles = "singularity,sanger" ext.get_versions = "lsid | head -n1 | cut -d ',' -f 1" - ext.version = "1.0.0" + ext.version = "1.1.0" } } diff --git a/conf/sanger-tol-btk.config b/conf/sanger-tol-btk.config index 247dbbd..553ad56 100644 --- a/conf/sanger-tol-btk.config +++ b/conf/sanger-tol-btk.config @@ -4,4 +4,4 @@ process { memory = { check_max( 10.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } -} \ No newline at end of file +} diff --git a/conf/test.config b/conf/test.config index 024498b..06d069f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,14 +15,10 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + input = "${projectDir}/assets/test.yaml" + outdir = "results" - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 
'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - - } diff --git a/docs/output.md b/docs/output.md index f5a9c8b..dac22bd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -27,7 +27,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d -[GFASTATS](https://github.com/vgl-hub/gfastats) is a single fast and exhaustive tool for summary statistics and simultaneous *fa* (fasta, fastq, gfa [.gz]) genome assembly file manipulation. +[GFASTATS](https://github.com/vgl-hub/gfastats) is a single fast and exhaustive tool for summary statistics and simultaneous _fa_ (fasta, fastq, gfa [.gz]) genome assembly file manipulation. ### MERQURYFK @@ -35,7 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Output files - `merquryfk/` - - `*.completeness.stats`: + - `*.completeness.stats`: - `*{"primary","haplotype",""}_only.bed`: - `*{"primary","haplotype",""}.qv`: - `*.spectra-asm.{fl,ln,st}.png`: @@ -47,14 +47,13 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Merqury is a novel tool for reference-free assembly evaluation based on efficient k-mer set operations. By comparing k-mers in a de novo assembly to those found in unassembled high-accuracy reads, Merqury estimates base-level accuracy and completeness. - ## SANGER_TOL_BTK
Output files - `sanger/*_blobtoolkit_out/` - - `blobtoolkit/plots/*png`: Blobtoolkit plots + - `blobtoolkit/plots/*png`: Blobtoolkit plots - `blobtoolkit/{ASSEMBLY_NAME}/*.json.gz`: Blobtoolkit dataset for use in BTK_viewer. - `busco/*_odb10/*.{tsv,tar.gz,json,txt}`: Busco output - `muliqc/`: MultiQC plots/data and report.html. @@ -64,14 +63,13 @@ Merqury is a novel tool for reference-free assembly evaluation based on efficien [SANGER_TOL_BTK](https://pipelines.tol.sanger.ac.uk/blobtoolkit) is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes. - ## SANGER_TOL_CPRETEXT
Output files - `sanger/*_curationpretext_out/` - - `accessory_files/*.{bigWig,bed,bedgraph}`: Track files describing Telomere, gap, coverage data across the genome. + - `accessory_files/*.{bigWig,bed,bedgraph}`: Track files describing Telomere, gap, coverage data across the genome. - `pretext_maps_raw`: Pre-accessory file ingestion pretext files. - `pretext_maps_processed`: Post-accessory file ingestion pretext files, e.g. the final output. - [`pipeline_info`](#pipeline-information) @@ -80,7 +78,6 @@ Merqury is a novel tool for reference-free assembly evaluation based on efficien [SANGER_TOL_CPRETEXT](https://pipelines.tol.sanger.ac.uk/curationpretext) is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://pipelines.tol.sanger.ac.uk/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes. - ### Pipeline information
diff --git a/docs/usage.md b/docs/usage.md index b703d3e..a1e62af 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -166,7 +166,6 @@ As in the Snakemake version [a YAML configuration file](https://github.com/blobt The data in the YAML is currently ignored in the Nextflow pipeline version. The YAML file is retained only to allow compatibility with the BlobDir dataset generated by the [Snakemake version](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/blobtoolkit-pipeline/src). The taxonomic information in the YAML file can be obtained from [NCBI Taxonomy](https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/). - ## Running the pipeline The typical command for running the pipeline is as follows: diff --git a/modules.json b/modules.json index 23ee7d4..8ca244a 100644 --- a/modules.json +++ b/modules.json @@ -5,41 +5,37 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "gfastats": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], + "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ], + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", - "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", - "installed_by": [ - "modules" - ] + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": [ - "modules" - ] + "git_sha": 
"b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "46eca555142d6e597729fcb682adcc791796f514", - "installed_by": [ - "modules" - ] + "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "installed_by": ["modules"] } } }, @@ -48,26 +44,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/local/nextflow/run/main.nf b/modules/local/nextflow/run/main.nf index cc522bc..af6ba65 100644 --- a/modules/local/nextflow/run/main.nf +++ b/modules/local/nextflow/run/main.nf @@ -35,4 +35,4 @@ process NEXTFLOW_RUN { output: path "results" , emit: output val process.text, emit: log -} \ No newline at end of file +} diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf index 6ce9191..543e693 100644 --- a/modules/local/sanger_tol_btk.nf +++ b/modules/local/sanger_tol_btk.nf @@ -7,7 +7,7 @@ process SANGER_TOL_BTK { tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv") path blastp, stageAs: "blastp.dmnd" - path blastn, stageAs: "" + path blastn path blastx path config_file path tax_dump @@ -16,13 +16,13 @@ process SANGER_TOL_BTK { val gca_accession output: - tuple val(meta), path("*_out/blobtoolkit/REFERENCE"), emit: dataset - path("*_out/blobtoolkit/plots"), emit: plots - 
path("*_out/blobtoolkit/REFERENCE/summary.json.gz"), emit: summary_json - path("*_out/busco"), emit: busco_data - path("*_out/multiqc"), emit: multiqc_report - path("*_out/blobtoolkit_pipeline_info"), emit: pipeline_info - path "versions.yml", emit: versions + tuple val(meta), path("*_out/blobtoolkit/REFERENCE"), emit: dataset + path "*_out/blobtoolkit/plots" , emit: plots + path "*_out/blobtoolkit/REFERENCE/summary.json.gz", emit: summary_json + path "*_out/busco", emit: busco_data + path "*_out/multiqc", emit: multiqc_report + path "*_out/pipeline_info/blobtoolkit", emit: pipeline_info + path "versions.yml", emit: versions script: def pipeline_name = task.ext.pipeline_name @@ -71,10 +71,16 @@ process SANGER_TOL_BTK { Nextflow: \$(nextflow -v | cut -d " " -f3) executor system: $get_version END_VERSIONS - - printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml """ + // INFILE=${output_dir}/pipeline_info/software_versions.yml + // IFS=\$'\n' + // echo "$pipeline_name:" >> versions.yml + // for \${LINE} in \$(cat "\$INFILE") + // do + // echo " \${LINE}" >> versions.yml + // done + stub: def pipeline_version = task.ext.version ?: "main" diff --git a/modules/local/sanger_tol_cpretext.nf b/modules/local/sanger_tol_cpretext.nf index 5b986e1..b073039 100644 --- a/modules/local/sanger_tol_cpretext.nf +++ b/modules/local/sanger_tol_cpretext.nf @@ -45,17 +45,23 @@ process SANGER_TOL_CPRETEXT { $args \\ $config \\ -resume' - + cat <<-END_VERSIONS > versions.yml "${task.process}": $pipeline_suffix: $pipeline_version Nextflow: \$(nextflow -v | cut -d " " -f3) executor system: $get_version END_VERSIONS - - printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml """ + // INFILE=${output_dir}/pipeline_info/software_versions.yml + // IFS=\$'\n' + // echo "$pipeline_name:" >> versions.yml + // for LINE in \$(cat "\$INFILE") + // do + // echo " \$LINE" >> versions.yml + // done + stub: def pipeline_version = task.ext.version ?: 
"main" def (pipeline_prefix,pipeline_suffix) = pipeline_name.split('/') diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000..9b01c86 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000..2862c64 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? 
"| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000..81778a0 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,43 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - file_out: + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..9cb1617 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,191 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + 
lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..b7623ee --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": 
"2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" + }, + "test_cat_one_file_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + 
"ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" + }, + "test_cat_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 0000000..ec26b0f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 0000000..fbc7978 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 0000000..37b578f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml 
@@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/gfastats/environment.yml b/modules/nf-core/gfastats/environment.yml index 1c875ce..b47bbdb 100644 --- a/modules/nf-core/gfastats/environment.yml +++ b/modules/nf-core/gfastats/environment.yml @@ -1,7 +1,5 @@ -name: gfastats channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gfastats=1.3.6 diff --git a/modules/nf-core/gfastats/gfastats.diff b/modules/nf-core/gfastats/gfastats.diff new file mode 100644 index 0000000..0f108e1 --- /dev/null +++ b/modules/nf-core/gfastats/gfastats.diff @@ -0,0 +1,38 @@ +Changes in module 'nf-core/gfastats' +--- modules/nf-core/gfastats/main.nf ++++ modules/nf-core/gfastats/main.nf +@@ -19,7 +19,6 @@ + + output: + tuple val(meta), path("*.assembly_summary"), emit: assembly_summary +- tuple val(meta), path("*.${out_fmt}.gz") , emit: assembly + path "versions.yml" , emit: versions + + when: +@@ -32,18 +31,16 @@ + def ibed = include_bed ? "--include-bed $include_bed" : "" + def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : "" + def sak = instructions ? "--swiss-army-knife $instructions" : "" ++ ++ // Arguments have been removed due to causing errors with output values being 0 ++ // out-format seemed to be the main cause of this, in testing ++ // Even using the main branch of the github repo yielded the same error. 
++ + """ + gfastats \\ +- $args \\ ++ --nstar-report \\ + --threads $task.cpus \\ +- $agp \\ +- $ibed \\ +- $ebed \\ +- $sak \\ +- --out-format ${prefix}.${out_fmt}.gz \\ + $assembly \\ +- $genome_size \\ +- $target \\ + > ${prefix}.assembly_summary + + cat <<-END_VERSIONS > versions.yml + +************************************************************ diff --git a/modules/nf-core/gfastats/main.nf b/modules/nf-core/gfastats/main.nf index 8db239a..37a811e 100644 --- a/modules/nf-core/gfastats/main.nf +++ b/modules/nf-core/gfastats/main.nf @@ -19,7 +19,6 @@ process GFASTATS { output: tuple val(meta), path("*.assembly_summary"), emit: assembly_summary - tuple val(meta), path("*.${out_fmt}.gz") , emit: assembly path "versions.yml" , emit: versions when: @@ -32,18 +31,16 @@ process GFASTATS { def ibed = include_bed ? "--include-bed $include_bed" : "" def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : "" def sak = instructions ? "--swiss-army-knife $instructions" : "" + + // Arguments have been removed due to causing errors with output values being 0 + // out-format seemed to be the main cause of this, in testing + // Even using the main branch of the github repo yielded the same error. 
+ """ gfastats \\ - $args \\ + --nstar-report \\ --threads $task.cpus \\ - $agp \\ - $ibed \\ - $ebed \\ - $sak \\ - --out-format ${prefix}.${out_fmt}.gz \\ $assembly \\ - $genome_size \\ - $target \\ > ${prefix}.assembly_summary cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gfastats/meta.yml b/modules/nf-core/gfastats/meta.yml index d0e97a8..a621343 100644 --- a/modules/nf-core/gfastats/meta.yml +++ b/modules/nf-core/gfastats/meta.yml @@ -16,56 +16,67 @@ tools: documentation: "https://github.com/vgl-hub/gfastats/tree/main/instructions" tool_dev_url: "https://github.com/vgl-hub/gfastats" doi: "10.1093/bioinformatics/btac460" - licence: "['MIT']" + licence: ["MIT"] + identifier: biotools:gfastats input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - assembly: - type: file - description: Draft assembly file - pattern: "*.{fasta,fastq,gfa}(.gz)?" - - out_fmt: - type: string - description: Output format (fasta, fastq, gfa) - - genome_size: - type: integer - description: estimated genome size (bp) for NG* statistics (optional). - - target: - type: string - description: target specific sequence by header, optionally with coordinates (optional). - - agpfile: - type: file - description: converts input agp to path and replaces existing paths. - - include_bed: - type: file - description: generates output on a subset list of headers or coordinates in 0-based bed format. - - exclude_bed: - type: file - description: opposite of --include-bed. They can be combined (no coordinates). - - instructions: - type: file - description: set of instructions provided as an ordered list. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - assembly: + type: file + description: Draft assembly file + pattern: "*.{fasta,fastq,gfa}(.gz)?" 
+ - - out_fmt: + type: string + description: Output format (fasta, fastq, gfa) + - - genome_size: + type: integer + description: estimated genome size (bp) for NG* statistics (optional). + - - target: + type: string + description: target specific sequence by header, optionally with coordinates + (optional). + - - agpfile: + type: file + description: converts input agp to path and replaces existing paths. + - - include_bed: + type: file + description: generates output on a subset list of headers or coordinates in + 0-based bed format. + - - exclude_bed: + type: file + description: opposite of --include-bed. They can be combined (no coordinates). + - - instructions: + type: file + description: set of instructions provided as an ordered list. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - assembly_summary: - type: file - description: Assembly summary statistics file - pattern: "*.assembly_summary" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.assembly_summary": + type: file + description: Assembly summary statistics file + pattern: "*.assembly_summary" - assembly: - type: file - description: The assembly as modified by gfastats - pattern: "*.{fasta,fastq,gfa}.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${out_fmt}.gz": + type: file + description: The assembly as modified by gfastats + pattern: "*.{fasta,fastq,gfa}.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@mahesh-panchal" maintainers: diff --git a/modules/nf-core/merquryfk/merquryfk/environment.yml b/modules/nf-core/merquryfk/merquryfk/environment.yml deleted file mode 100644 index 44a5ee9..0000000 --- a/modules/nf-core/merquryfk/merquryfk/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: merquryfk_merquryfk -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf index f0e78cc..79c404d 100644 --- a/modules/nf-core/merquryfk/merquryfk/main.nf +++ b/modules/nf-core/merquryfk/merquryfk/main.nf @@ -6,26 +6,28 @@ process MERQURYFK_MERQURYFK { container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' input: - tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly), path(haplotigs) + tuple val(meta), path(fastk_hist),path(fastk_ktab),path(assembly),path(haplotigs) + path matktab //optional + path patktab //optional output: - tuple val(meta), path("${prefix}.completeness.stats") , emit: stats - tuple val(meta), path("${prefix}.*_only.bed") , emit: bed - tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv - tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf, 
optional: true - tuple val(meta), path("${prefix}.qv") , emit: qv - tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.completeness.stats") , emit: stats + tuple val(meta), path("${prefix}.*_only.bed") , emit: bed + tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv + tuple val(meta), path("${prefix}.*.spectra-cn.fl.{png,pdf}") , emit: spectra_cn_fl, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.ln.{png,pdf}") , emit: spectra_cn_ln, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.st.{png,pdf}") , emit: spectra_cn_st, optional: true + tuple val(meta), path("${prefix}.qv") , emit: qv + tuple val(meta), path("${prefix}.spectra-asm.fl.{png,pdf}") , emit: spectra_asm_fl, optional: true + tuple val(meta), path("${prefix}.spectra-asm.ln.{png,pdf}") , emit: spectra_asm_ln, optional: true + tuple val(meta), path("${prefix}.spectra-asm.st.{png,pdf}") , emit: spectra_asm_st, optional: true + tuple val(meta), path("${prefix}.phased_block.bed") , emit: phased_block_bed, optional: true + tuple val(meta), path("${prefix}.phased_block.stats") , emit: phased_block_stats, optional: true + tuple val(meta), path("${prefix}.continuity.N.{pdf,png}") , emit: continuity_N, optional: true + tuple val(meta), path("${prefix}.block.N.{pdf,png}") , emit: block_N, optional: true + tuple val(meta), path("${prefix}.block.blob.{pdf,png}") , emit: 
block_blob, optional: true + tuple val(meta), path("${prefix}.hapmers.blob.{pdf,png}") , emit: hapmers_blob, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -37,11 +39,10 @@ process MERQURYFK_MERQURYFK { } def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def mat_ktab = matktab ? "${matktab.find{ it.toString().endsWith(".ktab") }}" : '' + def pat_ktab = patktab ? "${patktab.find{ it.toString().endsWith(".ktab") }}" : '' def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - - // Passing in the link through FASTK works, however passing in through YAML_INPUT results in being unable to find file - // seems as though it is because it is in a folder rather directly in the folder merqury is running in. """ cp ${fastk_ktab}/*ktab . && cp ${fastk_ktab}/.*ktab.* . @@ -60,4 +61,20 @@ process MERQURYFK_MERQURYFK { r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) END_VERSIONS """ + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch ${prefix}.completeness.stats + touch ${prefix}.qv + touch ${prefix}._.qv + touch ${prefix}._only.bed + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + merquryfk: $MERQURY_VERSION + r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff index 751b30b..84c5901 100644 --- a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff +++ b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff @@ -1,13 +1,9 @@ Changes in module 'nf-core/merquryfk/merquryfk' --- modules/nf-core/merquryfk/merquryfk/main.nf +++ modules/nf-core/merquryfk/merquryfk/main.nf -@@ -39,11 +39,16 @@ - prefix = task.ext.prefix ?: "${meta.id}" +@@ -44,12 +44,12 @@ def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. -+ -+ // Passing in the link through FASTK works, however passing in through YAML_INPUT results in being unable to find file -+ // seems as though it is because it is in a folder rather directly in the folder merqury is running in. """ + cp ${fastk_ktab}/*ktab . && cp ${fastk_ktab}/.*ktab.* . 
+ @@ -15,6 +11,8 @@ Changes in module 'nf-core/merquryfk/merquryfk' $args \\ -T$task.cpus \\ - ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\ +- ${mat_ktab} \\ +- ${pat_ktab} \\ + *.ktab \\ $assembly \\ $haplotigs \\ diff --git a/modules/nf-core/merquryfk/merquryfk/meta.yml b/modules/nf-core/merquryfk/merquryfk/meta.yml index 7d4af79..82bfbec 100644 --- a/modules/nf-core/merquryfk/merquryfk/meta.yml +++ b/modules/nf-core/merquryfk/merquryfk/meta.yml @@ -9,104 +9,209 @@ tools: description: "FastK based version of Merqury" homepage: "https://github.com/thegenemyers/MERQURY.FK" tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK" - licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE" + licence: ["https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fastk_hist: - type: file - description: A histogram files from the program FastK - pattern: "*.hist" - - fastk_ktab: - type: file - description: Histogram ktab files from the program FastK (option -t) - pattern: "*.ktab*" - - assembly: - type: file - description: Genome (primary) assembly files (fasta format) - pattern: ".fasta" - - haplotigs: - type: file - description: Assembly haplotigs (fasta format) - pattern: ".fasta" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fastk_hist: + type: file + description: A histogram file from the program FastK + pattern: "*.hist" + - fastk_ktab: + type: file + description: Histogram ktab files from the program FastK (option -t) + pattern: "*.ktab*" + - assembly: + type: file + description: Genome (primary) assembly files (fasta format) + pattern: ".fasta" + - haplotigs: + type: file + description: Assembly haplotigs (fasta format) + pattern: ".fasta" + - - matktab: + type: file + description: trio maternal histogram ktab files from the program FastK (option + -t) + pattern: "*.ktab*" + - - patktab: + type: file + description: trio paternal histogram ktab files from the program FastK (option + -t) + pattern: "*.ktab*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - stats: - type: file - description: Assembly statistics file - pattern: "*.completeness.stats" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}.completeness.stats: + type: file + description: Assembly statistics file + pattern: "*.completeness.stats" - bed: - type: file - description: Assembly only kmer positions not supported by reads in bed format - pattern: "*_only.bed" - - spectra_cn_fl_png: - type: file - description: "Unstacked copy number spectra filled plot in PNG format" - pattern: "*.spectra-cn.fl.png" - - spectra_cn_ln_png: - type: file - description: "Unstacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.ln.png" - - spectra_cn_st_png: - type: file - description: "Stacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.st.png" - - spectra_asm_fl_png: - type: file - description: "Unstacked assembly spectra filled plot in PNG format" - pattern: "*.spectra-asm.fl.png" - - spectra_asm_ln_png: - type: file - description: "Unstacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.ln.png" - - spectra_asm_st_png: - type: file - description: "Stacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.st.png" - - spectra_cn_fl_pdf: - type: file - description: "Unstacked copy number spectra filled plot in PDF format" - pattern: "*.spectra-cn.fl.pdf" - - spectra_cn_ln_pdf: - type: file - description: "Unstacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.ln.pdf" - - spectra_cn_st_pdf: - type: file - description: "Stacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.st.pdf" - - spectra_asm_fl_pdf: - type: file - description: "Unstacked assembly spectra filled plot in PDF format" - pattern: "*.spectra-asm.fl.pdf" - - spectra_asm_ln_pdf: - type: file - description: "Unstacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.ln.pdf" - - spectra_asm_st_pdf: - type: file - description: "Stacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.st.pdf" + - meta: + type: map + description: | + Groovy 
Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*_only.bed: + type: file + description: Assembly only kmer positions not supported by reads in bed format + pattern: "*_only.bed" - assembly_qv: - type: file - description: "error and qv table for each scaffold of the assembly" - pattern: "*.qv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.qv: + type: file + description: "error and qv table for each scaffold of the assembly" + pattern: "*.qv" + - spectra_cn_fl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.spectra-cn.fl.{png,pdf}: + type: file + description: "Unstacked copy number spectra filled plot in PNG or PDF format" + pattern: "*.spectra-cn.fl.{png,pdf}" + - spectra_cn_ln: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.spectra-cn.ln.{png,pdf}: + type: file + description: "Unstacked copy number spectra line plot in PNG or PDF format" + pattern: "*.spectra-cn.ln.{png,pdf}" + - spectra_cn_st: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.spectra-cn.st.{png,pdf}: + type: file + description: "Stacked copy number spectra line plot in PNG or PDF format" + pattern: "*.spectra-cn.st.{png,pdf}" - qv: - type: file - description: "error and qv of each assembly as a whole" - pattern: "*.qv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.qv: + type: file + description: "error and qv of each assembly as a whole" + pattern: "*.qv" + - spectra_asm_fl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.spectra-asm.fl.{png,pdf}: + type: file + description: "Unstacked assembly spectra filled plot in PNG or PDF format" + pattern: "*.spectra-asm.fl.{png,pdf}" + - spectra_asm_ln: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.spectra-asm.ln.{png,pdf}: + type: file + description: "Unstacked assembly spectra line plot in PNG or PDF format" + pattern: "*.spectra-asm.ln.{png,pdf}" + - spectra_asm_st: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.spectra-asm.st.{png,pdf}: + type: file + description: "Stacked assembly spectra line plot in PNG or PDF format" + pattern: "*.spectra-asm.st.{png,pdf}" + - phased_block_bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.phased_block.bed: + type: file + description: Assembly kmer positions separated by block in bed format + pattern: "*.phased_block.bed" + - phased_block_stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.phased_block.stats: + type: file + description: phased assembly statistics file + pattern: "*.phased_block.stats" + - continuity_N: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.continuity.N.{pdf,png}: + type: file + description: "Stacked assembly N continuity plot in PNG or PDF format" + pattern: "*.continuity.N.{png,pdf}" + - block_N: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}.block.N.{pdf,png}: + type: file + description: "Stacked assembly N continuity by block plot in PNG or PDF format" + pattern: "*.block.N.{png,pdf}" + - block_blob: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.block.blob.{pdf,png}: + type: file + description: "Stacked assembly block plot in PNG or PDF format" + pattern: "*.block.blob.{png,pdf}" + - hapmers_blob: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.hapmers.blob.{pdf,png}: + type: file + description: "Stacked assembly hapmers block plot in PNG or PDF format" + pattern: "*.hapmers.blob.{png,pdf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@mahesh-panchal" + - "@yumisims" maintainers: - "@mahesh-panchal" + - "@yumisims" diff --git a/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test new file mode 100644 index 0000000..c46843c --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test @@ -0,0 +1,170 @@ +nextflow_process { + + name "Test Process MERQURYFK" + script "../main.nf" + process "MERQURYFK_MERQURYFK" + + tag "modules" + tag "modules_nfcore" + tag "merquryfk" + tag "merquryfk/merquryfk" + tag "fastk" + tag "fastk/fastk" + + setup { + run("FASTK_FASTK") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + run("FASTK_FASTK", alias: "FASTK_MAT") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + """ + } + } + + run("FASTK_FASTK", alias: "FASTK_PAT") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + ] + """ + } + } + } + + test("homo_sapiens - Illumina - png") { + config "./nextflow.png.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - pdf") { + config "./nextflow.pdf.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - trio") { + config "./nextflow.trio.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = 
FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = FASTK_MAT.out.ktab + input[2] = FASTK_PAT.out.ktab + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - stub") { + options "-stub" + config "./nextflow.pdf.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + +} \ No newline at end of file diff --git a/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap new file mode 100644 index 0000000..f7ce47f --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap @@ -0,0 +1,454 @@ +{ + "homo_sapiens - Illumina - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + 
"versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:52.240373868" + }, + "homo_sapiens - Illumina - pdf": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:19.530675341" + }, + "homo_sapiens - Illumina - png": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:21:57.682723412" + }, + "homo_sapiens - Illumina - trio": { + "content": [ + { + 
"0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:40.060937299" + } +} \ No newline at end of file diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config new file mode 100644 index 0000000..ba1eebc --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config new file mode 100644 index 0000000..52beeaa --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs -pdf' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config new file mode 100644 index 0000000..47c3d63 --- /dev/null +++ 
b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config new file mode 100644 index 0000000..47c3d63 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/tags.yml b/modules/nf-core/merquryfk/merquryfk/tests/tags.yml new file mode 100644 index 0000000..7dcac99 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/tags.yml @@ -0,0 +1,2 @@ +merquryfk/merquryfk: + - "modules/nf-core/merquryfk/merquryfk/**" diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index 41e8fe9..dc6476b 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -1,9 +1,6 @@ -name: minimap2_align - channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::htslib=1.20 diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index 8996f88..a4cfc89 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -14,62 +14,77 @@ tools: homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FASTA or FASTQ files of size 1 and 2 for single-end - and paired-end data, respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test_ref'] - - reference: - type: file - description: | - Reference database in FASTA format. - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - bam_index_extension: - type: string - description: BAM alignment index extension (e.g. "bai") - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - - bam_format: + type: boolean + description: Specify that output should be in BAM format + - - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - paf: - type: file - description: Alignment in PAF format - pattern: "*.paf" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" - bam: - type: file - description: Alignment in BAM format - pattern: "*.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" - index: - type: file - description: BAM alignment index - pattern: "*.bam.*" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@heuermh" - "@sofstam" diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml index cd366d6..62054fc 100644 --- a/modules/nf-core/samtools/merge/environment.yml +++ b/modules/nf-core/samtools/merge/environment.yml @@ -1,8 +1,8 @@ -name: samtools_merge +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.20 - - bioconda::htslib=1.20 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index 693b1d8..34da4c7 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_MERGE { conda 
"${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : - 'biocontainers/samtools:1.20--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 2e8f3db..235aa21 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -15,60 +15,81 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_files: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram,sam}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fasta: - type: file - description: Reference file the CRAM was created with (optional) - pattern: "*.{fasta,fa}" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fai: - type: file - description: Index of the reference file the CRAM was created with (optional) - pattern: "*.fai" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" - cram: - type: file - description: CRAM file - pattern: "*.{cram}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" - csi: - type: file - description: BAM index file (optional) - pattern: "*.csi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" - crai: - type: file - description: CRAM index file (optional) - pattern: "*.crai" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@yuukiiwa " diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap index 17bc846..0a41e01 100644 --- a/modules/nf-core/samtools/merge/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -80,14 +80,14 @@ "bam_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:46:35.851936597" + "timestamp": "2024-09-16T09:16:30.476887194" }, "bams_csi": { "content": [ @@ -124,14 +124,14 @@ "bams_stub_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:46:41.405707643" + "timestamp": "2024-09-16T09:16:52.203823961" }, "bam_cram": { "content": [ @@ -158,14 +158,14 @@ "bams_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:45:51.695689923" + "timestamp": "2024-09-16T08:29:57.524363148" }, "crams_bam": { "content": [ @@ -182,14 +182,14 @@ "crams_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": 
"0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:46:30.185392319" + "timestamp": "2024-09-16T09:16:06.977096207" }, "bam_csi": { "content": [ diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml index 36a12ea..62054fc 100644 --- a/modules/nf-core/samtools/sort/environment.yml +++ b/modules/nf-core/samtools/sort/environment.yml @@ -1,8 +1,8 @@ -name: samtools_sort +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.20 - - bioconda::htslib=1.20 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 8e01909..caf3c61 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -4,19 +4,19 @@ process SAMTOOLS_SORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : - 'biocontainers/samtools:1.20--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta) , path(bam) tuple val(meta2), path(fasta) output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -32,7 +32,6 @@ process SAMTOOLS_SORT { """ samtools cat \\ - --threads $task.cpus \\ ${bam} \\ | \\ samtools sort \\ diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 341a7d0..a9dbec5 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -15,52 +15,73 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file(s) - pattern: "*.{bam,cram,sam}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fasta: - type: file - description: Reference genome FASTA file - pattern: "*.{fa,fasta,fna}" - optional: true + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: Sorted BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" - cram: - type: file - description: Sorted CRAM file - pattern: "*.{cram}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" - crai: - type: file - description: CRAM index file (optional) - pattern: "*.crai" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" - csi: - type: file - description: BAM index file (optional) - pattern: "*.csi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index c2ea9c7..b05e669 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -39,6 +39,40 @@ nextflow_process { } } + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + test("cram") { config "./nextflow_cram.config" @@ -98,6 +132,36 @@ nextflow_process { } } + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("cram - stub") { options "-stub" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index da38d5d..469891f 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -20,14 +20,14 @@ ] ], [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T17:19:37.196205" + "timestamp": "2024-09-16T08:49:58.207549273" }, "bam - stub": { "content": [ @@ -57,7 +57,7 @@ ] ], "4": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ], "bam": [ [ @@ -84,15 +84,15 @@ ] ], "versions": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T15:54:46.580756" + "timestamp": 
"2024-09-16T08:50:08.630951018" }, "cram - stub": { "content": [ @@ -122,7 +122,7 @@ ], "4": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ], "bam": [ @@ -149,15 +149,110 @@ ], "versions": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T15:57:30.505698" + "timestamp": "2024-09-16T08:50:19.061912443" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:55.479443" + }, + "multiple bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:36:13.781404" }, "bam": { "content": [ @@ -167,7 +262,7 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" 
+ "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" ] ], [ @@ -180,13 +275,13 @@ ] ], [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.09.0" }, - "timestamp": "2024-07-22T15:54:25.872954" + "timestamp": "2024-10-08T11:59:46.372244" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index e39cd2f..a2a13b0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,7 @@ params { // Input options input = null mapped = false + steps = "NONE" // Boilerplate options outdir = null @@ -43,7 +44,6 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' validationShowHiddenParams = false validate_params = true @@ -178,7 +178,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id: 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -222,9 +222,9 @@ manifest { homePage = 'https://github.com/sanger-tol/ear' description = """ERGA Assembly Report pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.0dev' - doi = '' + nextflowVersion = '!>=24.04.0' + version = '0.6.2' + doi = 'https://zenodo.org/records/13819520' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index f198603..eee6164 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -36,9 +36,14 @@ "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow 
exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, - "multiqc_title": { + "mapped": { + "type": "boolean", + "description": "Have you got a mapped bam as input?", + "fa_icon": "fas fa-file-signature" + }, + "steps": { "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "description": "csv list of steps to skip", "fa_icon": "fas fa-file-signature" } } @@ -168,14 +173,6 @@ "fa_icon": "fas fa-remove-format", "hidden": true }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true - }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", @@ -189,24 +186,6 @@ "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, - "multiqc_config": { - "type": "string", - "format": "file-path", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", - "hidden": true - }, - "multiqc_logo": { - "type": "string", - "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", - "fa_icon": "fas fa-image", - "hidden": true - }, - "multiqc_methods_description": { - "type": "string", - "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", diff --git a/subworkflows/local/main_mapping.nf b/subworkflows/local/main_mapping.nf index 28c100f..0531201 100644 --- a/subworkflows/local/main_mapping.nf +++ b/subworkflows/local/main_mapping.nf @@ -74,4 +74,4 @@ workflow MAIN_MAPPING { mapped_bam // channel: tuple val(meta), path(mapped_bam) versions = ch_versions // channel: [ path(versions.yml) ] -} \ No newline at end of file +} diff --git a/subworkflows/local/pe_mapping.nf b/subworkflows/local/pe_mapping.nf index 3c41670..358be3b 100644 --- a/subworkflows/local/pe_mapping.nf +++ b/subworkflows/local/pe_mapping.nf @@ -113,4 +113,4 @@ process GrabFiles { tuple val(meta), path("in/*.{fa,fasta}.{gz}") "true" -} \ No newline at end of file +} diff --git a/subworkflows/local/se_mapping.nf b/subworkflows/local/se_mapping.nf index 8c7ad52..c3307d4 100644 --- a/subworkflows/local/se_mapping.nf +++ b/subworkflows/local/se_mapping.nf @@ -112,4 +112,4 @@ process GrabFiles { tuple val(meta), path("in/*.{fa,fasta,fna}.{gz}") "true" -} \ No newline at end of file +} diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index 916c003..4e3cc9e 100644 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -26,6 +26,7 @@ workflow YAML_INPUT { reference_1 = Channel.fromPath(inputs.reference_hap1, checkIfExists: true) reference_2 = Channel.fromPath(inputs.reference_hap2, checkIfExists: true) + reference_3 = Channel.fromPath(inputs.reference_haplotigs, checkIfExists: true) reference_1 .combine(sample_id) @@ -84,6 +85,7 @@ workflow YAML_INPUT { pacbio_tuple // tuple 
(meta), path(file) reference_hap1 // tuple (meta), path(file) reference_hap2 = reference_2 // DataVariable + reference_haplotigs = reference_3 reference_path = inputs.reference_hap1 // DataVariable mapped_bam diff --git a/workflows/ear.nf b/workflows/ear.nf index 1c93b33..a37966c 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -13,6 +13,7 @@ include { YAML_INPUT } from '../subworkflows/local/yaml_i include { MAIN_MAPPING } from '../subworkflows/local/main_mapping' // Module imports +include { CAT_CAT } from '../modules/nf-core/cat/cat/main' include { GENERATE_SAMPLESHEET } from '../modules/local/generate_samplesheet' include { GFASTATS } from '../modules/nf-core/gfastats/main' include { MERQURYFK_MERQURYFK } from '../modules/nf-core/merquryfk/merquryfk/main' @@ -39,6 +40,13 @@ workflow EAR { ch_versions = Channel.empty() ch_align_bam = Channel.empty() + exclude_steps = params.steps ? params.steps.split(",") : "NONE" + + full_list = ["btk", "cpretext", "merquryfk", "NONE"] + + if (!full_list.containsAll(exclude_steps)) { + exit 1, "There is an extra argument given on Command Line: \nCheck contents of: $exclude_steps\nMaster list is: $full_list" + } // // MODULE: YAML_INPUT @@ -47,6 +55,33 @@ workflow EAR { YAML_INPUT(ch_input) + // + // LOGIC: IF HAPLOTIGS IS EMPTY THEN PASS ON HALPLOTYPE ASSEMBLY + // IF HAPLOTIGS EXISTS THEN MERGE WITH HAPLOTYPE ASSEMBLY + // + if (YAML_INPUT.out.reference_haplotigs.ifEmpty(true)) { + YAML_INPUT.out.sample_id + .combine(YAML_INPUT.out.reference_hap2) + .combine(YAML_INPUT.out.reference_haplotigs) + .map{ sample_id, file1, file2 -> + tuple( + [ id: sample_id ], + [file1, file2] + ) + } + .set { + cat_cat_input + } + + CAT_CAT(cat_cat_input) + ch_versions = ch_versions.mix( CAT_CAT.out.versions ) + + ch_haplotype_fasta = CAT_CAT.out.file_out + } else { + ch_haplotype_fasta = YAML_INPUT.out.reference_hap2 + } + + // // MODULE: ASSEMBLY STATISTICS FOR THE FASTA // @@ -67,11 +102,11 @@ workflow EAR { // LOGIC: REFORMAT A BUNCH 
OF CHANNELS FOR MERQUERYFK // YAML_INPUT.out.reference_hap1 - .combine(YAML_INPUT.out.reference_hap2) + .combine(ch_haplotype_fasta) .combine(YAML_INPUT.out.fastk_hist) .combine(YAML_INPUT.out.fastk_ktab) - .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab -> - tuple( meta, + .map{ meta1, primary, meta2, haplotigs, fastk_hist, fastk_ktab -> + tuple( meta1, fastk_hist, fastk_ktab, primary, @@ -82,13 +117,20 @@ workflow EAR { // - // MODULE: MERQURYFK PLOTS OF GENOME + // LOGIC: STEP TO STOP MERQURY_FK RUNNING IF SPECIFIED BY USER // - MERQURYFK_MERQURYFK( - merquryfk_input - ) - ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) + if (!exclude_steps.contains('merquryfk')) { + // + // MODULE: MERQURYFK PLOTS OF GENOME + // + MERQURYFK_MERQURYFK( + merquryfk_input, + [], + [] + ) + ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) + } // // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE @@ -113,48 +155,57 @@ workflow EAR { // - // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline + // LOGIC: STEP TO STOP BTK RUNNING IF SPECIFIED BY USER // - GENERATE_SAMPLESHEET( - ch_mapped_bam - ) - ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) + if (!exclude_steps.contains('btk')) { + // + // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline + // + GENERATE_SAMPLESHEET( + ch_mapped_bam + ) + ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) - // - // MODULE: Run Sanger-ToL/BlobToolKit - // - SANGER_TOL_BTK ( - YAML_INPUT.out.reference_hap1, - ch_mapped_bam, - GENERATE_SAMPLESHEET.out.csv, - YAML_INPUT.out.btk_un_diamond_database, - YAML_INPUT.out.btk_nt_database, - YAML_INPUT.out.btk_un_diamond_database, - YAML_INPUT.out.btk_config, - YAML_INPUT.out.btk_ncbi_taxonomy_path, - YAML_INPUT.out.busco_lineages, - YAML_INPUT.out.btk_taxid, - 'GCA_0001' - ) - ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) + // + // MODULE: Run 
Sanger-ToL/BlobToolKit + // + SANGER_TOL_BTK ( + YAML_INPUT.out.reference_hap1, + ch_mapped_bam, + GENERATE_SAMPLESHEET.out.csv, + YAML_INPUT.out.btk_un_diamond_database, + YAML_INPUT.out.btk_nt_database, + YAML_INPUT.out.btk_un_diamond_database, + YAML_INPUT.out.btk_config, + YAML_INPUT.out.btk_ncbi_taxonomy_path, + YAML_INPUT.out.busco_lineages, + YAML_INPUT.out.btk_taxid, + 'GCA_0001' + ) + ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) + } // - // MODULE: Run Sanger-ToL/CurationPretext + // LOGIC: STEP TO STOP CURATION_PRETEXT RUNNING IF SPECIFIED BY USER // - reference = YAML_INPUT.out.reference_path.get() - hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get() - longread_dir = YAML_INPUT.out.longread_dir.get() - - SANGER_TOL_CPRETEXT( - reference, - longread_dir, - hic_dir, - [] - ) - ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) - + if (!exclude_steps.contains('cpretext')) { + // + // MODULE: Run Sanger-ToL/CurationPretext + // + reference = YAML_INPUT.out.reference_path.get() + hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get() + longread_dir = YAML_INPUT.out.longread_dir.get() + + SANGER_TOL_CPRETEXT( + reference, + longread_dir, + hic_dir, + [] + ) + ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) + } // // Collate and save software versions