From c89745e8979026276e52d8e6a6b13c72057dda33 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 21 Aug 2024 14:01:29 +0100 Subject: [PATCH 01/46] testing --- .github/workflows/ci.yml | 66 ++++++++++++++++++++++++++++++++++++++-- assets/test.yaml | 27 ++++++++-------- 2 files changed, 77 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e2003fe..7c98354 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,8 @@ on: env: NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -27,6 +29,29 @@ jobs: - "23.04.0" - "latest-everything" steps: + - name: Get branch names + # Pulls the names of current branches in repo + # steps.branch-names.outputs.current_branch is used later and returns the name of the branch the PR is made FROM not to + id: branch-names + uses: tj-actions/branch-names@v8 + + - name: Setup apptainer + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install nf-core + run: | + pip install nf-core + - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 @@ -35,8 +60,44 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: NF-Core Download - download singularity containers + run: | + nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity + + # - name: NF-Core Download - download singularity containers + # run: | + # nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity + + # - name: NF-Core Download - download singularity containers + # run: | + # nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity + - name: Download Tiny test data + # Download A fungal test data set that is full enough to show some real output. + run: | + curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - + + - name: Download the NCBI taxdump database + run: | + mkdir ncbi_taxdump + curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf - + + - name: Download the BUSCO lineage database + run: | + mkdir busco_database + curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf - + + - name: Download the subset of NT database + run: | + mkdir NT_database + curl -L https://ftp.ncbi.nlm.nih.gov/blast/db/18S_fungal_sequences.tar.gz | tar -C NT_database -xzf - + + - name: Download the subset of Diamond database + run: | + mkdir diamond + wget -c https://tolit.cog.sanger.ac.uk/test-data/resources/diamond/UP000000212_1234679_tax.dmnd -O diamond/UP000000212_1234679_tax.dmnd + + # - name: Disk space cleanup + # uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required @@ -44,3 +105,4 @@ jobs: # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + ls ./results/*/* diff --git a/assets/test.yaml b/assets/test.yaml index 6a5299a..ba87caf 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,25 +1,24 @@ -assembly_id: Oscheius_DF5033 -reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa -reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa +assembly_id: grTriPseu1 +reference_hap1: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa +reference_hap2: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa longread: type: hifi - dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/ + dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/ mapped_bam: idCulLati1/mapped_bam.bam curationpretext: aligner: minimap2 - telomere_motif: TTAGG - hic_dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/ + telomere_motif: TTAGGG + hic_dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/hic-arima/ merquryfk: fastk_hist: "./" fastk_ktab: "./" btk: - nt_database: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_blast_tiny_testdb/blastdb/ - nt_database_prefix: tiny_plasmodium_blastdb.fa - diamond_uniprot_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_diamond_tiny_testdb/ascc_tinytest_diamond_db.dmnd - diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd - ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump - ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp - btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml + nt_database: /home/runner/work/ascc/ascc/NT_database/ + nt_database_prefix: 18S_fungal_sequences + diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd + diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd + ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/ + ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp taxid: 352914 gca_accession: GCA_0001 - lineages: "diptera_odb10,insecta_odb10" + lineages: "fungi_odb10" From fd77e2f7f93429ea6ee6a3fc65ddd75ceeb35d7c Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 21 Aug 2024 14:13:29 +0100 Subject: [PATCH 02/46] Updating the tests --- .github/workflows/ci.yml | 14 ++++++++------ conf/test.config | 14 +++++--------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7c98354..a3dbb17 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,17 +60,19 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + # This will only download the main pipeline containers, subpipelines need their own nf-download - name: NF-Core Download - download singularity containers run: | nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - # - name: NF-Core Download - download singularity containers - # run: | - # nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity + - name: NF-Core Download - download singularity containers + run: | + nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity + + - name: NF-Core Download - download singularity containers + run: | + nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - # - name: NF-Core Download - download singularity containers - # run: | - # nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. run: | diff --git a/conf/test.config b/conf/test.config index 024498b..7313f18 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,14 +15,10 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + input = "${projectDir}/assets/test.yaml" + outdir = "results" } From f8f9456fb32cecbf5c70f5d7dae82335c7616cf8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 21 Aug 2024 14:15:55 +0100 Subject: [PATCH 03/46] Updating the tests --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a3dbb17..200afd0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,11 +67,11 @@ jobs: - name: NF-Core Download - download singularity containers run: | - nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity + nf-core download sanger-tol/blobtoolkit --revision draft_assemblies --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - name: NF-Core Download - download singularity containers run: | - nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity + nf-core download sanger-tol/curationpretext --revision 1.0.0 --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. From 098dae1b158f830708e4d73eace84c791faee836 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 21 Aug 2024 15:08:38 +0100 Subject: [PATCH 04/46] upping version of nextflow, due to errors on Actions --- .github/workflows/ci.yml | 2 +- nextflow.config | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 200afd0..abd9874 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: NXF_VER: - - "23.04.0" + - "24.04.5" - "latest-everything" steps: - name: Get branch names diff --git a/nextflow.config b/nextflow.config index e39cd2f..a2b702d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -222,8 +222,8 @@ manifest { homePage = 'https://github.com/sanger-tol/ear' description = """ERGA Assembly Report pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.0dev' + nextflowVersion = '!>=24.04.0' + version = '1.0' doi = '' } From 44b080927efb4f1eb552023d7ded987ace4708a8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 21 Aug 2024 16:31:19 +0100 Subject: [PATCH 05/46] Multi-hap support --- .github/workflows/ci.yml | 2 +- assets/idCulLati1.yaml | 2 +- conf/modules.config | 4 + modules.json | 39 ++-- modules/nf-core/cat/cat/environment.yml | 7 + modules/nf-core/cat/cat/main.nf | 78 +++++++ modules/nf-core/cat/cat/meta.yml | 36 ++++ modules/nf-core/cat/cat/tests/main.nf.test | 191 ++++++++++++++++++ .../nf-core/cat/cat/tests/main.nf.test.snap | 147 ++++++++++++++ .../cat/tests/nextflow_unzipped_zipped.config | 6 + .../cat/tests/nextflow_zipped_unzipped.config | 8 + modules/nf-core/cat/cat/tests/tags.yml | 2 + subworkflows/local/yaml_input.nf | 2 + workflows/ear.nf | 33 ++- 14 files changed, 527 insertions(+), 30 deletions(-) create mode 100644 modules/nf-core/cat/cat/environment.yml create mode 100644 modules/nf-core/cat/cat/main.nf create mode 100644 modules/nf-core/cat/cat/meta.yml create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap create mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config create mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config create mode 100644 modules/nf-core/cat/cat/tests/tags.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index abd9874..61b0cbf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: NXF_VER: - - "24.04.5" + - "24.04.2" - "latest-everything" steps: - name: Get branch names diff --git a/assets/idCulLati1.yaml b/assets/idCulLati1.yaml index ea48cc2..404f4a5 100644 --- a/assets/idCulLati1.yaml +++ b/assets/idCulLati1.yaml @@ -2,7 +2,7 @@ assembly_id: idCulLati1_ear reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/primary.fa reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/hap2.fa -reference_haplotigs: / +reference_haplotigs: /nfs/treeoflife-01/teams/tola/users/dp24/ear/haplotigs.fa # If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore. mapped_bam: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/mapped_bam.bam diff --git a/conf/modules.config b/conf/modules.config index 137b892..73e83bb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,6 +20,10 @@ process { ] } + withName: CAT_CAT { + ext.prefix = 'combined_haplos.fa' + } + withName: GFASTATS { ext.args = '--nstar-report' } diff --git a/modules.json b/modules.json index 23ee7d4..b93de71 100644 --- a/modules.json +++ b/modules.json @@ -5,41 +5,36 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "5bb8ca085e17549e185e1823495ab8d20727a805", + "installed_by": ["modules"] + }, "gfastats": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "46eca555142d6e597729fcb682adcc791796f514", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -48,26 +43,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000..17a04ef --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +name: cat_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000..2862c64 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000..00a8db0 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,36 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..9cb1617 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,191 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..b7623ee --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" + }, + "test_cat_one_file_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" + }, + "test_cat_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 0000000..ec26b0f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 0000000..fbc7978 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 0000000..37b578f --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index 916c003..4e3cc9e 100644 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -26,6 +26,7 @@ workflow YAML_INPUT { reference_1 = Channel.fromPath(inputs.reference_hap1, checkIfExists: true) reference_2 = Channel.fromPath(inputs.reference_hap2, checkIfExists: true) + reference_3 = Channel.fromPath(inputs.reference_haplotigs, checkIfExists: true) reference_1 .combine(sample_id) @@ -84,6 +85,7 @@ workflow YAML_INPUT { pacbio_tuple // tuple (meta), path(file) reference_hap1 // tuple (meta), path(file) reference_hap2 = reference_2 // DataVariable + reference_haplotigs = reference_3 reference_path = inputs.reference_hap1 // DataVariable mapped_bam diff --git a/workflows/ear.nf b/workflows/ear.nf index 1c93b33..091697a 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -13,6 +13,7 @@ include { YAML_INPUT } from '../subworkflows/local/yaml_i include { MAIN_MAPPING } from '../subworkflows/local/main_mapping' // Module imports +include { CAT_CAT } from '../modules/nf-core/cat/cat/main' include { GENERATE_SAMPLESHEET } from '../modules/local/generate_samplesheet' include { GFASTATS } from '../modules/nf-core/gfastats/main' include { MERQURYFK_MERQURYFK } from '../modules/nf-core/merquryfk/merquryfk/main' @@ -47,6 +48,32 @@ workflow EAR { YAML_INPUT(ch_input) + // + // LOGIC: IF HAPLOTIGS IS EMPTY THEN PASS ON HALPLOTYPE ASSEMBLY + // IF HAPLOTIGS EXISTS THEN MERGE WITH HAPLOTYPE ASSEMBLY + // + if (YAML_INPUT.out.reference_haplotigs.ifEmpty(true)) { + YAML_INPUT.out.sample_id + .combine(YAML_INPUT.out.reference_hap2) + .combine(YAML_INPUT.out.reference_haplotigs) + .map{ sample_id, file1, file2 -> + tuple( + [ id: sample_id ], + [file1, file2] + ) + } + .set { + cat_cat_input + } + + CAT_CAT(cat_cat_input) + ch_versions = ch_versions.mix( CAT_CAT.out.versions ) + + ch_haplotype_fasta = CAT_CAT.out.file_out + } else { + ch_haplotype_fasta = YAML_INPUT.out.reference_hap2 + } + // // MODULE: ASSEMBLY STATISTICS FOR THE FASTA // @@ -67,11 +94,11 @@ workflow EAR { // LOGIC: REFORMAT A BUNCH OF CHANNELS FOR MERQUERYFK // YAML_INPUT.out.reference_hap1 - .combine(YAML_INPUT.out.reference_hap2) + .combine(ch_haplotype_fasta) .combine(YAML_INPUT.out.fastk_hist) .combine(YAML_INPUT.out.fastk_ktab) - .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab -> - tuple( meta, + .map{ meta1, primary, meta2, haplotigs, fastk_hist, fastk_ktab -> + tuple( meta1, fastk_hist, fastk_ktab, primary, From eec25397a932c3d786a24fe6c197465872ecfd2d Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 2 Sep 2024 15:02:00 +0100 Subject: [PATCH 06/46] Corrected GFASTATS error caused by a faulty flag --- assets/test.yaml | 1 + modules.json | 39 ++++++++--- modules/local/sanger_tol_btk.nf | 12 +++- modules/local/sanger_tol_cpretext.nf | 10 ++- modules/nf-core/gfastats/gfastats.diff | 38 ++++++++++ modules/nf-core/gfastats/main.nf | 15 ++-- workflows/ear.nf | 96 +++++++++++++------------- 7 files changed, 139 insertions(+), 72 deletions(-) create mode 100644 modules/nf-core/gfastats/gfastats.diff diff --git a/assets/test.yaml b/assets/test.yaml index ba87caf..4175309 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,6 +1,7 @@ assembly_id: grTriPseu1 reference_hap1: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa reference_hap2: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa +reference_haplotigs: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa longread: type: hifi dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/ diff --git a/modules.json b/modules.json index b93de71..ef2ff42 100644 --- a/modules.json +++ b/modules.json @@ -8,33 +8,46 @@ "cat/cat": { "branch": "master", "git_sha": "5bb8ca085e17549e185e1823495ab8d20727a805", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gfastats": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ], + "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "46eca555142d6e597729fcb682adcc791796f514", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -43,20 +56,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf index 6ce9191..c9cb6a9 100644 --- a/modules/local/sanger_tol_btk.nf +++ b/modules/local/sanger_tol_btk.nf @@ -21,7 +21,7 @@ process SANGER_TOL_BTK { path("*_out/blobtoolkit/REFERENCE/summary.json.gz"), emit: summary_json path("*_out/busco"), emit: busco_data path("*_out/multiqc"), emit: multiqc_report - path("*_out/blobtoolkit_pipeline_info"), emit: pipeline_info + path("*_out/pipeline_info/blobtoolkit"), emit: pipeline_info path "versions.yml", emit: versions script: @@ -71,10 +71,16 @@ process SANGER_TOL_BTK { Nextflow: \$(nextflow -v | cut -d " " -f3) executor system: $get_version END_VERSIONS - - printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml """ + // INFILE=${output_dir}/pipeline_info/software_versions.yml + // IFS=\$'\n' + // echo "$pipeline_name:" >> versions.yml + // for \${LINE} in \$(cat "\$INFILE") + // do + // echo " \${LINE}" >> versions.yml + // done + stub: def pipeline_version = task.ext.version ?: "main" diff --git a/modules/local/sanger_tol_cpretext.nf b/modules/local/sanger_tol_cpretext.nf index 5b986e1..f9b12d8 100644 --- a/modules/local/sanger_tol_cpretext.nf +++ b/modules/local/sanger_tol_cpretext.nf @@ -52,10 +52,16 @@ process SANGER_TOL_CPRETEXT { Nextflow: \$(nextflow -v | cut -d " " -f3) executor system: $get_version END_VERSIONS - - printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml """ + // INFILE=${output_dir}/pipeline_info/software_versions.yml + // IFS=\$'\n' + // echo "$pipeline_name:" >> versions.yml + // for LINE in \$(cat "\$INFILE") + // do + // echo " \$LINE" >> versions.yml + // done + stub: def pipeline_version = task.ext.version ?: "main" def (pipeline_prefix,pipeline_suffix) = pipeline_name.split('/') diff --git a/modules/nf-core/gfastats/gfastats.diff b/modules/nf-core/gfastats/gfastats.diff new file mode 100644 index 0000000..0f108e1 --- /dev/null +++ b/modules/nf-core/gfastats/gfastats.diff @@ -0,0 +1,38 @@ +Changes in module 'nf-core/gfastats' +--- modules/nf-core/gfastats/main.nf ++++ modules/nf-core/gfastats/main.nf +@@ -19,7 +19,6 @@ + + output: + tuple val(meta), path("*.assembly_summary"), emit: assembly_summary +- tuple val(meta), path("*.${out_fmt}.gz") , emit: assembly + path "versions.yml" , emit: versions + + when: +@@ -32,18 +31,16 @@ + def ibed = include_bed ? "--include-bed $include_bed" : "" + def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : "" + def sak = instructions ? "--swiss-army-knife $instructions" : "" ++ ++ // Arguments have been removed due to causing errors with output values being 0 ++ // out-format seemed to be the main cause of this, in testing ++ // Even using the main branch of the github repo yielded the same error. ++ + """ + gfastats \\ +- $args \\ ++ --nstar-report \\ + --threads $task.cpus \\ +- $agp \\ +- $ibed \\ +- $ebed \\ +- $sak \\ +- --out-format ${prefix}.${out_fmt}.gz \\ + $assembly \\ +- $genome_size \\ +- $target \\ + > ${prefix}.assembly_summary + + cat <<-END_VERSIONS > versions.yml + +************************************************************ diff --git a/modules/nf-core/gfastats/main.nf b/modules/nf-core/gfastats/main.nf index 8db239a..37a811e 100644 --- a/modules/nf-core/gfastats/main.nf +++ b/modules/nf-core/gfastats/main.nf @@ -19,7 +19,6 @@ process GFASTATS { output: tuple val(meta), path("*.assembly_summary"), emit: assembly_summary - tuple val(meta), path("*.${out_fmt}.gz") , emit: assembly path "versions.yml" , emit: versions when: @@ -32,18 +31,16 @@ process GFASTATS { def ibed = include_bed ? "--include-bed $include_bed" : "" def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : "" def sak = instructions ? "--swiss-army-knife $instructions" : "" + + // Arguments have been removed due to causing errors with output values being 0 + // out-format seemed to be the main cause of this, in testing + // Even using the main branch of the github repo yielded the same error. + """ gfastats \\ - $args \\ + --nstar-report \\ --threads $task.cpus \\ - $agp \\ - $ibed \\ - $ebed \\ - $sak \\ - --out-format ${prefix}.${out_fmt}.gz \\ $assembly \\ - $genome_size \\ - $target \\ > ${prefix}.assembly_summary cat <<-END_VERSIONS > versions.yml diff --git a/workflows/ear.nf b/workflows/ear.nf index 091697a..4b7db54 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -111,60 +111,60 @@ workflow EAR { // // MODULE: MERQURYFK PLOTS OF GENOME // - MERQURYFK_MERQURYFK( - merquryfk_input - ) - ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) + // MERQURYFK_MERQURYFK( + // merquryfk_input + // ) + // ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) // // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE // SKIP THE MAPPING SUBWORKFLOW // - if (!params.mapped) { - // - // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC - // This allows us to more esily bypass the mapping if we already have a sorted and mapped bam - // - MAIN_MAPPING ( - YAML_INPUT.out.sample_id, - YAML_INPUT.out.longread_type, - YAML_INPUT.out.reference_hap1, - YAML_INPUT.out.pacbio_tuple, - ) - ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions ) - ch_mapped_bam = MAIN_MAPPING.out.mapped_bam - } else { - ch_mapped_bam = YAML_INPUT.out.mapped_bam - } + // if (!params.mapped) { + // // + // // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC + // // This allows us to more esily bypass the mapping if we already have a sorted and mapped bam + // // + // MAIN_MAPPING ( + // YAML_INPUT.out.sample_id, + // YAML_INPUT.out.longread_type, + // YAML_INPUT.out.reference_hap1, + // YAML_INPUT.out.pacbio_tuple, + // ) + // ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions ) + // ch_mapped_bam = MAIN_MAPPING.out.mapped_bam + // } else { + // ch_mapped_bam = YAML_INPUT.out.mapped_bam + // } // // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline // - GENERATE_SAMPLESHEET( - ch_mapped_bam - ) - ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) + // GENERATE_SAMPLESHEET( + // ch_mapped_bam + // ) + // ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) - // - // MODULE: Run Sanger-ToL/BlobToolKit - // - SANGER_TOL_BTK ( - YAML_INPUT.out.reference_hap1, - ch_mapped_bam, - GENERATE_SAMPLESHEET.out.csv, - YAML_INPUT.out.btk_un_diamond_database, - YAML_INPUT.out.btk_nt_database, - YAML_INPUT.out.btk_un_diamond_database, - YAML_INPUT.out.btk_config, - YAML_INPUT.out.btk_ncbi_taxonomy_path, - YAML_INPUT.out.busco_lineages, - YAML_INPUT.out.btk_taxid, - 'GCA_0001' - ) - ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) + // // + // // MODULE: Run Sanger-ToL/BlobToolKit + // // + // SANGER_TOL_BTK ( + // YAML_INPUT.out.reference_hap1, + // ch_mapped_bam, + // GENERATE_SAMPLESHEET.out.csv, + // YAML_INPUT.out.btk_un_diamond_database, + // YAML_INPUT.out.btk_nt_database, + // YAML_INPUT.out.btk_un_diamond_database, + // YAML_INPUT.out.btk_config, + // YAML_INPUT.out.btk_ncbi_taxonomy_path, + // YAML_INPUT.out.busco_lineages, + // YAML_INPUT.out.btk_taxid, + // 'GCA_0001' + // ) + // ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) // @@ -174,13 +174,13 @@ workflow EAR { hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get() longread_dir = YAML_INPUT.out.longread_dir.get() - SANGER_TOL_CPRETEXT( - reference, - longread_dir, - hic_dir, - [] - ) - ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) + // SANGER_TOL_CPRETEXT( + // reference, + // longread_dir, + // hic_dir, + // [] + // ) + // ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) // From 0ff29b3a421a2c390ba2821cd6ac62ec67006de1 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 2 Sep 2024 15:26:19 +0100 Subject: [PATCH 07/46] Uncomment workflow --- workflows/ear.nf | 96 ++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/workflows/ear.nf b/workflows/ear.nf index 4b7db54..091697a 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -111,60 +111,60 @@ workflow EAR { // // MODULE: MERQURYFK PLOTS OF GENOME // - // MERQURYFK_MERQURYFK( - // merquryfk_input - // ) - // ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) + MERQURYFK_MERQURYFK( + merquryfk_input + ) + ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) // // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE // SKIP THE MAPPING SUBWORKFLOW // - // if (!params.mapped) { - // // - // // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC - // // This allows us to more esily bypass the mapping if we already have a sorted and mapped bam - // // - // MAIN_MAPPING ( - // YAML_INPUT.out.sample_id, - // YAML_INPUT.out.longread_type, - // YAML_INPUT.out.reference_hap1, - // YAML_INPUT.out.pacbio_tuple, - // ) - // ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions ) - // ch_mapped_bam = MAIN_MAPPING.out.mapped_bam - // } else { - // ch_mapped_bam = YAML_INPUT.out.mapped_bam - // } + if (!params.mapped) { + // + // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC + // This allows us to more esily bypass the mapping if we already have a sorted and mapped bam + // + MAIN_MAPPING ( + YAML_INPUT.out.sample_id, + YAML_INPUT.out.longread_type, + YAML_INPUT.out.reference_hap1, + YAML_INPUT.out.pacbio_tuple, + ) + ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions ) + ch_mapped_bam = MAIN_MAPPING.out.mapped_bam + } else { + ch_mapped_bam = YAML_INPUT.out.mapped_bam + } // // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline // - // GENERATE_SAMPLESHEET( - // ch_mapped_bam - // ) - // ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) + GENERATE_SAMPLESHEET( + ch_mapped_bam + ) + ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) - // // - // // MODULE: Run Sanger-ToL/BlobToolKit - // // - // SANGER_TOL_BTK ( - // YAML_INPUT.out.reference_hap1, - // ch_mapped_bam, - // GENERATE_SAMPLESHEET.out.csv, - // YAML_INPUT.out.btk_un_diamond_database, - // YAML_INPUT.out.btk_nt_database, - // YAML_INPUT.out.btk_un_diamond_database, - // YAML_INPUT.out.btk_config, - // YAML_INPUT.out.btk_ncbi_taxonomy_path, - // YAML_INPUT.out.busco_lineages, - // YAML_INPUT.out.btk_taxid, - // 'GCA_0001' - // ) - // ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) + // + // MODULE: Run Sanger-ToL/BlobToolKit + // + SANGER_TOL_BTK ( + YAML_INPUT.out.reference_hap1, + ch_mapped_bam, + GENERATE_SAMPLESHEET.out.csv, + YAML_INPUT.out.btk_un_diamond_database, + YAML_INPUT.out.btk_nt_database, + YAML_INPUT.out.btk_un_diamond_database, + YAML_INPUT.out.btk_config, + YAML_INPUT.out.btk_ncbi_taxonomy_path, + YAML_INPUT.out.busco_lineages, + YAML_INPUT.out.btk_taxid, + 'GCA_0001' + ) + ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) // @@ -174,13 +174,13 @@ workflow EAR { hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get() longread_dir = YAML_INPUT.out.longread_dir.get() - // SANGER_TOL_CPRETEXT( - // reference, - // longread_dir, - // hic_dir, - // [] - // ) - // ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) + SANGER_TOL_CPRETEXT( + reference, + longread_dir, + hic_dir, + [] + ) + ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) // From d5448fae161f39443c0374760c2ced3d00e29a4c Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:46:09 +0100 Subject: [PATCH 08/46] Update ci.yml --- .github/workflows/ci.yml | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61b0cbf..1d1ce2e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,39 +65,12 @@ jobs: run: | nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - - name: NF-Core Download - download singularity containers - run: | - nf-core download sanger-tol/blobtoolkit --revision draft_assemblies --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - - - name: NF-Core Download - download singularity containers - run: | - nf-core download sanger-tol/curationpretext --revision 1.0.0 --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. + # Needs a kmer db for merqury run: | curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - - - name: Download the NCBI taxdump database - run: | - mkdir ncbi_taxdump - curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf - - - - name: Download the BUSCO lineage database - run: | - mkdir busco_database - curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf - - - - name: Download the subset of NT database - run: | - mkdir NT_database - curl -L https://ftp.ncbi.nlm.nih.gov/blast/db/18S_fungal_sequences.tar.gz | tar -C NT_database -xzf - - - - name: Download the subset of Diamond database - run: | - mkdir diamond - wget -c https://tolit.cog.sanger.ac.uk/test-data/resources/diamond/UP000000212_1234679_tax.dmnd -O diamond/UP000000212_1234679_tax.dmnd - # - name: Disk space cleanup # uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 @@ -106,5 +79,5 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,singularity --outdir ./results ls ./results/*/* From f476781b483508c3d3f6853dc37cf868b13b61b5 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 13:30:27 +0100 Subject: [PATCH 09/46] Updating the CICD and adding steps --- .github/workflows/ci.yml | 30 +----------- assets/test.yaml | 32 +++++++----- modules/local/sanger_tol_btk.nf | 12 +++-- nextflow.config | 2 +- workflows/ear.nf | 86 +++++++++++++++++++-------------- 5 files changed, 80 insertions(+), 82 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61b0cbf..e1f61fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,38 +65,11 @@ jobs: run: | nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - - name: NF-Core Download - download singularity containers - run: | - nf-core download sanger-tol/blobtoolkit --revision draft_assemblies --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - - - name: NF-Core Download - download singularity containers - run: | - nf-core download sanger-tol/curationpretext --revision 1.0.0 --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. run: | curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - - - - name: Download the NCBI taxdump database - run: | - mkdir ncbi_taxdump - curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf - - - - name: Download the BUSCO lineage database - run: | - mkdir busco_database - curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf - - - - name: Download the subset of NT database - run: | - mkdir NT_database - curl -L https://ftp.ncbi.nlm.nih.gov/blast/db/18S_fungal_sequences.tar.gz | tar -C NT_database -xzf - - - - name: Download the subset of Diamond database - run: | - mkdir diamond - wget -c https://tolit.cog.sanger.ac.uk/test-data/resources/diamond/UP000000212_1234679_tax.dmnd -O diamond/UP000000212_1234679_tax.dmnd + cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa # - name: Disk space cleanup # uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 @@ -107,4 +80,3 @@ jobs: # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - ls ./results/*/* diff --git a/assets/test.yaml b/assets/test.yaml index 4175309..e7cd829 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,25 +1,33 @@ +# General Vales for all subpiplines and modules assembly_id: grTriPseu1 -reference_hap1: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa -reference_hap2: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa -reference_haplotigs: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa +reference_hap1: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa +reference_hap2: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa +reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa + +# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore. +mapped_bam: [] + +merquryfk: + fastk_hist: "./" + fastk_ktab: "./" + +# Used by both subpipelines longread: type: hifi - dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/ -mapped_bam: idCulLati1/mapped_bam.bam + dir: /lustre/scratch122/tol/data/d/0/d/1/f/e/Anisus_vorticulus/genomic_data/xgAniVori1/pacbio/fasta/ + curationpretext: aligner: minimap2 telomere_motif: TTAGGG - hic_dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/hic-arima/ -merquryfk: - fastk_hist: "./" - fastk_ktab: "./" + hic_dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/hic-arima/ btk: + taxid: 352914 + gca_accession: GCA_0001 + lineages: "fungi_odb10" nt_database: /home/runner/work/ascc/ascc/NT_database/ nt_database_prefix: 18S_fungal_sequences diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/ ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp - taxid: 352914 - gca_accession: GCA_0001 - lineages: "fungi_odb10" + config: /home/runner/work/ear/ear/conf/sanger-tol-btk.config diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf index c9cb6a9..c8a5776 100644 --- a/modules/local/sanger_tol_btk.nf +++ b/modules/local/sanger_tol_btk.nf @@ -17,11 +17,11 @@ process SANGER_TOL_BTK { output: tuple val(meta), path("*_out/blobtoolkit/REFERENCE"), emit: dataset - path("*_out/blobtoolkit/plots"), emit: plots - path("*_out/blobtoolkit/REFERENCE/summary.json.gz"), emit: summary_json - path("*_out/busco"), emit: busco_data - path("*_out/multiqc"), emit: multiqc_report - path("*_out/pipeline_info/blobtoolkit"), emit: pipeline_info + path "*_out/blobtoolkit/plots" , emit: plots + path "*_out/blobtoolkit/REFERENCE/summary.json.gz", emit: summary_json + path "*_out/busco", emit: busco_data + path "*_out/multiqc", emit: multiqc_report + path "*_out/pipeline_info/blobtoolkit", emit: pipeline_info path "versions.yml", emit: versions script: @@ -111,3 +111,5 @@ process SANGER_TOL_BTK { END_VERSIONS """ } + +} diff --git a/nextflow.config b/nextflow.config index a2b702d..9302ce8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -178,7 +178,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id: 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container diff --git a/workflows/ear.nf b/workflows/ear.nf index 091697a..34310ba 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -40,6 +40,13 @@ workflow EAR { ch_versions = Channel.empty() ch_align_bam = Channel.empty() + exclude_steps = params.steps ? params.steps.split(",") : "" + + full_list = ["btk", "cpretext"] + + if (!full_list.containsAll(exclude_steps)) { + exit 1, "There is an extra argument given on Command Line: \n Check contents of: $exclude_steps\nMaster list is: $full_list" + } // // MODULE: YAML_INPUT @@ -140,48 +147,57 @@ workflow EAR { // - // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline + // LOGIC: STEP TO STOP BTK RUNNING IF SPECIFIED BY USER // - GENERATE_SAMPLESHEET( - ch_mapped_bam - ) - ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) + if (!exclude_steps.contains('btk')) { + // + // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline + // + GENERATE_SAMPLESHEET( + ch_mapped_bam + ) + ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions ) - // - // MODULE: Run Sanger-ToL/BlobToolKit - // - SANGER_TOL_BTK ( - YAML_INPUT.out.reference_hap1, - ch_mapped_bam, - GENERATE_SAMPLESHEET.out.csv, - YAML_INPUT.out.btk_un_diamond_database, - YAML_INPUT.out.btk_nt_database, - YAML_INPUT.out.btk_un_diamond_database, - YAML_INPUT.out.btk_config, - YAML_INPUT.out.btk_ncbi_taxonomy_path, - YAML_INPUT.out.busco_lineages, - YAML_INPUT.out.btk_taxid, - 'GCA_0001' - ) - ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) + // + // MODULE: Run Sanger-ToL/BlobToolKit + // + SANGER_TOL_BTK ( + YAML_INPUT.out.reference_hap1, + ch_mapped_bam, + GENERATE_SAMPLESHEET.out.csv, + YAML_INPUT.out.btk_un_diamond_database, + YAML_INPUT.out.btk_nt_database, + YAML_INPUT.out.btk_un_diamond_database, + YAML_INPUT.out.btk_config, + YAML_INPUT.out.btk_ncbi_taxonomy_path, + YAML_INPUT.out.busco_lineages, + YAML_INPUT.out.btk_taxid, + 'GCA_0001' + ) + ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) + } // - // MODULE: Run Sanger-ToL/CurationPretext + // LOGIC: STEP TO STOP CURATION_PRETEXT RUNNING IF SPECIFIED BY USER // - reference = YAML_INPUT.out.reference_path.get() - hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get() - longread_dir = YAML_INPUT.out.longread_dir.get() - - SANGER_TOL_CPRETEXT( - reference, - longread_dir, - hic_dir, - [] - ) - ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) - + if (!exclude_steps.contains('cpretext')) { + // + // MODULE: Run Sanger-ToL/CurationPretext + // + reference = YAML_INPUT.out.reference_path.get() + hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get() + longread_dir = YAML_INPUT.out.longread_dir.get() + + SANGER_TOL_CPRETEXT( + reference, + longread_dir, + hic_dir, + [] + ) + ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions ) + } // // Collate and save software versions From 53242fe097068b0dffa3c26e968c634a942aca21 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 13:40:04 +0100 Subject: [PATCH 10/46] Extra } --- modules/local/sanger_tol_btk.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf index c8a5776..d7675fb 100644 --- a/modules/local/sanger_tol_btk.nf +++ b/modules/local/sanger_tol_btk.nf @@ -111,5 +111,3 @@ process SANGER_TOL_BTK { END_VERSIONS """ } - -} From 73399cb13d0a7fc07b5f13c9651fef83666d0882 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 13:48:48 +0100 Subject: [PATCH 11/46] Fix steps --- .github/workflows/ci.yml | 3 ++- workflows/ear.nf | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a501dff..1311aea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,5 +79,6 @@ jobs: # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix + # Skip BTK and CPRETEXT as they are already tested on their repos. run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext diff --git a/workflows/ear.nf b/workflows/ear.nf index 34310ba..fb8b441 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -42,7 +42,7 @@ workflow EAR { exclude_steps = params.steps ? params.steps.split(",") : "" - full_list = ["btk", "cpretext"] + full_list = ["btk", "cpretext", ""] if (!full_list.containsAll(exclude_steps)) { exit 1, "There is an extra argument given on Command Line: \n Check contents of: $exclude_steps\nMaster list is: $full_list" From 0a441e7092b7cac5abda9d8b03c064103178fad4 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 13:54:50 +0100 Subject: [PATCH 12/46] Wrong Pacbio path --- assets/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/test.yaml b/assets/test.yaml index e7cd829..4cbec2b 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -14,7 +14,7 @@ merquryfk: # Used by both subpipelines longread: type: hifi - dir: /lustre/scratch122/tol/data/d/0/d/1/f/e/Anisus_vorticulus/genomic_data/xgAniVori1/pacbio/fasta/ + dir: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/genomic_data/pacbio/ curationpretext: aligner: minimap2 From b0fe91f707f6f2456eead753fc7a8232bb173e62 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 13:59:19 +0100 Subject: [PATCH 13/46] Wrong Pacbio path --- assets/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/test.yaml b/assets/test.yaml index 4cbec2b..47947d9 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -14,7 +14,7 @@ merquryfk: # Used by both subpipelines longread: type: hifi - dir: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/genomic_data/pacbio/ + dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/pacbio/ curationpretext: aligner: minimap2 From a758e817ce9413305c365614192542491dc9a78b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 14:09:12 +0100 Subject: [PATCH 14/46] Correct collision --- .github/workflows/ci.yml | 1 + assets/test.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1311aea..9f1beaf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,6 +71,7 @@ jobs: run: | curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa + cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa # - name: Disk space cleanup # uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 diff --git a/assets/test.yaml b/assets/test.yaml index 47947d9..0f5f6ed 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -2,7 +2,7 @@ assembly_id: grTriPseu1 reference_hap1: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa reference_hap2: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa -reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa +reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa # If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore. mapped_bam: [] From f429c473e8ebbe4ec7c54631ec1a3d5b4d2f5d2d Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 16:27:48 +0100 Subject: [PATCH 15/46] Update for linting --- .github/workflows/ci.yml | 4 +-- .nf-core.yml | 1 + README.md | 2 +- conf/sanger-tol-btk.config | 2 +- conf/test.config | 2 +- modules.json | 38 ++++++++-------------------- modules/local/nextflow/run/main.nf | 2 +- modules/local/sanger_tol_btk.nf | 14 +++++----- modules/local/sanger_tol_cpretext.nf | 2 +- nextflow.config | 1 + nextflow_schema.json | 35 +++++-------------------- subworkflows/local/main_mapping.nf | 2 +- subworkflows/local/pe_mapping.nf | 2 +- subworkflows/local/se_mapping.nf | 2 +- workflows/ear.nf | 24 +++++++++++------- 15 files changed, 51 insertions(+), 82 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9f1beaf..95c5d8c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: NXF_VER: - - "24.04.2" + - "24.04.0" - "latest-everything" steps: - name: Get branch names @@ -82,4 +82,4 @@ jobs: # Remember that you can parallelise this by using strategy.matrix # Skip BTK and CPRETEXT as they are already tested on their repos. run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext,merquryfk diff --git a/.nf-core.yml b/.nf-core.yml index 9a35f55..fd05354 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -4,6 +4,7 @@ lint: - assets/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_dark.png + - lib/nfcore_external_java_deps.jar - .github/ISSUE_TEMPLATE/config.yml - .github/workflows/awstest.yml - .github/workflows/awsfulltest.yml diff --git a/README.md b/README.md index 652eba6..697f288 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) diff --git a/conf/sanger-tol-btk.config b/conf/sanger-tol-btk.config index 247dbbd..553ad56 100644 --- a/conf/sanger-tol-btk.config +++ b/conf/sanger-tol-btk.config @@ -4,4 +4,4 @@ process { memory = { check_max( 10.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } -} \ No newline at end of file +} diff --git a/conf/test.config b/conf/test.config index 7313f18..06d069f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,5 +20,5 @@ params { max_time = '6.h' input = "${projectDir}/assets/test.yaml" outdir = "results" - + } diff --git a/modules.json b/modules.json index ef2ff42..d4e081b 100644 --- a/modules.json +++ b/modules.json @@ -8,46 +8,34 @@ "cat/cat": { "branch": "master", "git_sha": "5bb8ca085e17549e185e1823495ab8d20727a805", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "46eca555142d6e597729fcb682adcc791796f514", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -56,26 +44,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/local/nextflow/run/main.nf b/modules/local/nextflow/run/main.nf index cc522bc..af6ba65 100644 --- a/modules/local/nextflow/run/main.nf +++ b/modules/local/nextflow/run/main.nf @@ -35,4 +35,4 @@ process NEXTFLOW_RUN { output: path "results" , emit: output val process.text, emit: log -} \ No newline at end of file +} diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf index d7675fb..699ee04 100644 --- a/modules/local/sanger_tol_btk.nf +++ b/modules/local/sanger_tol_btk.nf @@ -16,13 +16,13 @@ process SANGER_TOL_BTK { val gca_accession output: - tuple val(meta), path("*_out/blobtoolkit/REFERENCE"), emit: dataset - path "*_out/blobtoolkit/plots" , emit: plots - path "*_out/blobtoolkit/REFERENCE/summary.json.gz", emit: summary_json - path "*_out/busco", emit: busco_data - path "*_out/multiqc", emit: multiqc_report - path "*_out/pipeline_info/blobtoolkit", emit: pipeline_info - path "versions.yml", emit: versions + tuple val(meta), path("*_out/blobtoolkit/REFERENCE"), emit: dataset + path "*_out/blobtoolkit/plots" , emit: plots + path "*_out/blobtoolkit/REFERENCE/summary.json.gz", emit: summary_json + path "*_out/busco", emit: busco_data + path "*_out/multiqc", emit: multiqc_report + path "*_out/pipeline_info/blobtoolkit", emit: pipeline_info + path "versions.yml", emit: versions script: def pipeline_name = task.ext.pipeline_name diff --git a/modules/local/sanger_tol_cpretext.nf b/modules/local/sanger_tol_cpretext.nf index f9b12d8..b073039 100644 --- a/modules/local/sanger_tol_cpretext.nf +++ b/modules/local/sanger_tol_cpretext.nf @@ -45,7 +45,7 @@ process SANGER_TOL_CPRETEXT { $args \\ $config \\ -resume' - + cat <<-END_VERSIONS > versions.yml "${task.process}": $pipeline_suffix: $pipeline_version diff --git a/nextflow.config b/nextflow.config index 9302ce8..e564534 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,7 @@ params { // Input options input = null mapped = false + steps = "" // Boilerplate options outdir = null diff --git a/nextflow_schema.json b/nextflow_schema.json index f198603..eee6164 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -36,9 +36,14 @@ "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, - "multiqc_title": { + "mapped": { + "type": "boolean", + "description": "Have you got a mapped bam as input?", + "fa_icon": "fas fa-file-signature" + }, + "steps": { "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "description": "csv list of steps to skip", "fa_icon": "fas fa-file-signature" } } @@ -168,14 +173,6 @@ "fa_icon": "fas fa-remove-format", "hidden": true }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true - }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", @@ -189,24 +186,6 @@ "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, - "multiqc_config": { - "type": "string", - "format": "file-path", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", - "hidden": true - }, - "multiqc_logo": { - "type": "string", - "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", - "fa_icon": "fas fa-image", - "hidden": true - }, - "multiqc_methods_description": { - "type": "string", - "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", diff --git a/subworkflows/local/main_mapping.nf b/subworkflows/local/main_mapping.nf index 28c100f..0531201 100644 --- a/subworkflows/local/main_mapping.nf +++ b/subworkflows/local/main_mapping.nf @@ -74,4 +74,4 @@ workflow MAIN_MAPPING { mapped_bam // channel: tuple val(meta), path(mapped_bam) versions = ch_versions // channel: [ path(versions.yml) ] -} \ No newline at end of file +} diff --git a/subworkflows/local/pe_mapping.nf b/subworkflows/local/pe_mapping.nf index 3c41670..358be3b 100644 --- a/subworkflows/local/pe_mapping.nf +++ b/subworkflows/local/pe_mapping.nf @@ -113,4 +113,4 @@ process GrabFiles { tuple val(meta), path("in/*.{fa,fasta}.{gz}") "true" -} \ No newline at end of file +} diff --git a/subworkflows/local/se_mapping.nf b/subworkflows/local/se_mapping.nf index 8c7ad52..c3307d4 100644 --- a/subworkflows/local/se_mapping.nf +++ b/subworkflows/local/se_mapping.nf @@ -112,4 +112,4 @@ process GrabFiles { tuple val(meta), path("in/*.{fa,fasta,fna}.{gz}") "true" -} \ No newline at end of file +} diff --git a/workflows/ear.nf b/workflows/ear.nf index fb8b441..6a87e9e 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -13,7 +13,7 @@ include { YAML_INPUT } from '../subworkflows/local/yaml_i include { MAIN_MAPPING } from '../subworkflows/local/main_mapping' // Module imports -include { CAT_CAT } from '../modules/nf-core/cat/cat/main' +include { CAT_CAT } from '../modules/nf-core/cat/cat/main' include { GENERATE_SAMPLESHEET } from '../modules/local/generate_samplesheet' include { GFASTATS } from '../modules/nf-core/gfastats/main' include { MERQURYFK_MERQURYFK } from '../modules/nf-core/merquryfk/merquryfk/main' @@ -42,10 +42,10 @@ workflow EAR { exclude_steps = params.steps ? params.steps.split(",") : "" - full_list = ["btk", "cpretext", ""] + full_list = ["btk", "cpretext", "merquryfk", ""] if (!full_list.containsAll(exclude_steps)) { - exit 1, "There is an extra argument given on Command Line: \n Check contents of: $exclude_steps\nMaster list is: $full_list" + exit 1, "There is an extra argument given on Command Line: \nCheck contents of: $exclude_steps\nMaster list is: $full_list" } // @@ -58,7 +58,7 @@ workflow EAR { // // LOGIC: IF HAPLOTIGS IS EMPTY THEN PASS ON HALPLOTYPE ASSEMBLY // IF HAPLOTIGS EXISTS THEN MERGE WITH HAPLOTYPE ASSEMBLY - // + // if (YAML_INPUT.out.reference_haplotigs.ifEmpty(true)) { YAML_INPUT.out.sample_id .combine(YAML_INPUT.out.reference_hap2) @@ -116,13 +116,19 @@ workflow EAR { // - // MODULE: MERQURYFK PLOTS OF GENOME + // LOGIC: STEP TO STOP MERQURY_FK RUNNING IF SPECIFIED BY USER // - MERQURYFK_MERQURYFK( - merquryfk_input - ) - ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) + if (!exclude_steps.contains('merquryfk')) { + // + // MODULE: MERQURYFK PLOTS OF GENOME + // + merquryfk_input.view() + MERQURYFK_MERQURYFK( + merquryfk_input + ) + ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) + } // // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE From 7f1dd548a13e48a582973a18b07fb46febdc5f97 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 16:32:43 +0100 Subject: [PATCH 16/46] PRETTIER --- .github/workflows/ci.yml | 2 +- .nf-core.yml | 2 +- CHANGELOG.md | 31 +++++++++++++++++-------------- README.md | 14 +++++++------- assets/real_pdf.yaml | 4 ++-- docs/output.md | 11 ++++------- docs/usage.md | 1 - 7 files changed, 32 insertions(+), 33 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 95c5d8c..c347e38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,7 +47,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.10" - + - name: Install nf-core run: | pip install nf-core diff --git a/.nf-core.yml b/.nf-core.yml index fd05354..d9fe12b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -4,7 +4,7 @@ lint: - assets/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_dark.png - - lib/nfcore_external_java_deps.jar + - lib/nfcore_external_java_deps.jar - .github/ISSUE_TEMPLATE/config.yml - .github/workflows/awstest.yml - .github/workflows/awsfulltest.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 3173f7c..9106bfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) The current pipeline means the MVP for ear. ### Added + GFASTATS to generate statistics on the input primary genome. MERQURY_FK to generate kmer graphs and analyses of the primary, haplotype and merged assembly. BLOBTOOLKIT to generate busco files and blobtoolkit dataset/plots. @@ -17,24 +18,26 @@ CURATIONPRETEXT to generate pretext plots and pngs. ### Parameters -| Old parameter | New parameter | -| --------------- | ------------- | -| | --mapped | +| Old parameter | New parameter | +| ------------- | ------------- | +| | --mapped | ### Software dependencies -| Dependency | Old version | New version | -| ----------- | ------------- | ------------- | -| sanger-tol/blobtoolkit* | | draft_assemblies | -| sanger-tol/curationpretext* | | 1.0.0 (UNSC Cradle) | -| GFASTATS | | 1.3.6--hdcf5f25_3 | -| MERQUERY_FK | | 1.2 | -| MINIMAP2_ALIGN | | 2.28 | -| SAMTOOLS_MERGE | | 1.20--h50ea8bc_0 | -| SAMTOOLS_SORT | | 1.20--h50ea8bc_0 | -| +| Dependency | Old version | New version | +| ---------------------------- | ----------- | ------------------- | +| sanger-tol/blobtoolkit\* | | draft_assemblies | +| sanger-tol/curationpretext\* | | 1.0.0 (UNSC Cradle) | +| GFASTATS | | 1.3.6--hdcf5f25_3 | +| MERQUERY_FK | | 1.2 | +| MINIMAP2_ALIGN | | 2.28 | +| SAMTOOLS_MERGE | | 1.20--h50ea8bc_0 | +| SAMTOOLS_SORT | | 1.20--h50ea8bc_0 | + +| - Note: for pipelines, please check their own CHANGELOG file for a full list of software dependencies. ### Dependencies -The pipeline depends on a number of databases which are noted in [README](README.md) and [USAGE](docs/usage.md). + +The pipeline depends on a number of databases which are noted in [README](README.md) and [USAGE](docs/usage.md). diff --git a/README.md b/README.md index 697f288..5665771 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ 1. Read the input yaml file (YAML_INPUT) 2. Run GFASTATS (GFASTARS) 3. Run MERQURYFK_MERQURYFK (MERQURYFK) -4. Run MAIN_MAPPING, longread single-end/paired-end mapping +4. Run MAIN_MAPPING, longread single-end/paired-end mapping 5. Run GENERATE_SAMPLESHEET, generate a csv file required for SANGER_TOL_BTK. 6. Run SANGER_TOL_BTK, also known as SANGER-TOL/BLOBTOOLKIT a subpipline for SANGER-TOL/EAR 7. Run SANGER_TOL_CPRETEXT, also known as SANGER-TOL/CURATIONPRETEXT a subpipeline for SANGER-TOL/EAR. @@ -27,11 +27,12 @@ The sanger-tol/ear pipeline requires a number of databases in place in order to run the blobtoolkit pipeline. These include: - - A blast nt database - - A Diamond blast uniprot database - - A Diamond blast nr database - - An NCBI taxdump - - An NCBI rankedlineage.dmp + +- A blast nt database +- A Diamond blast uniprot database +- A Diamond blast nr database +- An NCBI taxdump +- An NCBI rankedlineage.dmp Next, a yaml file containing the following should then be completed: @@ -70,7 +71,6 @@ btk: config: ``` - Now, you can run the pipeline using: ```bash diff --git a/assets/real_pdf.yaml b/assets/real_pdf.yaml index 8f8d4a0..19c4c35 100644 --- a/assets/real_pdf.yaml +++ b/assets/real_pdf.yaml @@ -20,14 +20,14 @@ PROFILING: # ASSEMBLY DATA ASSEMBLIES: Pre-curation: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] pri: gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/ Curated: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] pri: gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.fa.gfastats busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.insecta_odb10.busco/short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt diff --git a/docs/output.md b/docs/output.md index f5a9c8b..dac22bd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -27,7 +27,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d -[GFASTATS](https://github.com/vgl-hub/gfastats) is a single fast and exhaustive tool for summary statistics and simultaneous *fa* (fasta, fastq, gfa [.gz]) genome assembly file manipulation. +[GFASTATS](https://github.com/vgl-hub/gfastats) is a single fast and exhaustive tool for summary statistics and simultaneous _fa_ (fasta, fastq, gfa [.gz]) genome assembly file manipulation. ### MERQURYFK @@ -35,7 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Output files - `merquryfk/` - - `*.completeness.stats`: + - `*.completeness.stats`: - `*{"primary","haplotype",""}_only.bed`: - `*{"primary","haplotype",""}.qv`: - `*.spectra-asm.{fl,ln,st}.png`: @@ -47,14 +47,13 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Merqury is a novel tool for reference-free assembly evaluation based on efficient k-mer set operations. By comparing k-mers in a de novo assembly to those found in unassembled high-accuracy reads, Merqury estimates base-level accuracy and completeness. - ## SANGER_TOL_BTK
Output files - `sanger/*_blobtoolkit_out/` - - `blobtoolkit/plots/*png`: Blobtoolkit plots + - `blobtoolkit/plots/*png`: Blobtoolkit plots - `blobtoolkit/{ASSEMBLY_NAME}/*.json.gz`: Blobtoolkit dataset for use in BTK_viewer. - `busco/*_odb10/*.{tsv,tar.gz,json,txt}`: Busco output - `muliqc/`: MultiQC plots/data and report.html. @@ -64,14 +63,13 @@ Merqury is a novel tool for reference-free assembly evaluation based on efficien [SANGER_TOL_BTK](https://pipelines.tol.sanger.ac.uk/blobtoolkit) is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes. - ## SANGER_TOL_CPRETEXT
Output files - `sanger/*_curationpretext_out/` - - `accessory_files/*.{bigWig,bed,bedgraph}`: Track files describing Telomere, gap, coverage data across the genome. + - `accessory_files/*.{bigWig,bed,bedgraph}`: Track files describing Telomere, gap, coverage data across the genome. - `pretext_maps_raw`: Pre-accessory file ingestion pretext files. - `pretext_maps_processed`: Post-accessory file ingestion pretext files, e.g. the final output. - [`pipeline_info`](#pipeline-information) @@ -80,7 +78,6 @@ Merqury is a novel tool for reference-free assembly evaluation based on efficien [SANGER_TOL_CPRETEXT](https://pipelines.tol.sanger.ac.uk/curationpretext) is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://pipelines.tol.sanger.ac.uk/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes. - ### Pipeline information
diff --git a/docs/usage.md b/docs/usage.md index b703d3e..a1e62af 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -166,7 +166,6 @@ As in the Snakemake version [a YAML configuration file](https://github.com/blobt The data in the YAML is currently ignored in the Nextflow pipeline version. The YAML file is retained only to allow compatibility with the BlobDir dataset generated by the [Snakemake version](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/blobtoolkit-pipeline/src). The taxonomic information in the YAML file can be obtained from [NCBI Taxonomy](https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/). - ## Running the pipeline The typical command for running the pipeline is as follows: From a8c8189a6cc5c98a769a33767e070e15d75ffdf2 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 16:36:21 +0100 Subject: [PATCH 17/46] Template yaml PRETTIER fix --- README.md | 1 - assets/template_pdf.yaml | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5665771..5b61fe6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ [![GitHub Actions CI Status](https://github.com/sanger-tol/ear/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/ci.yml) [![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) - [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) diff --git a/assets/template_pdf.yaml b/assets/template_pdf.yaml index 3779c19..5688f4e 100644 --- a/assets/template_pdf.yaml +++ b/assets/template_pdf.yaml @@ -20,14 +20,14 @@ PROFILING: # ASSEMBLY DATA ASSEMBLIES: Pre-curation: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|] pri: gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/ Curated: - pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] + pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1] pri: gfastats--nstar-report_txt: idCulLati1.1.primary.curated.fa.gfastats busco_short_summary_txt: short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt From 229f0a97d80006c52c30fef7cb358cd83d7e8c3e Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 20:12:43 +0100 Subject: [PATCH 18/46] Updates --- modules/local/sanger_tol_btk.nf | 2 +- workflows/ear.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf index 699ee04..4f0bb06 100644 --- a/modules/local/sanger_tol_btk.nf +++ b/modules/local/sanger_tol_btk.nf @@ -7,7 +7,7 @@ process SANGER_TOL_BTK { tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv") path blastp, stageAs: "blastp.dmnd" - path blastn, stageAs: "" + path blastn, path blastx path config_file path tax_dump diff --git a/workflows/ear.nf b/workflows/ear.nf index 6a87e9e..9f1c434 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -81,6 +81,7 @@ workflow EAR { ch_haplotype_fasta = YAML_INPUT.out.reference_hap2 } + // // MODULE: ASSEMBLY STATISTICS FOR THE FASTA // @@ -123,7 +124,6 @@ workflow EAR { // // MODULE: MERQURYFK PLOTS OF GENOME // - merquryfk_input.view() MERQURYFK_MERQURYFK( merquryfk_input ) From f64792560028421487b125ddcd80419a087d67bb Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 20:14:00 +0100 Subject: [PATCH 19/46] Testing addition of ncbidatasets summary module --- modules/local/ncbidatasets/summary/main.nf | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 modules/local/ncbidatasets/summary/main.nf diff --git a/modules/local/ncbidatasets/summary/main.nf b/modules/local/ncbidatasets/summary/main.nf new file mode 100644 index 0000000..bb9191f --- /dev/null +++ b/modules/local/ncbidatasets/summary/main.nf @@ -0,0 +1,52 @@ +process NCBIDATASETS_DOWNLOAD { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::ncbi-datasets-cli=15.11.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ncbi-datasets-pylib:15.11.0--pyhdfd78af_0': + 'staphb/ncbi-datasets:15.11.0' }" + + input: + val(input_data) + + output: + val(output_data) , emit: taxonomy + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def valid_commands = ["taxonomy", "taxon"] + if (!valid_commands.contains(meta.command)) { + error "Unsupported command: ${meta.command} " + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id.replaceAll(' ', '_')}" + + """ + + [ -e /usr/local/ssl/cacert.pem ] && export SSL_CERT_FILE=/usr/local/ssl/cacert.pem + + datasets summary \\ + ${meta.command} "${meta.latin_name}" ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id.replaceAll(' ', '_')}" + """ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' ) + END_VERSIONS + """ +} \ No newline at end of file From c3475ee7660348f48c0e345f72cd37a503e715c2 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 18 Sep 2024 20:20:32 +0100 Subject: [PATCH 20/46] not touching ncbi datasets cli --- modules/local/ncbidatasets/summary/main.nf | 52 ---------------------- 1 file changed, 52 deletions(-) delete mode 100644 modules/local/ncbidatasets/summary/main.nf diff --git a/modules/local/ncbidatasets/summary/main.nf b/modules/local/ncbidatasets/summary/main.nf deleted file mode 100644 index bb9191f..0000000 --- a/modules/local/ncbidatasets/summary/main.nf +++ /dev/null @@ -1,52 +0,0 @@ -process NCBIDATASETS_DOWNLOAD { - tag "$meta.id" - label 'process_single' - - conda "conda-forge::ncbi-datasets-cli=15.11.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ncbi-datasets-pylib:15.11.0--pyhdfd78af_0': - 'staphb/ncbi-datasets:15.11.0' }" - - input: - val(input_data) - - output: - val(output_data) , emit: taxonomy - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def valid_commands = ["taxonomy", "taxon"] - if (!valid_commands.contains(meta.command)) { - error "Unsupported command: ${meta.command} " - } - - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id.replaceAll(' ', '_')}" - - """ - - [ -e /usr/local/ssl/cacert.pem ] && export SSL_CERT_FILE=/usr/local/ssl/cacert.pem - - datasets summary \\ - ${meta.command} "${meta.latin_name}" ${args} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' ) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id.replaceAll(' ', '_')}" - """ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' ) - END_VERSIONS - """ -} \ No newline at end of file From 1b68e566398215510c8bf3e8f4b49e24b4ac733a Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 19 Sep 2024 09:14:41 +0100 Subject: [PATCH 21/46] Bug Fix for extra comma in btk module --- modules/local/sanger_tol_btk.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf index 4f0bb06..543e693 100644 --- a/modules/local/sanger_tol_btk.nf +++ b/modules/local/sanger_tol_btk.nf @@ -7,7 +7,7 @@ process SANGER_TOL_BTK { tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv") path blastp, stageAs: "blastp.dmnd" - path blastn, + path blastn path blastx path config_file path tax_dump From e5b60b54c00eff45ff2ddd26b3bff7c503ab4a2b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 20 Sep 2024 12:44:02 +0100 Subject: [PATCH 22/46] Adding MINIMAP2 resource fix --- conf/base.config | 54 +++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/conf/base.config b/conf/base.config index e609a9e..aa5a770 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,16 +11,22 @@ process { // TODO nf-core: Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" { - time = { check_max( 70.h * task.attempt, 'time' ) } + time = { check_max( 70.h * task.attempt, 'time' ) } + } + + withName: "MINIMAP2_ALIGN_SE" { + cpus = { check_max( 16 , 'cpus' ) } + memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 40 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') } + time = { check_max( 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48), 'time' ) } } // Process-specific resource requirements @@ -31,36 +37,36 @@ process { // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { check_max( 200.GB * task.attempt, 'memory' ) } } withLabel:error_ignore { - errorStrategy = 'ignore' + errorStrategy = 'ignore' } withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 + errorStrategy = 'retry' + maxRetries = 2 } } From 870bf41e0c54e18949edd016d53cbf4dc31824c0 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 20 Sep 2024 12:44:36 +0100 Subject: [PATCH 23/46] Updating documentation --- CHANGELOG.md | 6 +- CITATIONS.md | 24 ++++++-- LICENSE | 4 +- README.md | 8 ++- assets/samplesheet.csv | 3 - assets/schema_input.json | 130 ++++++++++++++++++++++++++++++++++++--- 6 files changed, 154 insertions(+), 21 deletions(-) delete mode 100644 assets/samplesheet.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 9106bfd..9959669 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ The current pipeline means the MVP for ear. GFASTATS to generate statistics on the input primary genome. MERQURY_FK to generate kmer graphs and analyses of the primary, haplotype and merged assembly. +MAIN_MAPPING which is a small mapping subworkflow, that can work with single and paired reads. BLOBTOOLKIT to generate busco files and blobtoolkit dataset/plots. CURATIONPRETEXT to generate pretext plots and pngs. @@ -21,12 +22,13 @@ CURATIONPRETEXT to generate pretext plots and pngs. | Old parameter | New parameter | | ------------- | ------------- | | | --mapped | +| | --steps | ### Software dependencies | Dependency | Old version | New version | | ---------------------------- | ----------- | ------------------- | -| sanger-tol/blobtoolkit\* | | draft_assemblies | +| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | | sanger-tol/curationpretext\* | | 1.0.0 (UNSC Cradle) | | GFASTATS | | 1.3.6--hdcf5f25_3 | | MERQUERY_FK | | 1.2 | @@ -36,7 +38,7 @@ CURATIONPRETEXT to generate pretext plots and pngs. | -- Note: for pipelines, please check their own CHANGELOG file for a full list of software dependencies. +\* for pipelines, please check their own CHANGELOG file for a full list of software dependencies. ### Dependencies diff --git a/CITATIONS.md b/CITATIONS.md index c0cf948..28e3ca8 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,13 +10,29 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [GFastar/GFastats](https://www.biorxiv.org/content/10.1101/2022.03.24.485682v1) - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + > Formenti, G., Abueg, L., Brajuka, N., Gallardo, C., Giani, A., Fedrigo, O., Jarvis, ED. (2022). Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs. bioRxiv. doi: https://doi.org/10.1101/2022.03.24.485682 -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) +- [Merqury_FK](https://github.com/thegenemyers/MERQURY.FK) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Myers, G., Rhie, A. (2024). MerquryFK & KatFK. [online]. https://github.com/thegenemyers/MERQURY.FK. (Accessed on 20 September 2024). + +- [Minimap2](https://pubmed.ncbi.nlm.nih.gov/34623391/) + + > Li, H. 2021. ‘New strategies to improve MINIMAP2 alignment accuracy’, Bioinformatics, 37(23), pp. 4572–4574. doi:10.1093/bioinformatics/btab705. + +- [Samtools](https://pubmed.ncbi.nlm.nih.gov/33590861/) + + > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819. + +- [sanger-tol/blobtoolkit](https://zenodo.org/records/13758882) + + > Muffato, M., Butt, Z., Challis, R., Kumar, S., Qi, G., Ramos Díaz, A., Surana, P., & Yates, B. (2024). sanger-tol/blobtoolkit: v0.6.0 – Bellsprout (0.6.0). Zenodo. https://doi.org/10.5281/zenodo.13758882 + +- [sanger-tol/curationpretext](https://zenodo.org/records/13758882) + + > Pointon, DLB. (2024). sanger-tol/curationpretext: v1.0.0 (UNSC Cradle). [online]. https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0. (Accessed on 20 September 2024). ## Software packaging/containerisation tools diff --git a/LICENSE b/LICENSE index 967fdcd..138ff19 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) DLBPointon +Copyright (c) 2022 - 2023 Genome Research Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 5b61fe6..b8e17ab 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,9 @@ curationpretext: hic_dir: btk: taxid: 1464561 - lineages: - gca_accession: GCA_0001 + lineages: < CSV LIST OF DATABASES TO USE: "insecta_odb10,diptera_odb10"> + gca_accession: GCA_0001 + nt_database: nt_database_prefix: diamond_uniprot_database_path: @@ -76,7 +77,8 @@ Now, you can run the pipeline using: nextflow run sanger-tol/ear -profile \\ --input assets/idCulLati1.yaml \\ --mapped TRUE \\ # OPTIONAL - --outdir test-truth + --steps ["", "btk", "cpretext", "merquryfk"] # OPTIONAL CSV LIST OF STEPS TO EXCLUDE FROM EXECUTION + --outdir test ``` > [!WARNING] diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index 5f653ab..0000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index 8012bf6..61d2b74 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,21 +13,137 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "reference_hap1": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.f[ast]a$", + "errorMessage": "Primary assembly input file, decompressed" }, - "fastq_2": { + "reference_hap2": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.f[ast]a$", + "errorMessage": "Haplotype assembly input file, decompressed" + }, + "reference_haplotigs": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f[ast]a$", + "errorMessage": "Haplotigs removed from Primary Assembly input file during curation, decompressed" + }, + "mapped_bam": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bam$", + "errorMessage": "Optional mapped bam file used to skip mapping of pacbio files" + }, + "merquryfk": { + "type": "object", + "properties": { + "fastk_hist": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.hist$", + "errorMessage": "Path to hist file" + }, + "fastk_ktab": { + "type": "string", + "errorMessage": "Directory containing ktab files" + } + } + }, + "longread": { + "type": "object", + "properties": { + "dir": { + "type": "string", + "errorMessage": "Path to folder containing fasta.gz files" + }, + "type": { + "type": "string", + "errorMessage": "type of longread data" + } + } + }, + "curationpretext": { + "type": "object", + "properties": { + "aligner": { + "type": "string", + "errorMessage": "Aligner" + }, + "telomere_motif": { + "type": "string", + "errorMessage": "Telomere motif for telomere search" + }, + "hic_dir": { + "type": "string", + "errorMessage": "Directory of the cram data" + } + } + }, + "btk": { + "type": "object", + "properties": { + "taxid": { + "type": "string", + "errorMessage": "NCBI Taxid of organism" + }, + "lineages": { + "type": "string", + "errorMessage": "CSV list of BUSCO lineages to run against" + }, + "gca_accession": { + "type": "string", + "errorMessage": "gca_accession if applicable" + }, + "nt_database": { + "type": "string", + "errorMessage": "nt database directory" + }, + "nt_database_prefix": { + "type": "string", + "errorMessage": "Prefix for nt database" + }, + "diamond_uniprot_database_path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.dmnd$", + "errorMessage": "Diamond protein database" + }, + "diamond_nr_database_path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.dmnd$", + "errorMessage": "Nuclear diamond database" + }, + "ncbi_taxonomy_path": { + "type": "string", + "errorMessage": "Directory for tax2taxid" + }, + "ncbi_rankedlineage_path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.dmp$", + "errorMessage": "Taxonomy dump" + }, + "config": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.config$", + "errorMessage": "Extra configuration file for Blobtoolkit pipeline" + } + } } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "reference_hap1", "reference_hap2"] } } From 93d17c240a0ab2dd1bf1ff35c2359b9a74d87068 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 20 Sep 2024 12:53:03 +0100 Subject: [PATCH 24/46] Fix LICENSE lint --- .nf-core.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.nf-core.yml b/.nf-core.yml index d9fe12b..407734e 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -10,6 +10,7 @@ lint: - .github/workflows/awsfulltest.yml - conf/igenomes.config files_unchanged: + - LICENSE - CODE_OF_CONDUCT.md - assets/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_light.png From fb826a379a2081d6fea67ecbff20a25c3dd60fde Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Fri, 20 Sep 2024 12:59:53 +0100 Subject: [PATCH 25/46] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9959669..0ebb5ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type). -## v1.0.0 - Aquatic Bahamut [21/08/2024] +## v1.0.0 - Robert Beiny [20/09/2024] Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template. The current pipeline means the MVP for ear. From 828cf7cfcbb40f217582c3c4a186a0174a9d5aff Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:50:37 +0100 Subject: [PATCH 26/46] Update nextflow.config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index e564534..83055b9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,7 +224,7 @@ manifest { description = """ERGA Assembly Report pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.0' - version = '1.0' + version = '0.6.0' doi = '' } From c39373703d8a8495f5ffab327556f17867e8d8a0 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:59:08 +0100 Subject: [PATCH 27/46] Update README.md Adding DOI --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b8e17ab..5c42432 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![GitHub Actions CI Status](https://github.com/sanger-tol/ear/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![DOI](https://zenodo.org/badge/833605808.svg)](https://doi.org/10.5281/zenodo.13819520) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) From e7e4fc3e6873d69278c682b10d46347aba53e383 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 1 Oct 2024 15:00:23 +0100 Subject: [PATCH 28/46] Update nextflow.config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 83055b9..6e8499e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -225,7 +225,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=24.04.0' version = '0.6.0' - doi = '' + doi = 'https://zenodo.org/records/13819520' } // Load modules.config for DSL2 module specific options From 79a12a03ec0998299575ac4b5ce935bbc955703d Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 8 Oct 2024 08:42:18 +0100 Subject: [PATCH 29/46] Update modules.config Moving btk to tagged version 0.6.0 --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 73e83bb..90a7a0e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -42,7 +42,7 @@ process { ext.executor = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'" ext.profiles = "singularity,sanger" ext.get_versions = "lsid | head -n1 | cut -d ',' -f 1" - ext.version = "draft_assemblies" + ext.version = "0.6.0" } withName: SANGER_TOL_CPRETEXT { From 2fbf4124573a7dfb3329af5ff15c3c90b1ef5755 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 8 Oct 2024 08:44:39 +0100 Subject: [PATCH 30/46] Update CHANGELOG.md Update --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ebb5ba..d921035 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type). +## v1.0.1 - Robert Beiny H1 [08/10/2024] +- Blobtookit version was specified in the wrong location, so defaulted to a development branch "draft_assemblies", this has now been updated to v0.6.0. +- Zenodo DOI has now been added to the repo. + ## v1.0.0 - Robert Beiny [20/09/2024] Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template. @@ -36,8 +40,6 @@ CURATIONPRETEXT to generate pretext plots and pngs. | SAMTOOLS_MERGE | | 1.20--h50ea8bc_0 | | SAMTOOLS_SORT | | 1.20--h50ea8bc_0 | -| - \* for pipelines, please check their own CHANGELOG file for a full list of software dependencies. ### Dependencies From a126d8a0708db088e17a460e53c63448a3cc526b Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 8 Oct 2024 08:46:01 +0100 Subject: [PATCH 31/46] Update nextflow.config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 6e8499e..c278c76 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,7 +224,7 @@ manifest { description = """ERGA Assembly Report pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.0' - version = '0.6.0' + version = '0.6.1' doi = 'https://zenodo.org/records/13819520' } From 2d2ac6bec02cda6d711bee1628280b15bfe6e7eb Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 8 Oct 2024 08:46:25 +0100 Subject: [PATCH 32/46] Update CHANGELOG.md Correct version information --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d921035..2bb9758 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,11 +4,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type). -## v1.0.1 - Robert Beiny H1 [08/10/2024] +## v0.6.1 - Robert Beiny H1 [08/10/2024] - Blobtookit version was specified in the wrong location, so defaulted to a development branch "draft_assemblies", this has now been updated to v0.6.0. - Zenodo DOI has now been added to the repo. -## v1.0.0 - Robert Beiny [20/09/2024] +## v0.6.0 - Robert Beiny [20/09/2024] Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template. The current pipeline means the MVP for ear. From dfb79a09ca41119c2befbfd5a719779cde65b1c1 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 8 Oct 2024 08:54:07 +0100 Subject: [PATCH 33/46] updating version info and fixing linting error --- conf/base.config | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index aa5a770..f600868 100644 --- a/conf/base.config +++ b/conf/base.config @@ -23,7 +23,7 @@ process { time = { check_max( 70.h * task.attempt, 'time' ) } } - withName: "MINIMAP2_ALIGN_SE" { + withName: "MINIMAP2_ALIGN_SE" { cpus = { check_max( 16 , 'cpus' ) } memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 40 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') } time = { check_max( 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48), 'time' ) } @@ -36,35 +36,43 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } + withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } + withLabel:process_medium { cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } + withLabel:process_high { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } + withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } } + withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + withLabel:error_ignore { errorStrategy = 'ignore' } + withLabel:error_retry { errorStrategy = 'retry' maxRetries = 2 From 01baf9287701c33ddf0e025efc068a19b9e0f6b3 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 8 Oct 2024 08:54:58 +0100 Subject: [PATCH 34/46] Prettier --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bb9758..ff4ec69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type). ## v0.6.1 - Robert Beiny H1 [08/10/2024] + - Blobtookit version was specified in the wrong location, so defaulted to a development branch "draft_assemblies", this has now been updated to v0.6.0. - Zenodo DOI has now been added to the repo. From 8127c7c2c811e2084d94737e801d6145748b3544 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 8 Oct 2024 08:56:18 +0100 Subject: [PATCH 35/46] New line for editorconfig --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 138ff19..ac4a5f3 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. From e1a274bb5ffdb0630109c19b424d7f4357d4173f Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 13 Dec 2024 16:49:56 +0000 Subject: [PATCH 36/46] Module updates --- modules.json | 15 +- modules/nf-core/cat/cat/environment.yml | 2 - modules/nf-core/cat/cat/meta.yml | 39 ++- modules/nf-core/gfastats/environment.yml | 2 - modules/nf-core/gfastats/meta.yml | 103 ++++--- .../merquryfk/merquryfk/environment.yml | 5 - modules/nf-core/merquryfk/merquryfk/main.nf | 59 ++-- .../merquryfk/merquryfk-merquryfk.diff | 23 -- modules/nf-core/merquryfk/merquryfk/meta.yml | 287 ++++++++++++------ .../nf-core/minimap2/align/environment.yml | 3 - modules/nf-core/minimap2/align/meta.yml | 115 ++++--- .../nf-core/samtools/merge/environment.yml | 8 +- modules/nf-core/samtools/merge/main.nf | 4 +- modules/nf-core/samtools/merge/meta.yml | 117 ++++--- .../samtools/merge/tests/main.nf.test.snap | 32 +- modules/nf-core/samtools/sort/environment.yml | 8 +- modules/nf-core/samtools/sort/main.nf | 15 +- modules/nf-core/samtools/sort/meta.yml | 99 +++--- .../nf-core/samtools/sort/tests/main.nf.test | 64 ++++ .../samtools/sort/tests/main.nf.test.snap | 125 +++++++- nextflow.config | 2 +- workflows/ear.nf | 4 +- 22 files changed, 726 insertions(+), 405 deletions(-) delete mode 100644 modules/nf-core/merquryfk/merquryfk/environment.yml delete mode 100644 modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff diff --git a/modules.json b/modules.json index d4e081b..49c2c8b 100644 --- a/modules.json +++ b/modules.json @@ -7,34 +7,33 @@ "nf-core": { "cat/cat": { "branch": "master", - "git_sha": "5bb8ca085e17549e185e1823495ab8d20727a805", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gfastats": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], - "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "46eca555142d6e597729fcb682adcc791796f514", + "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", "installed_by": ["modules"] } } diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml index 17a04ef..9b01c86 100644 --- a/modules/nf-core/cat/cat/environment.yml +++ b/modules/nf-core/cat/cat/environment.yml @@ -1,7 +1,5 @@ -name: cat_cat channels: - conda-forge - bioconda - - defaults dependencies: - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 00a8db0..81778a0 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -9,25 +9,32 @@ tools: description: Just concatenation documentation: https://man7.org/linux/man-pages/man1/cat.1.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - files_in: - type: file - description: List of compressed / uncompressed files - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - file_out: - type: file - description: Concatenated file. Will be gzipped if file_out ends with ".gz" - pattern: "${file_out}" + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" - "@FriederikeHanssen" diff --git a/modules/nf-core/gfastats/environment.yml b/modules/nf-core/gfastats/environment.yml index 1c875ce..b47bbdb 100644 --- a/modules/nf-core/gfastats/environment.yml +++ b/modules/nf-core/gfastats/environment.yml @@ -1,7 +1,5 @@ -name: gfastats channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gfastats=1.3.6 diff --git a/modules/nf-core/gfastats/meta.yml b/modules/nf-core/gfastats/meta.yml index d0e97a8..a621343 100644 --- a/modules/nf-core/gfastats/meta.yml +++ b/modules/nf-core/gfastats/meta.yml @@ -16,56 +16,67 @@ tools: documentation: "https://github.com/vgl-hub/gfastats/tree/main/instructions" tool_dev_url: "https://github.com/vgl-hub/gfastats" doi: "10.1093/bioinformatics/btac460" - licence: "['MIT']" + licence: ["MIT"] + identifier: biotools:gfastats input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - assembly: - type: file - description: Draft assembly file - pattern: "*.{fasta,fastq,gfa}(.gz)?" - - out_fmt: - type: string - description: Output format (fasta, fastq, gfa) - - genome_size: - type: integer - description: estimated genome size (bp) for NG* statistics (optional). - - target: - type: string - description: target specific sequence by header, optionally with coordinates (optional). - - agpfile: - type: file - description: converts input agp to path and replaces existing paths. - - include_bed: - type: file - description: generates output on a subset list of headers or coordinates in 0-based bed format. - - exclude_bed: - type: file - description: opposite of --include-bed. They can be combined (no coordinates). - - instructions: - type: file - description: set of instructions provided as an ordered list. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - assembly: + type: file + description: Draft assembly file + pattern: "*.{fasta,fastq,gfa}(.gz)?" + - - out_fmt: + type: string + description: Output format (fasta, fastq, gfa) + - - genome_size: + type: integer + description: estimated genome size (bp) for NG* statistics (optional). + - - target: + type: string + description: target specific sequence by header, optionally with coordinates + (optional). + - - agpfile: + type: file + description: converts input agp to path and replaces existing paths. + - - include_bed: + type: file + description: generates output on a subset list of headers or coordinates in + 0-based bed format. + - - exclude_bed: + type: file + description: opposite of --include-bed. They can be combined (no coordinates). + - - instructions: + type: file + description: set of instructions provided as an ordered list. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - assembly_summary: - type: file - description: Assembly summary statistics file - pattern: "*.assembly_summary" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.assembly_summary": + type: file + description: Assembly summary statistics file + pattern: "*.assembly_summary" - assembly: - type: file - description: The assembly as modified by gfastats - pattern: "*.{fasta,fastq,gfa}.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${out_fmt}.gz": + type: file + description: The assembly as modified by gfastats + pattern: "*.{fasta,fastq,gfa}.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@mahesh-panchal" maintainers: diff --git a/modules/nf-core/merquryfk/merquryfk/environment.yml b/modules/nf-core/merquryfk/merquryfk/environment.yml deleted file mode 100644 index 44a5ee9..0000000 --- a/modules/nf-core/merquryfk/merquryfk/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: merquryfk_merquryfk -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf index f0e78cc..79c404d 100644 --- a/modules/nf-core/merquryfk/merquryfk/main.nf +++ b/modules/nf-core/merquryfk/merquryfk/main.nf @@ -6,26 +6,28 @@ process MERQURYFK_MERQURYFK { container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' input: - tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly), path(haplotigs) + tuple val(meta), path(fastk_hist),path(fastk_ktab),path(assembly),path(haplotigs) + path matktab //optional + path patktab //optional output: - tuple val(meta), path("${prefix}.completeness.stats") , emit: stats - tuple val(meta), path("${prefix}.*_only.bed") , emit: bed - tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv - tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf, optional: true - tuple val(meta), path("${prefix}.qv") , emit: qv - tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.completeness.stats") , emit: stats + tuple val(meta), path("${prefix}.*_only.bed") , emit: bed + tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv + tuple val(meta), path("${prefix}.*.spectra-cn.fl.{png,pdf}") , emit: spectra_cn_fl, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.ln.{png,pdf}") , emit: spectra_cn_ln, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.st.{png,pdf}") , emit: spectra_cn_st, optional: true + tuple val(meta), path("${prefix}.qv") , emit: qv + tuple val(meta), path("${prefix}.spectra-asm.fl.{png,pdf}") , emit: spectra_asm_fl, optional: true + tuple val(meta), path("${prefix}.spectra-asm.ln.{png,pdf}") , emit: spectra_asm_ln, optional: true + tuple val(meta), path("${prefix}.spectra-asm.st.{png,pdf}") , emit: spectra_asm_st, optional: true + tuple val(meta), path("${prefix}.phased_block.bed") , emit: phased_block_bed, optional: true + tuple val(meta), path("${prefix}.phased_block.stats") , emit: phased_block_stats, optional: true + tuple val(meta), path("${prefix}.continuity.N.{pdf,png}") , emit: continuity_N, optional: true + tuple val(meta), path("${prefix}.block.N.{pdf,png}") , emit: block_N, optional: true + tuple val(meta), path("${prefix}.block.blob.{pdf,png}") , emit: block_blob, optional: true + tuple val(meta), path("${prefix}.hapmers.blob.{pdf,png}") , emit: hapmers_blob, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -37,11 +39,10 @@ process MERQURYFK_MERQURYFK { } def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def mat_ktab = matktab ? "${matktab.find{ it.toString().endsWith(".ktab") }}" : '' + def pat_ktab = patktab ? "${patktab.find{ it.toString().endsWith(".ktab") }}" : '' def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - - // Passing in the link through FASTK works, however passing in through YAML_INPUT results in being unable to find file - // seems as though it is because it is in a folder rather directly in the folder merqury is running in. """ cp ${fastk_ktab}/*ktab . && cp ${fastk_ktab}/.*ktab.* . @@ -60,4 +61,20 @@ process MERQURYFK_MERQURYFK { r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) END_VERSIONS """ + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.completeness.stats + touch ${prefix}.qv + touch ${prefix}._.qv + touch ${prefix}._only.bed + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + merquryfk: $MERQURY_VERSION + r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff deleted file mode 100644 index 751b30b..0000000 --- a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff +++ /dev/null @@ -1,23 +0,0 @@ -Changes in module 'nf-core/merquryfk/merquryfk' ---- modules/nf-core/merquryfk/merquryfk/main.nf -+++ modules/nf-core/merquryfk/merquryfk/main.nf -@@ -39,11 +39,16 @@ - prefix = task.ext.prefix ?: "${meta.id}" - def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. -+ -+ // Passing in the link through FASTK works, however passing in through YAML_INPUT results in being unable to find file -+ // seems as though it is because it is in a folder rather directly in the folder merqury is running in. - """ -+ cp ${fastk_ktab}/*ktab . && cp ${fastk_ktab}/.*ktab.* . -+ - MerquryFK \\ - $args \\ - -T$task.cpus \\ -- ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\ -+ *.ktab \\ - $assembly \\ - $haplotigs \\ - $prefix - -************************************************************ diff --git a/modules/nf-core/merquryfk/merquryfk/meta.yml b/modules/nf-core/merquryfk/merquryfk/meta.yml index 7d4af79..82bfbec 100644 --- a/modules/nf-core/merquryfk/merquryfk/meta.yml +++ b/modules/nf-core/merquryfk/merquryfk/meta.yml @@ -9,104 +9,209 @@ tools: description: "FastK based version of Merqury" homepage: "https://github.com/thegenemyers/MERQURY.FK" tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK" - licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE" + licence: ["https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fastk_hist: - type: file - description: A histogram files from the program FastK - pattern: "*.hist" - - fastk_ktab: - type: file - description: Histogram ktab files from the program FastK (option -t) - pattern: "*.ktab*" - - assembly: - type: file - description: Genome (primary) assembly files (fasta format) - pattern: ".fasta" - - haplotigs: - type: file - description: Assembly haplotigs (fasta format) - pattern: ".fasta" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastk_hist: + type: file + description: A histogram files from the program FastK + pattern: "*.hist" + - fastk_ktab: + type: file + description: Histogram ktab files from the program FastK (option -t) + pattern: "*.ktab*" + - assembly: + type: file + description: Genome (primary) assembly files (fasta format) + pattern: ".fasta" + - haplotigs: + type: file + description: Assembly haplotigs (fasta format) + pattern: ".fasta" + - - matktab: + type: file + description: trio maternal histogram ktab files from the program FastK (option + -t) + pattern: "*.ktab*" + - - patktab: + type: file + description: trio paternal histogram ktab files from the program FastK (option + -t) + pattern: "*.ktab*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - stats: - type: file - description: Assembly statistics file - pattern: "*.completeness.stats" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.completeness.stats: + type: file + description: Assembly statistics file + pattern: "*.completeness.stats" - bed: - type: file - description: Assembly only kmer positions not supported by reads in bed format - pattern: "*_only.bed" - - spectra_cn_fl_png: - type: file - description: "Unstacked copy number spectra filled plot in PNG format" - pattern: "*.spectra-cn.fl.png" - - spectra_cn_ln_png: - type: file - description: "Unstacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.ln.png" - - spectra_cn_st_png: - type: file - description: "Stacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.st.png" - - spectra_asm_fl_png: - type: file - description: "Unstacked assembly spectra filled plot in PNG format" - pattern: "*.spectra-asm.fl.png" - - spectra_asm_ln_png: - type: file - description: "Unstacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.ln.png" - - spectra_asm_st_png: - type: file - description: "Stacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.st.png" - - spectra_cn_fl_pdf: - type: file - description: "Unstacked copy number spectra filled plot in PDF format" - pattern: "*.spectra-cn.fl.pdf" - - spectra_cn_ln_pdf: - type: file - description: "Unstacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.ln.pdf" - - spectra_cn_st_pdf: - type: file - description: "Stacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.st.pdf" - - spectra_asm_fl_pdf: - type: file - description: "Unstacked assembly spectra filled plot in PDF format" - pattern: "*.spectra-asm.fl.pdf" - - spectra_asm_ln_pdf: - type: file - description: "Unstacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.ln.pdf" - - spectra_asm_st_pdf: - type: file - description: "Stacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.st.pdf" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*_only.bed: + type: file + description: Assembly only kmer positions not supported by reads in bed format + pattern: "*_only.bed" - assembly_qv: - type: file - description: "error and qv table for each scaffold of the assembly" - pattern: "*.qv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.qv: + type: file + description: "error and qv table for each scaffold of the assembly" + pattern: "*.qv" + - spectra_cn_fl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.spectra-cn.fl.{png,pdf}: + type: file + description: "Unstacked copy number spectra filled plot in PNG or PDF format" + pattern: "*.spectra-cn.fl.{png,pdf}" + - spectra_cn_ln: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.spectra-cn.ln.{png,pdf}: + type: file + description: "Unstacked copy number spectra line plot in PNG or PDF format" + pattern: "*.spectra-cn.ln.{png,pdf}" + - spectra_cn_st: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.*.spectra-cn.st.{png,pdf}: + type: file + description: "Stacked copy number spectra line plot in PNG or PDF format" + pattern: "*.spectra-cn.st.{png,pdf}" - qv: - type: file - description: "error and qv of each assembly as a whole" - pattern: "*.qv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.qv: + type: file + description: "error and qv of each assembly as a whole" + pattern: "*.qv" + - spectra_asm_fl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.spectra-asm.fl.{png,pdf}: + type: file + description: "Unstacked assembly spectra filled plot in PNG or PDF format" + pattern: "*.spectra-asm.fl.{png,pdf}" + - spectra_asm_ln: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.spectra-asm.ln.{png,pdf}: + type: file + description: "Unstacked assembly spectra line plot in PNG or PDF format" + pattern: "*.spectra-asm.ln.{png,pdf}" + - spectra_asm_st: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.spectra-asm.st.{png,pdf}: + type: file + description: "Stacked assembly spectra line plot in PNG or PDF format" + pattern: "*.spectra-asm.st.{png,pdf}" + - phased_block_bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.phased_block.bed: + type: file + description: Assembly kmer positions seperated by block in bed format + pattern: "*.phased.block.bed" + - phased_block_stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.phased_block.stats: + type: file + description: phased assembly statistics file + pattern: "*.phased.block.stats" + - continuity_N: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.continuity.N.{pdf,png}: + type: file + description: "Stacked assembly N continuity plot in PNG or PDF format" + pattern: "*.continuity.N.{png,pdf}" + - block_N: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.block.N.{pdf,png}: + type: file + description: "Stacked assembly N continuity by block plot in PNG or PDF format" + pattern: "*.block.N.{png,pdf}" + - block_blob: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.block.blob.{pdf,png}: + type: file + description: "Stacked assembly block plot in PNG or PDF format" + pattern: "*.block.blob.{png,pdf}" + - hapmers_blob: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.hapmers.blob.{pdf,png}: + type: file + description: "Stacked assembly hapmers block plot in PNG or PDF format" + pattern: "*.hapmers.blob.{png,pdf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@mahesh-panchal" + - "@yumisims" maintainers: - "@mahesh-panchal" + - "@yumisims" diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index 41e8fe9..dc6476b 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -1,9 +1,6 @@ -name: minimap2_align - channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::htslib=1.20 diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index 8996f88..a4cfc89 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -14,62 +14,77 @@ tools: homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FASTA or FASTQ files of size 1 and 2 for single-end - and paired-end data, respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test_ref'] - - reference: - type: file - description: | - Reference database in FASTA format. - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - bam_index_extension: - type: string - description: BAM alignment index extension (e.g. "bai") - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - - bam_format: + type: boolean + description: Specify that output should be in BAM format + - - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - paf: - type: file - description: Alignment in PAF format - pattern: "*.paf" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" - bam: - type: file - description: Alignment in BAM format - pattern: "*.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" - index: - type: file - description: BAM alignment index - pattern: "*.bam.*" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@heuermh" - "@sofstam" diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml index cd366d6..62054fc 100644 --- a/modules/nf-core/samtools/merge/environment.yml +++ b/modules/nf-core/samtools/merge/environment.yml @@ -1,8 +1,8 @@ -name: samtools_merge +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.20 - - bioconda::htslib=1.20 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index 693b1d8..34da4c7 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_MERGE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : - 'biocontainers/samtools:1.20--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 2e8f3db..235aa21 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -15,60 +15,81 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_files: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram,sam}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fasta: - type: file - description: Reference file the CRAM was created with (optional) - pattern: "*.{fasta,fa}" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fai: - type: file - description: Index of the reference file the CRAM was created with (optional) - pattern: "*.fai" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" - cram: - type: file - description: CRAM file - pattern: "*.{cram}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" - csi: - type: file - description: BAM index file (optional) - pattern: "*.csi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" - crai: - type: file - description: CRAM index file (optional) - pattern: "*.crai" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@yuukiiwa " diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap index 17bc846..0a41e01 100644 --- a/modules/nf-core/samtools/merge/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -80,14 +80,14 @@ "bam_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:46:35.851936597" + "timestamp": "2024-09-16T09:16:30.476887194" }, "bams_csi": { "content": [ @@ -124,14 +124,14 @@ "bams_stub_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:46:41.405707643" + "timestamp": "2024-09-16T09:16:52.203823961" }, "bam_cram": { "content": [ @@ -158,14 +158,14 @@ "bams_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:45:51.695689923" + "timestamp": "2024-09-16T08:29:57.524363148" }, "crams_bam": { "content": [ @@ -182,14 +182,14 @@ "crams_versions": { "content": [ [ - "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:46:30.185392319" + "timestamp": "2024-09-16T09:16:06.977096207" }, "bam_csi": { "content": [ diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml index 36a12ea..62054fc 100644 --- a/modules/nf-core/samtools/sort/environment.yml +++ b/modules/nf-core/samtools/sort/environment.yml @@ -1,8 +1,8 @@ -name: samtools_sort +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.20 - - bioconda::htslib=1.20 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 8e01909..caf3c61 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -4,19 +4,19 @@ process SAMTOOLS_SORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : - 'biocontainers/samtools:1.20--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta) , path(bam) tuple val(meta2), path(fasta) output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -32,7 +32,6 @@ process SAMTOOLS_SORT { """ samtools cat \\ - --threads $task.cpus \\ ${bam} \\ | \\ samtools sort \\ diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 341a7d0..a9dbec5 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -15,52 +15,73 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file(s) - pattern: "*.{bam,cram,sam}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fasta: - type: file - description: Reference genome FASTA file - pattern: "*.{fa,fasta,fna}" - optional: true + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: Sorted BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" - cram: - type: file - description: Sorted CRAM file - pattern: "*.{cram}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" - crai: - type: file - description: CRAM index file (optional) - pattern: "*.crai" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" - csi: - type: file - description: BAM index file (optional) - pattern: "*.csi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index c2ea9c7..b05e669 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -39,6 +39,40 @@ nextflow_process { } } + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + test("cram") { config "./nextflow_cram.config" @@ -98,6 +132,36 @@ nextflow_process { } } + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("cram - stub") { options "-stub" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index da38d5d..469891f 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -20,14 +20,14 @@ ] ], [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T17:19:37.196205" + "timestamp": "2024-09-16T08:49:58.207549273" }, "bam - stub": { "content": [ @@ -57,7 +57,7 @@ ] ], "4": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ], "bam": [ [ @@ -84,15 +84,15 @@ ] ], "versions": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T15:54:46.580756" + "timestamp": "2024-09-16T08:50:08.630951018" }, "cram - stub": { "content": [ @@ -122,7 +122,7 @@ ], "4": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ], "bam": [ @@ -149,15 +149,110 @@ ], "versions": [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T15:57:30.505698" + "timestamp": "2024-09-16T08:50:19.061912443" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:55.479443" + }, + "multiple bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:36:13.781404" }, "bam": { "content": [ @@ -167,7 +262,7 @@ "id": "test", "single_end": false }, - "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" ] ], [ @@ -180,13 +275,13 @@ ] ], [ - "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + "versions.yml:md5,2659b187d681241451539d4c53500b9f" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.09.0" }, - "timestamp": "2024-07-22T15:54:25.872954" + "timestamp": "2024-10-08T11:59:46.372244" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index c278c76..7436e70 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,7 +224,7 @@ manifest { description = """ERGA Assembly Report pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.0' - version = '0.6.1' + version = '0.7.0' doi = 'https://zenodo.org/records/13819520' } diff --git a/workflows/ear.nf b/workflows/ear.nf index 9f1c434..395946d 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -125,7 +125,9 @@ workflow EAR { // MODULE: MERQURYFK PLOTS OF GENOME // MERQURYFK_MERQURYFK( - merquryfk_input + merquryfk_input, + [], + [] ) ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) } From 01f4cfb4cff21b3df155ada5e16ec2641b9442d6 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 13 Dec 2024 16:52:24 +0000 Subject: [PATCH 37/46] Module updates --- modules.json | 39 +- .../merquryfk/merquryfk-merquryfk.diff | 21 + .../merquryfk/merquryfk/tests/main.nf.test | 170 +++++++ .../merquryfk/tests/main.nf.test.snap | 454 ++++++++++++++++++ .../merquryfk/merquryfk/tests/nextflow.config | 8 + .../merquryfk/tests/nextflow.pdf.config | 9 + .../merquryfk/tests/nextflow.png.config | 9 + .../merquryfk/tests/nextflow.trio.config | 9 + .../merquryfk/merquryfk/tests/tags.yml | 2 + 9 files changed, 711 insertions(+), 10 deletions(-) create mode 100644 modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/main.nf.test create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/tags.yml diff --git a/modules.json b/modules.json index 49c2c8b..cffa68f 100644 --- a/modules.json +++ b/modules.json @@ -8,33 +8,46 @@ "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gfastats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ], + "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -43,20 +56,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff new file mode 100644 index 0000000..84c5901 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff @@ -0,0 +1,21 @@ +Changes in module 'nf-core/merquryfk/merquryfk' +--- modules/nf-core/merquryfk/merquryfk/main.nf ++++ modules/nf-core/merquryfk/merquryfk/main.nf +@@ -44,12 +44,12 @@ + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ ++ cp ${fastk_ktab}/*ktab . && cp ${fastk_ktab}/.*ktab.* . ++ + MerquryFK \\ + $args \\ + -T$task.cpus \\ +- ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\ +- ${mat_ktab} \\ +- ${pat_ktab} \\ ++ *.ktab \\ + $assembly \\ + $haplotigs \\ + $prefix + +************************************************************ diff --git a/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test new file mode 100644 index 0000000..c46843c --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test @@ -0,0 +1,170 @@ +nextflow_process { + + name "Test Process MERQURYFK" + script "../main.nf" + process "MERQURYFK_MERQURYFK" + + tag "modules" + tag "modules_nfcore" + tag "merquryfk" + tag "merquryfk/merquryfk" + tag "fastk" + tag "fastk/fastk" + + setup { + run("FASTK_FASTK") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + run("FASTK_FASTK", alias: "FASTK_MAT") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + """ + } + } + + run("FASTK_FASTK", alias: "FASTK_PAT") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + ] + """ + } + } + } + + test("homo_sapiens - Illumina - png") { + config "./nextflow.png.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - pdf") { + config "./nextflow.pdf.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - trio") { + config "./nextflow.trio.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = FASTK_MAT.out.ktab + input[2] = FASTK_PAT.out.ktab + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - stub") { + options "-stub" + config "./nextflow.pdf.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + +} \ No newline at end of file diff --git a/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap new file mode 100644 index 0000000..f7ce47f --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap @@ -0,0 +1,454 @@ +{ + "homo_sapiens - Illumina - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:52.240373868" + }, + "homo_sapiens - Illumina - pdf": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:19.530675341" + }, + "homo_sapiens - Illumina - png": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:21:57.682723412" + }, + "homo_sapiens - Illumina - trio": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:40.060937299" + } +} \ No newline at end of file diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config new file mode 100644 index 0000000..ba1eebc --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config new file mode 100644 index 0000000..52beeaa --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs -pdf' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config new file mode 100644 index 0000000..47c3d63 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config new file mode 100644 index 0000000..47c3d63 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/tags.yml b/modules/nf-core/merquryfk/merquryfk/tests/tags.yml new file mode 100644 index 0000000..7dcac99 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/tags.yml @@ -0,0 +1,2 @@ +merquryfk/merquryfk: + - "modules/nf-core/merquryfk/merquryfk/**" From 74e55ac717c017d010ce0452ccbb7e0ab15a1c52 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 13 Dec 2024 16:56:23 +0000 Subject: [PATCH 38/46] Updates --- CHANGELOG.md | 6 +++++- modules.json | 38 ++++++++++---------------------------- 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff4ec69..69e8982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type). +Naming based on: [Audiologists](https://en.wikipedia.org/wiki/Category:Audiologists). + +## v0.7.0 - Chris Campbell [13/12/2024] + +- TODO ## v0.6.1 - Robert Beiny H1 [08/10/2024] diff --git a/modules.json b/modules.json index cffa68f..8ca244a 100644 --- a/modules.json +++ b/modules.json @@ -8,46 +8,34 @@ "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -56,26 +44,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From c88fa043e6358a9aad5696f1a756ed70cd8f6021 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 9 Jan 2025 13:22:02 +0000 Subject: [PATCH 39/46] Updates for modules and CICD --- .github/workflows/ci.yml | 33 +-------------------------------- .github/workflows/linting.yml | 6 +++--- CHANGELOG.md | 17 +++++++++++++++-- nextflow.config | 2 +- 4 files changed, 20 insertions(+), 38 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c347e38..a21e2f4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,42 +29,14 @@ jobs: - "24.04.0" - "latest-everything" steps: - - name: Get branch names - # Pulls the names of current branches in repo - # steps.branch-names.outputs.current_branch is used later and returns the name of the branch the PR is made FROM not to - id: branch-names - uses: tj-actions/branch-names@v8 - - - name: Setup apptainer - uses: eWaterCycle/setup-apptainer@main - - - name: Set up Singularity - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Install Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install nf-core - run: | - pip install nf-core - - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - # This will only download the main pipeline containers, subpipelines need their own nf-download - - name: NF-Core Download - download singularity containers - run: | - nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity - - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. # Needs a kmer db for merqury @@ -73,9 +45,6 @@ jobs: cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa - # - name: Disk space cleanup - # uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe8..8c9f4e6 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,7 +14,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + - uses: actions/checkout@v4 - name: Set up Python 3.12 uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 @@ -44,7 +44,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==2.8.0 - name: Run nf-core lint env: diff --git a/CHANGELOG.md b/CHANGELOG.md index 69e8982..e4b0716 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,22 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Naming based on: [Audiologists](https://en.wikipedia.org/wiki/Category:Audiologists). -## v0.7.0 - Chris Campbell [13/12/2024] +## v0.6.0 - Robert Beiny H2 [09/01/2025] + +- Modules have been updated to remove conda defaults. + +### Software dependencies + +| Dependency | Old version | New version | +| ---------------------------- | ----------- | ------------------- | +| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | +| sanger-tol/curationpretext\* | | 1.0.1 (UNSC Cradle H1) | +| GFASTATS | | 1.3.6--hdcf5f25_3 | +| MERQUERY_FK | | 1.2 | +| MINIMAP2_ALIGN | | 2.28 | +| SAMTOOLS_MERGE | | 1.21--h50ea8bc_0 | +| SAMTOOLS_SORT | | 1.21--h50ea8bc_0' | -- TODO ## v0.6.1 - Robert Beiny H1 [08/10/2024] diff --git a/nextflow.config b/nextflow.config index 7436e70..f197bbc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,7 +224,7 @@ manifest { description = """ERGA Assembly Report pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.0' - version = '0.7.0' + version = '0.6.2' doi = 'https://zenodo.org/records/13819520' } From 16cc585401703db0d3e3ce991d3cd6d4ec8c42a5 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 9 Jan 2025 13:34:50 +0000 Subject: [PATCH 40/46] Updates to include CurationPretext 1.1.0 --- CHANGELOG.md | 4 ++-- conf/modules.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4b0716..439c0bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,12 @@ Naming based on: [Audiologists](https://en.wikipedia.org/wiki/Category:Audiologi | Dependency | Old version | New version | | ---------------------------- | ----------- | ------------------- | | sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | -| sanger-tol/curationpretext\* | | 1.0.1 (UNSC Cradle H1) | +| sanger-tol/curationpretext\* | | 1.1.0 (UNSC Delphi) | | GFASTATS | | 1.3.6--hdcf5f25_3 | | MERQUERY_FK | | 1.2 | | MINIMAP2_ALIGN | | 2.28 | | SAMTOOLS_MERGE | | 1.21--h50ea8bc_0 | -| SAMTOOLS_SORT | | 1.21--h50ea8bc_0' | +| SAMTOOLS_SORT | | 1.21--h50ea8bc_0 | ## v0.6.1 - Robert Beiny H1 [08/10/2024] diff --git a/conf/modules.config b/conf/modules.config index 90a7a0e..ac33898 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -51,7 +51,7 @@ process { ext.executor = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'" ext.profiles = "singularity,sanger" ext.get_versions = "lsid | head -n1 | cut -d ',' -f 1" - ext.version = "1.0.0" + ext.version = "1.1.0" } } From 2e970a20b8f4f3c4a31a299a2df125394bdc02e6 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 9 Jan 2025 13:38:58 +0000 Subject: [PATCH 41/46] Remove references to Anaconda --- CITATIONS.md | 4 ++-- README.md | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 28e3ca8..2973402 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -36,9 +36,9 @@ ## Software packaging/containerisation tools -- [Anaconda](https://anaconda.com) +- [Conda](https://conda.org/) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + > conda contributors. conda: A system-level, binary package and environment manager running on all major operating systems and platforms. Computer software. https://github.com/conda/conda - [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) diff --git a/README.md b/README.md index 5c42432..6197506 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![DOI](https://zenodo.org/badge/833605808.svg)](https://doi.org/10.5281/zenodo.13819520) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/ear) From 07fd995c13c1b9501ee48f1bcc524dc3e12c064c Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 9 Jan 2025 13:57:26 +0000 Subject: [PATCH 42/46] Prettier and lints --- .nf-core.yml | 12 ++++++++++++ CHANGELOG.md | 19 +++++++++---------- nextflow.config | 2 +- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 407734e..3f725bf 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,10 +1,16 @@ lint: + schema_params: + - validationSchemaIgnoreParams files_exist: - CODE_OF_CONDUCT.md - assets/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_dark.png - lib/nfcore_external_java_deps.jar + - lib/NfcoreSchema.groovy + - lib/NfcoreTemplate.groovy + - lib/Utils.groovy + - lib/WorkflowMain.groovy - .github/ISSUE_TEMPLATE/config.yml - .github/workflows/awstest.yml - .github/workflows/awsfulltest.yml @@ -16,6 +22,12 @@ lint: - docs/images/nf-core-ear_logo_light.png - docs/images/nf-core-ear_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/CONTRIBUTING.md + - .github/PULL_REQUEST_TEMPLATE.md + - .github/workflows/branch.yml + - .github/workflows/linting_comment.yml + - .github/workflows/linting.yml + - assets/email_template.htmlp multiqc_config: - report_comment nextflow_config: diff --git a/CHANGELOG.md b/CHANGELOG.md index 439c0bc..f3fdf65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,16 +10,15 @@ Naming based on: [Audiologists](https://en.wikipedia.org/wiki/Category:Audiologi ### Software dependencies -| Dependency | Old version | New version | -| ---------------------------- | ----------- | ------------------- | -| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | -| sanger-tol/curationpretext\* | | 1.1.0 (UNSC Delphi) | -| GFASTATS | | 1.3.6--hdcf5f25_3 | -| MERQUERY_FK | | 1.2 | -| MINIMAP2_ALIGN | | 2.28 | -| SAMTOOLS_MERGE | | 1.21--h50ea8bc_0 | -| SAMTOOLS_SORT | | 1.21--h50ea8bc_0 | - +| Dependency | Old version | New version | +| ---------------------------- | ----------- | ------------------- | +| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | +| sanger-tol/curationpretext\* | | 1.1.0 (UNSC Delphi) | +| GFASTATS | | 1.3.6--hdcf5f25_3 | +| MERQUERY_FK | | 1.2 | +| MINIMAP2_ALIGN | | 2.28 | +| SAMTOOLS_MERGE | | 1.21--h50ea8bc_0 | +| SAMTOOLS_SORT | | 1.21--h50ea8bc_0 | ## v0.6.1 - Robert Beiny H1 [08/10/2024] diff --git a/nextflow.config b/nextflow.config index f197bbc..edd8915 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,7 +13,7 @@ params { // Input options input = null mapped = false - steps = "" + steps = "ALL" // Boilerplate options outdir = null From 2caa0b558255375d3de3e3845a9a2fbb30791f10 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 9 Jan 2025 14:36:55 +0000 Subject: [PATCH 43/46] Prettier and lints --- .nf-core.yml | 4 +++- CHANGELOG.md | 20 ++++++++++---------- conf/base.config | 4 ++-- nextflow.config | 1 - 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 3f725bf..bb21f01 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -27,12 +27,14 @@ lint: - .github/workflows/branch.yml - .github/workflows/linting_comment.yml - .github/workflows/linting.yml - - assets/email_template.htmlp + - assets/email_template.html multiqc_config: - report_comment nextflow_config: - manifest.name - manifest.homePage + - params.show_hidden_params + - params.schema_ignore_params nf_core_version: 2.14.1 repository_type: pipeline template: diff --git a/CHANGELOG.md b/CHANGELOG.md index f3fdf65..f8847d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,21 +4,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Naming based on: [Audiologists](https://en.wikipedia.org/wiki/Category:Audiologists). -## v0.6.0 - Robert Beiny H2 [09/01/2025] +## v0.6.2 - Robert Beiny H2 [09/01/2025] - Modules have been updated to remove conda defaults. ### Software dependencies -| Dependency | Old version | New version | -| ---------------------------- | ----------- | ------------------- | -| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | -| sanger-tol/curationpretext\* | | 1.1.0 (UNSC Delphi) | -| GFASTATS | | 1.3.6--hdcf5f25_3 | -| MERQUERY_FK | | 1.2 | -| MINIMAP2_ALIGN | | 2.28 | -| SAMTOOLS_MERGE | | 1.21--h50ea8bc_0 | -| SAMTOOLS_SORT | | 1.21--h50ea8bc_0 | +| Dependency | Old version | New version | +| ---------------------------- | ------------------- | ------------------- | +| sanger-tol/blobtoolkit\* | | 0.6.0 (Bellsprout) | +| sanger-tol/curationpretext\* | 1.0.0 (UNSC Cradle) | 1.1.0 (UNSC Delphi) | +| GFASTATS | | 1.3.6--hdcf5f25_3 | +| MERQUERY_FK | | 1.2 | +| MINIMAP2_ALIGN | | 2.28 | +| SAMTOOLS_MERGE | 1.20--h50ea8bc_0 | 1.21--h50ea8bc_0 | +| SAMTOOLS_SORT | 1.21--h50ea8bc_0 | 1.21--h50ea8bc_0 | ## v0.6.1 - Robert Beiny H1 [08/10/2024] diff --git a/conf/base.config b/conf/base.config index f600868..4b330f4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -25,8 +25,8 @@ process { withName: "MINIMAP2_ALIGN_SE" { cpus = { check_max( 16 , 'cpus' ) } - memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 40 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') } - time = { check_max( 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48), 'time' ) } + memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 60 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') } + time = { check_max( 1.h * ( reference.size() < 1e9 ? 15 : reference.size() < 10e9 ? 30 : 48), 'time' ) } } // Process-specific resource requirements diff --git a/nextflow.config b/nextflow.config index edd8915..39722f2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,7 +44,6 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' validationShowHiddenParams = false validate_params = true From 6b0def5760213ee410e2a7932969261f10d4c121 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 9 Jan 2025 15:07:36 +0000 Subject: [PATCH 44/46] Updates --- .nf-core.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index bb21f01..b3bc23b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,6 +1,4 @@ lint: - schema_params: - - validationSchemaIgnoreParams files_exist: - CODE_OF_CONDUCT.md - assets/nf-core-ear_logo_light.png @@ -35,6 +33,7 @@ lint: - manifest.homePage - params.show_hidden_params - params.schema_ignore_params + - params.validationSchemaIgnoreParams nf_core_version: 2.14.1 repository_type: pipeline template: From 0f375f85006cab5b8c348a2a90a8967dcbda3897 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 10 Jan 2025 11:43:08 +0000 Subject: [PATCH 45/46] Minor changes --- modules.json | 38 ++++++++++++++++++++++++++++---------- nextflow.config | 2 +- workflows/ear.nf | 4 ++-- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/modules.json b/modules.json index 8ca244a..cffa68f 100644 --- a/modules.json +++ b/modules.json @@ -8,34 +8,46 @@ "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gfastats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -44,20 +56,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 39722f2..a2a13b0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,7 +13,7 @@ params { // Input options input = null mapped = false - steps = "ALL" + steps = "NONE" // Boilerplate options outdir = null diff --git a/workflows/ear.nf b/workflows/ear.nf index 395946d..a37966c 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -40,9 +40,9 @@ workflow EAR { ch_versions = Channel.empty() ch_align_bam = Channel.empty() - exclude_steps = params.steps ? params.steps.split(",") : "" + exclude_steps = params.steps ? params.steps.split(",") : "NONE" - full_list = ["btk", "cpretext", "merquryfk", ""] + full_list = ["btk", "cpretext", "merquryfk", "NONE"] if (!full_list.containsAll(exclude_steps)) { exit 1, "There is an extra argument given on Command Line: \nCheck contents of: $exclude_steps\nMaster list is: $full_list" From 9bcfbf148f465f0a34454d42d3c9c001f9a080d9 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 10 Jan 2025 11:44:50 +0000 Subject: [PATCH 46/46] Prettier linting --- modules.json | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/modules.json b/modules.json index cffa68f..8ca244a 100644 --- a/modules.json +++ b/modules.json @@ -8,46 +8,34 @@ "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/gfastats/gfastats.diff" }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" }, "minimap2/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -56,26 +44,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +}