From 361a72b833c221dd2c48571154d6dbc90bbb6459 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 7 Feb 2025 12:57:35 +0000 Subject: [PATCH 1/6] Adding NF-TEST files --- assets/schema_input.json | 3 +++ assets/test.yaml | 4 ++-- conf/base.config | 6 ++--- conf/test.config | 25 ++++++++++++++------ nf-test.config | 16 +++++++++++++ tests/main.nf.test | 49 ++++++++++++++++++++++++++++++++++++++++ tests/main.nf.test.snap | 33 +++++++++++++++++++++++++++ workflows/ear.nf | 48 ++++++++++++++++++++------------------- 8 files changed, 149 insertions(+), 35 deletions(-) create mode 100644 nf-test.config create mode 100644 tests/main.nf.test create mode 100644 tests/main.nf.test.snap diff --git a/assets/schema_input.json b/assets/schema_input.json index 61d2b74..2b6741b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -53,6 +53,9 @@ }, "fastk_ktab": { "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.ktab$", "errorMessage": "Directory containing ktab files" } } diff --git a/assets/test.yaml b/assets/test.yaml index 0f5f6ed..14b0b50 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -8,8 +8,8 @@ reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/gr mapped_bam: [] merquryfk: - fastk_hist: "./" - fastk_ktab: "./" + fastk_hist: "./EMPTY.hist" + fastk_ktab: "./EMPTY.ktab" # Used by both subpipelines longread: diff --git a/conf/base.config b/conf/base.config index 4b330f4..8d7339e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -20,13 +20,13 @@ process { maxErrors = '-1' withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" { - time = { check_max( 70.h * task.attempt, 'time' ) } + time = { check_max( 8.d * task.attempt, 'time' ) } } withName: "MINIMAP2_ALIGN_SE" { cpus = { check_max( 16 , 'cpus' ) } - memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 60 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') } - time = { check_max( 1.h * ( reference.size() < 1e9 ? 
15 : reference.size() < 10e9 ? 30 : 48), 'time' ) } + memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 25 ) * task.attempt ) , 'memory') } + time = { check_max( 1.h * ( reference.size() < 1e9 ? 20 : reference.size() < 10e9 ? 35 : 48), 'time' ) } } // Process-specific resource requirements diff --git a/conf/test.config b/conf/test.config index 06d069f..2fc0011 100644 --- a/conf/test.config +++ b/conf/test.config @@ -10,15 +10,26 @@ ---------------------------------------------------------------------------------------- */ +cleanup = true + +process { + resourceLimits = [ + cpus: 4, + memory: '10.GB', + time: '1.h' + ] +} + params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + // Input data for full size test // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - input = "${projectDir}/assets/test.yaml" - outdir = "results" + max_cpus = 4 + max_memory = '10.GB' + max_time = '1.h' + input = "${baseDir}/assets/test.yaml" + steps = "btk,cpretext,merquryfk" } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..cc4372f --- /dev/null +++ b/nf-test.config @@ -0,0 +1,16 @@ +config { + testsDir "tests" + workDir ".nf-test" + libDir "tests/lib" + withTrace true + autoSort false + + options "-dump-channels" + + plugins { + load "nft-utils@0.0.3" + } + + configFile "conf/test.config" + profile "test" +} diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..556f956 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "pipeline" + tag "pipeline_sanger_tol" + tag "full" + + test("Minimal run | No SANGER-TOL-* nested pipelines") { + 
+ when { + params { + outdir = "${outputDir}" + } + } + + then { + + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ["pipeline_info/*.{html,json,txt}"]) + + def gfastats = getAllFilesFromDir(params.outdir, include: ['gfastats/_summary']) + + //def merquryfk = getAllFilesFromDir(params.outdir, include: ['merquryfk/*']) + + assertAll ( + {assert workflow.success}, + {assert snapshot( + // Test for number of successful processes - should be 29 for a full run + workflow.trace.succeeded().size(), + + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + + stable_name, + // GFASTATS should be a stable file as it is measuring metrics of the files + // so nothing special needs to be done + + gfastats.size(), + gfastats, + + // MERQURY_FK is running statistics on the input files + // input files are also split between the two main input haps + // need to actually test merqury first + //merquryfk, + + ).match()}, + ) + } + } +} diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap new file mode 100644 index 0000000..7329d59 --- /dev/null +++ b/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Minimal run | No SANGER-TOL-* nested pipelines": { + "content": [ + 2, + { + "CAT_CAT": { + "pigz": "2.3.4" + }, + "GFASTATS": { + "gfastats": "1.3.6" + }, + "Workflow": { + "sanger-tol/ear": "v0.6.2" + } + }, + [ + "gfastats", + "gfastats/grTriPseu1.assembly_summary", + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + ], + 0, + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2025-02-07T12:39:47.429262" + } +} \ No newline at end of file diff --git a/workflows/ear.nf b/workflows/ear.nf index a37966c..fb9154e 100644 --- a/workflows/ear.nf +++ b/workflows/ear.nf @@ -50,7 +50,7 @@ workflow EAR { // // MODULE: YAML_INPUT - // - YAML_INPUT SHOULD BE REWORKED TO BE SMARTER + // TODO: REPLACE WITH -params-input // 
YAML_INPUT(ch_input) @@ -120,7 +120,6 @@ workflow EAR { // LOGIC: STEP TO STOP MERQURY_FK RUNNING IF SPECIFIED BY USER // if (!exclude_steps.contains('merquryfk')) { - // // MODULE: MERQURYFK PLOTS OF GENOME // @@ -132,32 +131,32 @@ workflow EAR { ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions ) } - // - // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE - // SKIP THE MAPPING SUBWORKFLOW - // - if (!params.mapped) { - // - // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC - // This allows us to more esily bypass the mapping if we already have a sorted and mapped bam - // - MAIN_MAPPING ( - YAML_INPUT.out.sample_id, - YAML_INPUT.out.longread_type, - YAML_INPUT.out.reference_hap1, - YAML_INPUT.out.pacbio_tuple, - ) - ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions ) - ch_mapped_bam = MAIN_MAPPING.out.mapped_bam - } else { - ch_mapped_bam = YAML_INPUT.out.mapped_bam - } - // // LOGIC: STEP TO STOP BTK RUNNING IF SPECIFIED BY USER // if (!exclude_steps.contains('btk')) { + // + // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE + // SKIP THE MAPPING SUBWORKFLOW + // + if (!params.mapped) { + // + // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC + // This allows us to more easily bypass the mapping if we already have a sorted and mapped bam + // + MAIN_MAPPING ( + YAML_INPUT.out.sample_id, + YAML_INPUT.out.longread_type, + YAML_INPUT.out.reference_hap1, + YAML_INPUT.out.pacbio_tuple, + ) + ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions ) + ch_mapped_bam = MAIN_MAPPING.out.mapped_bam + } else { + ch_mapped_bam = YAML_INPUT.out.mapped_bam + } + // // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline @@ -187,6 +186,7 @@ workflow EAR { ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions) } + // // LOGIC: STEP TO STOP CURATION_PRETEXT RUNNING IF SPECIFIED BY USER // @@ -207,6 +207,7 @@ workflow EAR { ch_versions = ch_versions.mix( 
SANGER_TOL_CPRETEXT.out.versions ) } + // // Collate and save software versions // @@ -218,6 +219,7 @@ workflow EAR { newLine: true ).set { ch_collated_versions } + summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) From 476152038668af2332301977134a59292126b68b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 7 Feb 2025 13:01:04 +0000 Subject: [PATCH 2/6] Updating CICD --- .github/workflows/ci.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a21e2f4..0304912 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,10 +45,13 @@ jobs: cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix - # Skip BTK and CPRETEXT as they are already tested on their repos. 
+ - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + + # Test the component + - name: Run nf-test run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext,merquryfk + nf-test test \ + --profile="docker" \ + --debug \ + --verbose From 9d01433220bd90ec4b7024f3ce3152771bb39718 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 7 Feb 2025 13:03:02 +0000 Subject: [PATCH 3/6] Reduce limit of 8.d to 2.d --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 8d7339e..865625d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -20,7 +20,7 @@ process { maxErrors = '-1' withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" { - time = { check_max( 8.d * task.attempt, 'time' ) } + time = { check_max( 2.d * task.attempt, 'time' ) } } withName: "MINIMAP2_ALIGN_SE" { From c5e7ea4fa0fecd3478081023c488f7bb50c5e775 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 7 Feb 2025 13:35:03 +0000 Subject: [PATCH 4/6] updates --- .github/workflows/branch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index e3f6dae..7ac1e04 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -3,7 +3,7 @@ name: nf-core branch protection # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` on: pull_request_target: - branches: [master] + branches: [main] jobs: test: From 8edc609bc16a46fd3f4c9ff1254e8124f5cecf12 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 7 Feb 2025 14:24:15 +0000 Subject: [PATCH 5/6] Update to template --- conf/test.config | 4 --- .../local/utils_nfcore_ear_pipeline/main.nf | 36 +++++++++---------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/conf/test.config b/conf/test.config index 2fc0011..db0d1a3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,10 +26,6 @@ params { // Input 
data for full size test // Limit resources so that this can run on GitHub Actions - max_cpus = 4 - max_memory = '10.GB' - max_time = '1.h' - input = "${baseDir}/assets/test.yaml" steps = "btk,cpretext,merquryfk" } diff --git a/subworkflows/local/utils_nfcore_ear_pipeline/main.nf b/subworkflows/local/utils_nfcore_ear_pipeline/main.nf index 3bef592..9fd6da0 100644 --- a/subworkflows/local/utils_nfcore_ear_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ear_pipeline/main.nf @@ -76,28 +76,24 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // - Channel - .fromSamplesheet("input") - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { - validateInputSamplesheet(it) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } - .set { ch_samplesheet } + + // Channel + // .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + // .map { + // meta, file + // } + // .groupTuple() + // .map { samplesheet -> + // validateInputSamplesheet(samplesheet) + // } + // .map { + // meta, fastqs -> + // return [ meta, fastqs.flatten() ] + // } + // .set { ch_samplesheet } emit: - samplesheet = ch_samplesheet + samplesheet = params.input versions = ch_versions } From 86ccafcb75b8dc81a86b49b83cb8c70646f4b151 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 7 Feb 2025 14:29:31 +0000 Subject: [PATCH 6/6] Update --- assets/test-local.yaml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 assets/test-local.yaml diff --git a/assets/test-local.yaml b/assets/test-local.yaml new file mode 100755 index 0000000..c5e9ab5 --- /dev/null +++ b/assets/test-local.yaml @@ -0,0 +1,33 @@ +# General Values for all subpipelines and modules +assembly_id: grTriPseu1 +reference_hap1: 
/nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa +reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa +reference_haplotigs: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa + +# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore. +mapped_bam: [] + +merquryfk: + fastk_hist: "./EMPTY.hist" + fastk_ktab: "./EMPTY.ktab" + +# Used by both subpipelines +longread: + type: hifi + dir: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/genomic_data/pacbio/ + +curationpretext: + aligner: minimap2 + telomere_motif: TTAGGG + hic_dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/hic-arima/ +btk: + taxid: 352914 + gca_accession: GCA_0001 + lineages: "fungi_odb10" + nt_database: /home/runner/work/ascc/ascc/NT_database/ + nt_database_prefix: 18S_fungal_sequences + diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd + diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd + ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/ + ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp + config: /home/runner/work/ear/ear/conf/sanger-tol-btk.config