Skip to content

Commit

Permalink
Merge branch 'NF-TEST-ADDITIONS' into Template_update_3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Feb 7, 2025
2 parents a27fed3 + 86ccafc commit a7fe938
Show file tree
Hide file tree
Showing 10 changed files with 179 additions and 40 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/branch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ name: nf-core branch protection
# It fails when someone tries to make a PR against the nf-core `main`/`master` branch instead of `dev`
on:
pull_request_target:
branches:
- main
- master
branches: [main]

jobs:
test:
Expand Down
15 changes: 9 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,13 @@ jobs:
cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa
- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
# Skip BTK and CPRETEXT as they are already tested on their repos.
- name: Install nf-test
uses: nf-core/setup-nf-test@v1

# Test the component
- name: Run nf-test
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext,merquryfk
nf-test test \
--profile="docker" \
--debug \
--verbose
3 changes: 3 additions & 0 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@
},
"fastk_ktab": {
"type": "string",
"format": "file-path",
"exists": true,
"pattern": "^\\S+\\.ktab$",
"errorMessage": "Directory containing ktab files"
}
}
Expand Down
33 changes: 33 additions & 0 deletions assets/test-local.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# General Values for all subpipelines and modules
assembly_id: grTriPseu1
reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa
reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
reference_haplotigs: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa

# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore.
mapped_bam: []

merquryfk:
fastk_hist: "./EMPTY.hist"
fastk_ktab: "./EMPTY.ktab"

# Used by both subpipelines
longread:
type: hifi
dir: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/genomic_data/pacbio/

curationpretext:
aligner: minimap2
telomere_motif: TTAGGG
hic_dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/hic-arima/
btk:
taxid: 352914
gca_accession: GCA_0001
lineages: "fungi_odb10"
nt_database: /home/runner/work/ascc/ascc/NT_database/
nt_database_prefix: 18S_fungal_sequences
diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/
ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp
config: /home/runner/work/ear/ear/conf/sanger-tol-btk.config
4 changes: 2 additions & 2 deletions assets/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/gr
mapped_bam: []

merquryfk:
fastk_hist: "./"
fastk_ktab: "./"
fastk_hist: "./EMPTY.hist"
fastk_ktab: "./EMPTY.ktab"

# Used by both subpipelines
longread:
Expand Down
14 changes: 8 additions & 6 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,22 @@
----------------------------------------------------------------------------------------
*/

cleanup = true

process {
resourceLimits = [
cpus: 4,
memory: '15.GB',
memory: '10.GB',
time: '1.h'
]
}

params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'
config_profile_name = 'Full test profile'
config_profile_description = 'Full test dataset to check pipeline function'

// Input data for full size test
// Limit resources so that this can run on GitHub Actions
input = "${projectDir}/assets/test.yaml"
outdir = "results"

input = "${baseDir}/assets/test.yaml"
steps = "btk,cpretext,merquryfk"
}
16 changes: 16 additions & 0 deletions nf-test.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
config {
testsDir "tests"
workDir ".nf-test"
libDir "tests/lib"
withTrace true
autoSort false

options "-dump-channels"

plugins {
load "[email protected]"
}

configFile "conf/test.config"
profile "test"
}
49 changes: 49 additions & 0 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
nextflow_pipeline {

name "Test Workflow main.nf"
script "main.nf"
tag "pipeline"
tag "pipeline_sanger_tol"
tag "full"

test("Minimal run | No SANGER-TOL-* nested pipelines") {

when {
params {
outdir = "${outputDir}"
}
}

then {

def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ["pipeline_info/*.{html,json,txt}"])

def gfastats = getAllFilesFromDir(params.outdir, include: ['gfastats/_summary'])

//def merquryfk = getAllFilesFromDir(params.outdir, include: ['merquryfk/*'])

assertAll (
{assert workflow.success},
{assert snapshot(
// Test for number of successful processes - should be 29 for a full run
workflow.trace.succeeded().size(),

removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"),

stable_name,
// GFASTATS should be a stable file as it is measuring metrics of the files
// so nothing special needs to be done

gfastats.size(),
gfastats,

// MERQURY_FK is running statistics on the input files
// input files are also split between the two main input haps
// need to actually test merqury first
//merquryfk,

).match()},
)
}
}
}
33 changes: 33 additions & 0 deletions tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"Minimal run | No SANGER-TOL-* nested pipelines": {
"content": [
2,
{
"CAT_CAT": {
"pigz": "2.3.4"
},
"GFASTATS": {
"gfastats": "1.3.6"
},
"Workflow": {
"sanger-tol/ear": "v0.6.2"
}
},
[
"gfastats",
"gfastats/grTriPseu1.assembly_summary",
"pipeline_info",
"pipeline_info/nf_core_pipeline_software_mqc_versions.yml"
],
0,
[

]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2025-02-07T12:39:47.429262"
}
}
48 changes: 25 additions & 23 deletions workflows/ear.nf
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ workflow EAR {

//
// MODULE: YAML_INPUT
// - YAML_INPUT SHOULD BE REWORKED TO BE SMARTER
// TODO: REPLACE WITH -params-input
//
YAML_INPUT(ch_input)

Expand Down Expand Up @@ -120,7 +120,6 @@ workflow EAR {
// LOGIC: STEP TO STOP MERQURY_FK RUNNING IF SPECIFIED BY USER
//
if (!exclude_steps.contains('merquryfk')) {

//
// MODULE: MERQURYFK PLOTS OF GENOME
//
Expand All @@ -132,32 +131,32 @@ workflow EAR {
ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
}

//
// LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE
// SKIP THE MAPPING SUBWORKFLOW
//
if (!params.mapped) {
//
// SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
// This allows us to more easily bypass the mapping if we already have a sorted and mapped bam
//
MAIN_MAPPING (
YAML_INPUT.out.sample_id,
YAML_INPUT.out.longread_type,
YAML_INPUT.out.reference_hap1,
YAML_INPUT.out.pacbio_tuple,
)
ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
} else {
ch_mapped_bam = YAML_INPUT.out.mapped_bam
}


//
// LOGIC: STEP TO STOP BTK RUNNING IF SPECIFIED BY USER
//
if (!exclude_steps.contains('btk')) {
//
// LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE
// SKIP THE MAPPING SUBWORKFLOW
//
if (!params.mapped) {
//
// SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
// This allows us to more easily bypass the mapping if we already have a sorted and mapped bam
//
MAIN_MAPPING (
YAML_INPUT.out.sample_id,
YAML_INPUT.out.longread_type,
YAML_INPUT.out.reference_hap1,
YAML_INPUT.out.pacbio_tuple,
)
ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
} else {
ch_mapped_bam = YAML_INPUT.out.mapped_bam
}


//
// MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
Expand Down Expand Up @@ -187,6 +186,7 @@ workflow EAR {
ch_versions = ch_versions.mix(SANGER_TOL_BTK.out.versions)
}


//
// LOGIC: STEP TO STOP CURATION_PRETEXT RUNNING IF SPECIFIED BY USER
//
Expand All @@ -207,6 +207,7 @@ workflow EAR {
ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
}


//
// Collate and save software versions
//
Expand All @@ -218,6 +219,7 @@ workflow EAR {
newLine: true
).set { ch_collated_versions }


summary_params = paramsSummaryMap(
workflow, parameters_schema: "nextflow_schema.json")
ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
Expand Down

0 comments on commit a7fe938

Please sign in to comment.