From 361a72b833c221dd2c48571154d6dbc90bbb6459 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 7 Feb 2025 12:57:35 +0000
Subject: [PATCH 1/6] Adding NF-TEST files

---
 assets/schema_input.json |  3 +++
 assets/test.yaml         |  4 ++--
 conf/base.config         |  6 ++---
 conf/test.config         | 25 ++++++++++++++------
 nf-test.config           | 16 +++++++++++++
 tests/main.nf.test       | 49 ++++++++++++++++++++++++++++++++++++++++
 tests/main.nf.test.snap  | 33 +++++++++++++++++++++++++++
 workflows/ear.nf         | 48 ++++++++++++++++++++-------------------
 8 files changed, 149 insertions(+), 35 deletions(-)
 create mode 100644 nf-test.config
 create mode 100644 tests/main.nf.test
 create mode 100644 tests/main.nf.test.snap

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 61d2b74..2b6741b 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -53,6 +53,9 @@
                     },
                     "fastk_ktab": {
                         "type": "string",
+                        "format": "file-path",
+                        "exists": true,
+                        "pattern": "^\\S+\\.ktab$",
                         "errorMessage": "Directory containing ktab files"
                     }
                 }
diff --git a/assets/test.yaml b/assets/test.yaml
index 0f5f6ed..14b0b50 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -8,8 +8,8 @@ reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/gr
 mapped_bam: []
 
 merquryfk:
-  fastk_hist: "./"
-  fastk_ktab: "./"
+  fastk_hist: "./EMPTY.hist"
+  fastk_ktab: "./EMPTY.ktab"
 
 # Used by both subpipelines
 longread:
diff --git a/conf/base.config b/conf/base.config
index 4b330f4..8d7339e 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -20,13 +20,13 @@ process {
     maxErrors           = '-1'
 
     withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" {
-        time            = { check_max( 70.h  * task.attempt, 'time'   ) }
+        time            = { check_max( 8.d  * task.attempt, 'time'   ) }
     }
 
     withName: "MINIMAP2_ALIGN_SE" {
         cpus            = { check_max( 16                  , 'cpus'    ) }
-        memory          = { check_max( 1.GB     * ( reference.size() < 2e9 ? 60 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') }
-        time            = { check_max( 1.h      * ( reference.size() < 1e9 ? 15 : reference.size() < 10e9 ? 30 : 48), 'time'      ) }
+        memory          = { check_max( 1.GB     * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 25 ) * task.attempt ) , 'memory') }
+        time            = { check_max( 1.h      * ( reference.size() < 1e9 ? 20 : reference.size() < 10e9 ? 35 : 48), 'time'      ) }
     }
 
     // Process-specific resource requirements
diff --git a/conf/test.config b/conf/test.config
index 06d069f..2fc0011 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -10,15 +10,26 @@
 ----------------------------------------------------------------------------------------
 */
 
+cleanup = true
+
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '10.GB',
+        time: '1.h'
+    ]
+}
+
 params {
-    config_profile_name        = 'Test profile'
-    config_profile_description = 'Minimal test dataset to check pipeline function'
+    config_profile_name        = 'Full test profile'
+    config_profile_description = 'Full test dataset to check pipeline function'
 
+    // Input data for full size test
     // Limit resources so that this can run on GitHub Actions
-    max_cpus    = 2
-    max_memory  = '6.GB'
-    max_time    = '6.h'
-    input       = "${projectDir}/assets/test.yaml"
-    outdir      = "results"
+    max_cpus    = 4
+    max_memory  = '10.GB'
+    max_time    = '1.h'
 
+    input       =  "${baseDir}/assets/test.yaml"
+    steps       = "btk,cpretext,merquryfk"
 }
diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 0000000..cc4372f
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,16 @@
+config {
+    testsDir "tests"
+    workDir ".nf-test"
+    libDir "tests/lib"
+    withTrace true
+    autoSort false
+
+    options "-dump-channels"
+
+    plugins {
+        load "nft-utils@0.0.3"
+    }
+
+    configFile "conf/test.config"
+    profile "test"
+}
diff --git a/tests/main.nf.test b/tests/main.nf.test
new file mode 100644
index 0000000..556f956
--- /dev/null
+++ b/tests/main.nf.test
@@ -0,0 +1,49 @@
+nextflow_pipeline {
+
+    name "Test Workflow main.nf"
+    script "main.nf"
+    tag "pipeline"
+    tag "pipeline_sanger_tol"
+    tag "full"
+
+    test("Minimal run | No SANGER-TOL-* nested pipelines") {
+
+        when {
+            params {
+                outdir = "${outputDir}"
+            }
+        }
+
+        then {
+
+            def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ["pipeline_info/*.{html,json,txt}"])
+
+            def gfastats = getAllFilesFromDir(params.outdir, include: ['gfastats/_summary'])
+
+            //def merquryfk = getAllFilesFromDir(params.outdir, include: ['merquryfk/*'])
+
+            assertAll (
+                {assert workflow.success},
+                {assert snapshot(
+                    // Test for number of successful processes - should be 29 for a full run
+                    workflow.trace.succeeded().size(),
+
+                    removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"),
+
+                    stable_name,
+                    // GFASTATS should be a stable file as it is measuring metrics of the files
+                    // so nothing special needs to be done
+
+                    gfastats.size(),
+                    gfastats,
+
+                    // MERQURY_FK is running statistics on the input files
+                    // input files are also split between the two main input haps
+                    // need to actually test merqury first
+                    //merquryfk,
+
+                ).match()},
+            )
+        }
+    }
+}
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
new file mode 100644
index 0000000..7329d59
--- /dev/null
+++ b/tests/main.nf.test.snap
@@ -0,0 +1,33 @@
+{
+    "Minimal run | No SANGER-TOL-* nested pipelines": {
+        "content": [
+            2,
+            {
+                "CAT_CAT": {
+                    "pigz": "2.3.4"
+                },
+                "GFASTATS": {
+                    "gfastats": "1.3.6"
+                },
+                "Workflow": {
+                    "sanger-tol/ear": "v0.6.2"
+                }
+            },
+            [
+                "gfastats",
+                "gfastats/grTriPseu1.assembly_summary",
+                "pipeline_info",
+                "pipeline_info/nf_core_pipeline_software_mqc_versions.yml"
+            ],
+            0,
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2025-02-07T12:39:47.429262"
+    }
+}
\ No newline at end of file
diff --git a/workflows/ear.nf b/workflows/ear.nf
index a37966c..fb9154e 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -50,7 +50,7 @@ workflow EAR {
 
     //
     // MODULE: YAML_INPUT
-    //          - YAML_INPUT SHOULD BE REWORKED TO BE SMARTER
+    //          TODO: REPLACE WITH -params-input
     //
     YAML_INPUT(ch_input)
 
@@ -120,7 +120,6 @@ workflow EAR {
     // LOGIC: STEP TO STOP MERQURY_FK RUNNING IF SPECIFIED BY USER
     //
     if (!exclude_steps.contains('merquryfk')) {
-
         //
         // MODULE: MERQURYFK PLOTS OF GENOME
         //
@@ -132,32 +131,32 @@ workflow EAR {
         ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
     }
 
-    //
-    // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE
-    //          SKIP THE MAPPING SUBWORKFLOW
-    //
-    if (!params.mapped) {
-        //
-        // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
-        //              This allows us to more esily bypass the mapping if we already have a sorted and mapped bam
-        //
-        MAIN_MAPPING (
-            YAML_INPUT.out.sample_id,
-            YAML_INPUT.out.longread_type,
-            YAML_INPUT.out.reference_hap1,
-            YAML_INPUT.out.pacbio_tuple,
-        )
-        ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
-        ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
-    } else {
-        ch_mapped_bam = YAML_INPUT.out.mapped_bam
-    }
-
 
     //
     // LOGIC: STEP TO STOP BTK RUNNING IF SPECIFIED BY USER
     //
     if (!exclude_steps.contains('btk')) {
+        //
+        // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE
+        //          SKIP THE MAPPING SUBWORKFLOW
+        //
+        if (!params.mapped) {
+            //
+            // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
+            //              This allows us to more easily bypass the mapping if we already have a sorted and mapped bam
+            //
+            MAIN_MAPPING (
+                YAML_INPUT.out.sample_id,
+                YAML_INPUT.out.longread_type,
+                YAML_INPUT.out.reference_hap1,
+                YAML_INPUT.out.pacbio_tuple,
+            )
+            ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
+            ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
+        } else {
+            ch_mapped_bam = YAML_INPUT.out.mapped_bam
+        }
+
 
         //
         // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
@@ -187,6 +186,7 @@ workflow EAR {
         ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
     }
 
+
     //
     // LOGIC: STEP TO STOP CURATION_PRETEXT RUNNING IF SPECIFIED BY USER
     //
@@ -207,6 +207,7 @@ workflow EAR {
         ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
     }
 
+
     //
     // Collate and save software versions
     //
@@ -218,6 +219,7 @@ workflow EAR {
             newLine: true
         ).set { ch_collated_versions }
 
+
     summary_params      = paramsSummaryMap(
         workflow, parameters_schema: "nextflow_schema.json")
     ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))

From 476152038668af2332301977134a59292126b68b Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 7 Feb 2025 13:01:04 +0000
Subject: [PATCH 2/6] Updating CICD

---
 .github/workflows/ci.yml | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a21e2f4..0304912 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -45,10 +45,13 @@ jobs:
           cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
           cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa
 
-      - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
-        # Skip BTK and CPRETEXT as they are already tested on their repos.
+      - name: Install nf-test
+        uses: nf-core/setup-nf-test@v1
+
+      # Test the component
+      - name: Run nf-test
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext,merquryfk
+          nf-test test \
+            --profile="docker" \
+            --debug \
+            --verbose

From 9d01433220bd90ec4b7024f3ce3152771bb39718 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 7 Feb 2025 13:03:02 +0000
Subject: [PATCH 3/6] Reduce limit of 8.d to 2.d

---
 conf/base.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/base.config b/conf/base.config
index 8d7339e..865625d 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -20,7 +20,7 @@ process {
     maxErrors           = '-1'
 
     withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" {
-        time            = { check_max( 8.d  * task.attempt, 'time'   ) }
+        time            = { check_max( 2.d  * task.attempt, 'time'   ) }
     }
 
     withName: "MINIMAP2_ALIGN_SE" {

From c5e7ea4fa0fecd3478081023c488f7bb50c5e775 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 7 Feb 2025 13:35:03 +0000
Subject: [PATCH 4/6] Run branch protection on PRs to main instead of master

---
 .github/workflows/branch.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index e3f6dae..7ac1e04 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -3,7 +3,7 @@ name: nf-core branch protection
 # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
 on:
   pull_request_target:
-    branches: [master]
+    branches: [main]
 
 jobs:
   test:

From 8edc609bc16a46fd3f4c9ff1254e8124f5cecf12 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 7 Feb 2025 14:24:15 +0000
Subject: [PATCH 5/6] Update to template

---
 conf/test.config                              |  4 ---
 .../local/utils_nfcore_ear_pipeline/main.nf   | 36 +++++++++----------
 2 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 2fc0011..db0d1a3 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -26,10 +26,6 @@ params {
 
     // Input data for full size test
     // Limit resources so that this can run on GitHub Actions
-    max_cpus    = 4
-    max_memory  = '10.GB'
-    max_time    = '1.h'
-
     input       =  "${baseDir}/assets/test.yaml"
     steps       = "btk,cpretext,merquryfk"
 }
diff --git a/subworkflows/local/utils_nfcore_ear_pipeline/main.nf b/subworkflows/local/utils_nfcore_ear_pipeline/main.nf
index 3bef592..9fd6da0 100644
--- a/subworkflows/local/utils_nfcore_ear_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_ear_pipeline/main.nf
@@ -76,28 +76,24 @@ workflow PIPELINE_INITIALISATION {
     //
     // Create channel from input file provided through params.input
     //
-    Channel
-        .fromSamplesheet("input")
-        .map {
-            meta, fastq_1, fastq_2 ->
-                if (!fastq_2) {
-                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
-                } else {
-                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
-                }
-        }
-        .groupTuple()
-        .map {
-            validateInputSamplesheet(it)
-        }
-        .map {
-            meta, fastqs ->
-                return [ meta, fastqs.flatten() ]
-        }
-        .set { ch_samplesheet }
+
+    // Channel
+    //     .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json"))
+    //     .map {
+    //         meta, file
+    //     }
+    //     .groupTuple()
+    //     .map { samplesheet ->
+    //         validateInputSamplesheet(samplesheet)
+    //     }
+    //     .map {
+    //         meta, fastqs ->
+    //             return [ meta, fastqs.flatten() ]
+    //     }
+    //     .set { ch_samplesheet }
 
     emit:
-    samplesheet = ch_samplesheet
+    samplesheet = params.input
     versions    = ch_versions
 }
 

From 86ccafcb75b8dc81a86b49b83cb8c70646f4b151 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 7 Feb 2025 14:29:31 +0000
Subject: [PATCH 6/6] Add assets/test-local.yaml for local test runs

---
 assets/test-local.yaml | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100755 assets/test-local.yaml

diff --git a/assets/test-local.yaml b/assets/test-local.yaml
new file mode 100755
index 0000000..c5e9ab5
--- /dev/null
+++ b/assets/test-local.yaml
@@ -0,0 +1,33 @@
+# General values for all subpipelines and modules
+assembly_id: grTriPseu1
+reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa
+reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
+reference_haplotigs: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa
+
+# If a mapped BAM already exists, list it below and add `--mapped TRUE` to the nextflow command; otherwise leave this empty.
+mapped_bam: []
+
+merquryfk:
+  fastk_hist: "./EMPTY.hist"
+  fastk_ktab: "./EMPTY.ktab"
+
+# Used by both subpipelines
+longread:
+  type: hifi
+  dir: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/genomic_data/pacbio/
+
+curationpretext:
+  aligner: minimap2
+  telomere_motif: TTAGGG
+  hic_dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/hic-arima/
+btk:
+  taxid: 352914
+  gca_accession: GCA_0001
+  lineages: "fungi_odb10"
+  nt_database: /home/runner/work/ascc/ascc/NT_database/
+  nt_database_prefix: 18S_fungal_sequences
+  diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
+  diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
+  ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/
+  ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp
+  config: /home/runner/work/ear/ear/conf/sanger-tol-btk.config