Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hifiadapterfilt #7398

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions modules/nf-core/hifiadapterfilt/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
# Conda environment for the HIFIADAPTERFILT module.
# Channels the packages below are resolved from.
channels:
  - conda-forge
  - bioconda
# Tool release pinned to the same version as the containers in main.nf.
dependencies:
  - bioconda::hifiadapterfilt=3.0.0
57 changes: 57 additions & 0 deletions modules/nf-core/hifiadapterfilt/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Runs hifiadapterfilt.sh on the staged reads and publishes the filtered reads,
// the BLAST hits, the summary statistics and the blocklist of removed read
// headers, each renamed to "${prefix}.<suffix>".
process HIFIADAPTERFILT {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hifiadapterfilt:3.0.0--hdfd78af_0':
        'biocontainers/hifiadapterfilt:3.0.0--hdfd78af_0' }"

    input:
    tuple val(meta), path(fastq)

    output:
    tuple val(meta), path("*.filt.fastq.gz")        , emit: filt
    tuple val(meta), path("*.contaminant.blastout") , emit: blast_search
    tuple val(meta), path("*.stats")                , emit: stats
    tuple val(meta), path("*.blocklist")            , emit: headers
    path "versions.yml"                             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The tool discovers the reads staged in the working directory by itself,
    // so "fastq" is intentionally not referenced on the command line.
    // "-t" is placed before "${args}" so a user-supplied "-t" in ext.args still wins.
    """
    hifiadapterfilt.sh \\
        -t ${task.cpus} \\
        ${args}

    # Outputs are named after the input file. Rename them to the requested prefix,
    # but skip the rename when the name already matches: "mv X X" exits non-zero
    # and would fail the task.
    [ -e ${prefix}.filt.fastq.gz ]        || mv *.filt.fastq.gz        ${prefix}.filt.fastq.gz
    [ -e ${prefix}.contaminant.blastout ] || mv *.contaminant.blastout ${prefix}.contaminant.blastout
    [ -e ${prefix}.stats ]                || mv *.stats                ${prefix}.stats
    [ -e ${prefix}.blocklist ]            || mv *.blocklist            ${prefix}.blocklist

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        hifiadapterfilt: \$(hifiadapterfilt.sh -v)
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Creates empty placeholders with the same names as the real outputs.
    """
    echo "" | gzip > ${prefix}.filt.fastq.gz
    touch ${prefix}.contaminant.blastout
    touch ${prefix}.stats
    touch ${prefix}.blocklist

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        hifiadapterfilt: \$(hifiadapterfilt.sh -v)
    END_VERSIONS
    """
}
86 changes: 86 additions & 0 deletions modules/nf-core/hifiadapterfilt/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Module metadata: describes the tool, the input tuple and every output channel
# emitted by main.nf.
name: "hifiadapterfilt"
description: "Convert .bam to .fastq and remove reads with remnant PacBio adapter
  sequences"
keywords:
  - pacbio
  - hifi
  - filter
  - metagenomics
  - adapters
tools:
  - hifiadapterfilt:
      description: "Convert .bam to .fastq and remove CCS reads with remnant PacBio
        adapter sequences"
      homepage: "https://bio.tools/hifiadapterfilt"
      documentation: "https://github.com/sheinasim/HiFiAdapterFilt"
      tool_dev_url: "https://github.com/sheinasim/HiFiAdapterFilt"
      doi: "10.1186/s12864-022-08375-1"
      licence: ["GPL-3.0"]
      identifier: biotools:hifiadapterfilt

input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`
    - fastq:
        type: file
        description: >-
          FASTQ file of raw PacBio HiFi reads, plain or gzip-compressed.
          The tool automatically detects the FASTQ files staged in the working
          directory, so the file is not passed explicitly on the command line.
        # The module test feeds a gzip-compressed file, so both forms are listed.
        pattern: "*.{fastq,fastq.gz}"

output:
  - filt:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.filt.fastq.gz":
          type: file
          description: |
            Fastq reads free of pacbio adapter sequence ready for assembly
          pattern: "*.filt.fastq.gz"
  - blast_search:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.contaminant.blastout":
          type: file
          description: |
            Output of BLAST search
          pattern: "*.contaminant.blastout"
  - stats:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.stats":
          type: file
          description: |
            File with simple math on number of reads removed, etc
          pattern: "*.stats"
  - headers:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.blocklist":
          type: file
          description: |
            Headers of pacbio adapter contaminated reads to be removed
          pattern: "*.blocklist"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@Ge94"
maintainers:
  - "@Ge94"
54 changes: 54 additions & 0 deletions modules/nf-core/hifiadapterfilt/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// nf-test suite for the HIFIADAPTERFILT process: one real run and one stub run,
// both on the same PacBio HiFi test file.
nextflow_process {

    name "Test Process HIFIADAPTERFILT"
    script "../main.nf"
    process "HIFIADAPTERFILT"

    tag "modules"
    tag "modules_nfcore"
    tag "hifiadapterfilt"

    test("hifiadapterfilt - fastq.gz") {
        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
                ]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                // Check each renamed output by name on its own channel. The previous
                // single filter required one file name to equal three different
                // strings at once, so it matched nothing and snapshotted an empty list.
                // The unnamed snapshot entry stored for this test is therefore no longer
                // referenced and can be dropped with `nf-test test --clean-snapshot`.
                { assert file(process.out.filt[0][1]).name == "test.filt.fastq.gz" },
                { assert file(process.out.blast_search[0][1]).name == "test.contaminant.blastout" },
                { assert file(process.out.headers[0][1]).name == "test.blocklist" },
                // The stats file is checked by content rather than by checksum.
                { assert path(process.out.stats[0].get(1)).text.contains('Number of adapter contaminated ccs reads: 0 (0% of total)') },
                { assert snapshot(process.out.versions).match("versions") }
            )
        }
    }

    test("hifiadapterfilt - fastq.gz - stub") {
        options "-stub"
        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)
                ]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                // Stub outputs are fixed placeholders, so the whole output map is snapshotted.
                { assert snapshot(process.out).match() }
            )
        }
    }
}
107 changes: 107 additions & 0 deletions modules/nf-core/hifiadapterfilt/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
{
"hifiadapterfilt - fastq.gz": {
"content": [
[

]
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.4"
},
"timestamp": "2025-01-30T15:23:02.060065966"
},
"hifiadapterfilt - fastq.gz - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.filt.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
[
{
"id": "test"
},
"test.contaminant.blastout:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
[
{
"id": "test"
},
"test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
[
{
"id": "test"
},
"test.blocklist:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"4": [
"versions.yml:md5,2fbf3754b3f590ea1ef7d80105bb1333"
],
"blast_search": [
[
{
"id": "test"
},
"test.contaminant.blastout:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"filt": [
[
{
"id": "test"
},
"test.filt.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"headers": [
[
{
"id": "test"
},
"test.blocklist:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"stats": [
[
{
"id": "test"
},
"test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,2fbf3754b3f590ea1ef7d80105bb1333"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.4"
},
"timestamp": "2025-01-30T15:33:57.890894465"
},
"versions": {
"content": [
[
"versions.yml:md5,2fbf3754b3f590ea1ef7d80105bb1333"
]
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.4"
},
"timestamp": "2025-01-30T15:23:02.135872615"
}
}
Loading