Skip to content
This repository has been archived by the owner on May 21, 2024. It is now read-only.

Commit

Permalink
ENH: additional Bowtie2 types (#27)
Browse files Browse the repository at this point in the history
* ENH: Bowtie2 types
* review suggestions
  • Loading branch information
misialq authored Mar 16, 2022
1 parent 89a5202 commit 04372be
Show file tree
Hide file tree
Showing 33 changed files with 125 additions and 23 deletions.
2 changes: 2 additions & 0 deletions ci/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ requirements:

run:
- python {{ python }}
- bowtie2
- samtools
- q2-types {{ release }}.*
- qiime2 {{ release }}.*

Expand Down
10 changes: 6 additions & 4 deletions q2_types_genomics/per_sample_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,18 @@

from ._format import (
MultiMAGSequencesDirFmt, MultiMAGManifestFormat,
ContigSequencesDirFmt, MultiBowtie2IndexDirFmt
ContigSequencesDirFmt, MultiBowtie2IndexDirFmt,
BAMFormat, BAMDirFmt, MultiBAMDirFmt
)
from ._type import (
MAGs, Contigs, MultiBowtie2Index
MAGs, Contigs, SingleBowtie2Index, MultiBowtie2Index
)

__all__ = [
'MAGs', 'MultiMAGSequencesDirFmt', 'MultiMAGManifestFormat',
'ContigSequencesDirFmt', 'Contigs', 'MultiBowtie2Index',
'MultiBowtie2IndexDirFmt'
'ContigSequencesDirFmt', 'Contigs', 'SingleBowtie2Index',
'MultiBowtie2Index', 'MultiBowtie2IndexDirFmt',
'BAMFormat', 'BAMDirFmt', 'MultiBAMDirFmt'
]

importlib.import_module('q2_types_genomics.per_sample_data._transformer')
32 changes: 31 additions & 1 deletion q2_types_genomics/per_sample_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------

import os
import subprocess

from q2_types.bowtie2 import Bowtie2IndexDirFmt
from q2_types.feature_data import DNAFASTAFormat
Expand Down Expand Up @@ -126,9 +127,38 @@ def sequences_path_maker(self, sample_id):
return r'%s_contigs\.fasta' % sample_id


# borrowed from q2-phylogenomics
class BAMFormat(model.BinaryFileFormat):
    """Binary file format validated by running ``samtools quickcheck``."""

    def _validate_(self, level):
        """Run ``samtools quickcheck -v`` on this file.

        ``level`` is accepted but not consulted; the same check runs
        regardless of its value.

        Raises
        ------
        ValidationError
            If the samtools process exits with a non-zero return code.
        """
        completed = subprocess.run(
            ['samtools', 'quickcheck', '-v', str(self)])
        if completed.returncode == 0:
            return

        raise model.ValidationError(
            'samtools quickcheck -v failed on %s' % self.path.name)


# borrowed from q2-phylogenomics
class BAMDirFmt(model.DirectoryFormat):
    """Directory format whose members are ``.bam`` files."""

    # Members matching this pattern are handled as BAMFormat files.
    bams = model.FileCollection(r'.+\.bam', format=BAMFormat)

    @bams.set_path_maker
    def bams_path_maker(self, sample_id):
        """Return the relative file name for the BAM of ``sample_id``."""
        bam_name = '%s.bam' % sample_id
        return bam_name


class MultiBAMDirFmt(MultiDirValidationMixin, model.DirectoryFormat):
    """Directory format whose ``.bam`` members sit inside subdirectories.

    Member paths have the shape ``<sample_id>/<genome_id>.bam``, as produced
    by :meth:`bams_path_maker`.
    """

    # Members matching this pattern are handled as BAMFormat files.
    bams = model.FileCollection(r'.+\/.+\.bam', format=BAMFormat)

    @bams.set_path_maker
    def bams_path_maker(self, sample_id, genome_id):
        """Return the relative path ``<sample_id>/<genome_id>.bam``."""
        # The operands must be wrapped in a tuple: `%` binds tighter than
        # the comma, so without the parentheses the format string would be
        # applied to `sample_id` alone and raise
        # "TypeError: not enough arguments for format string".
        return '%s/%s.bam' % (sample_id, genome_id)


plugin.register_formats(
MultiFASTADirectoryFormat,
MultiMAGSequencesDirFmt,
ContigSequencesDirFmt,
MultiBowtie2IndexDirFmt
MultiBowtie2IndexDirFmt,
BAMDirFmt,
MultiBAMDirFmt
)
28 changes: 25 additions & 3 deletions q2_types_genomics/per_sample_data/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,33 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from q2_types.bowtie2 import Bowtie2IndexDirFmt
from q2_types.sample_data import SampleData
from qiime2.core.type import SemanticType

from . import (
MultiMAGSequencesDirFmt, ContigSequencesDirFmt, MultiBowtie2IndexDirFmt
MultiMAGSequencesDirFmt, ContigSequencesDirFmt, MultiBowtie2IndexDirFmt,
BAMDirFmt, MultiBAMDirFmt
)
from ..plugin_setup import plugin

MAGs = SemanticType(
'MAGs', variant_of=SampleData.field['type'])
Contigs = SemanticType(
'Contigs', variant_of=SampleData.field['type'])
SingleBowtie2Index = SemanticType(
'SingleBowtie2Index', variant_of=SampleData.field['type'])
MultiBowtie2Index = SemanticType(
'MultiBowtie2Index', variant_of=SampleData.field['type'])
AlignmentMap = SemanticType(
'AlignmentMap', variant_of=SampleData.field['type'])
MultiAlignmentMap = SemanticType(
'MultiAlignmentMap', variant_of=SampleData.field['type'])

plugin.register_semantic_types(MAGs, Contigs, MultiBowtie2Index)
plugin.register_semantic_types(
MAGs, Contigs, SingleBowtie2Index, MultiBowtie2Index,
AlignmentMap, MultiAlignmentMap
)

plugin.register_semantic_type_to_format(
SampleData[MAGs],
Expand All @@ -31,7 +41,19 @@
SampleData[Contigs],
artifact_format=ContigSequencesDirFmt
)
plugin.register_semantic_type_to_format(
SampleData[SingleBowtie2Index],
artifact_format=Bowtie2IndexDirFmt
)
plugin.register_semantic_type_to_format(
SampleData[MultiBowtie2Index],
artifact_format=MultiBowtie2IndexDirFmt
)
plugin.register_semantic_type_to_format(
SampleData[AlignmentMap],
artifact_format=BAMDirFmt
)
plugin.register_semantic_type_to_format(
SampleData[MultiAlignmentMap],
artifact_format=MultiBAMDirFmt
)
Empty file.
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
30 changes: 25 additions & 5 deletions q2_types_genomics/per_sample_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from q2_types_genomics.per_sample_data._format import (
MultiFASTADirectoryFormat, MultiMAGManifestFormat,
ContigSequencesDirFmt, MultiBowtie2IndexDirFmt
ContigSequencesDirFmt, MultiBowtie2IndexDirFmt, BAMDirFmt, MultiBAMDirFmt
)


Expand Down Expand Up @@ -94,14 +94,14 @@ def test_multifasta_dirfmt_unorganized(self):
ValidationError, 'should be .* per-sample directories'):
format.validate()

def test_multibowtie_dirfmt(self):
dirpath = self.get_data_path('bowtie/valid')
def test_multibowtie_index_dirfmt(self):
dirpath = self.get_data_path('bowtie/index-valid')
format = MultiBowtie2IndexDirFmt(dirpath, mode='r')

format.validate()

def test_multibowtie_dirfmt_unorganized(self):
dirpath = self.get_data_path('bowtie/unorganized')
def test_multibowtie_index_dirfmt_unorganized(self):
dirpath = self.get_data_path('bowtie/index-unorganized')
format = MultiBowtie2IndexDirFmt(dirpath, mode='r')

with self.assertRaisesRegex(
Expand All @@ -113,6 +113,26 @@ def test_contig_seqs_dirfmt(self):
shutil.copytree(filepath, self.temp_dir.name, dirs_exist_ok=True)
ContigSequencesDirFmt(self.temp_dir.name, mode='r').validate()

def test_bam_dirmt(self):
    """The 'bowtie/maps-single' fixture should validate as BAMDirFmt."""
    bam_dir = self.get_data_path('bowtie/maps-single')
    BAMDirFmt(bam_dir, mode='r').validate()

def test_bam_dirmt_invalid(self):
    """The 'bowtie/maps-invalid' fixture should fail BAMDirFmt validation."""
    bam_dir = self.get_data_path('bowtie/maps-invalid')
    dir_fmt = BAMDirFmt(bam_dir, mode='r')

    # Only validate() is expected to raise, so construction stays outside
    # the assertion context.
    expected_msg = 'samtools quickcheck -v failed on'
    with self.assertRaisesRegex(ValidationError, expected_msg):
        dir_fmt.validate()

def test_multibam_dirmt(self):
    """The 'bowtie/maps-multi' fixture should validate as MultiBAMDirFmt."""
    bam_dir = self.get_data_path('bowtie/maps-multi')
    MultiBAMDirFmt(bam_dir, mode='r').validate()


if __name__ == '__main__':
unittest.main()
37 changes: 34 additions & 3 deletions q2_types_genomics/per_sample_data/tests/test_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@

import unittest

from q2_types.bowtie2 import Bowtie2IndexDirFmt
from q2_types.sample_data import SampleData
from qiime2.plugin.testing import TestPluginBase

from q2_types_genomics.per_sample_data import (
MAGs, MultiMAGSequencesDirFmt,
Contigs, ContigSequencesDirFmt,
MultiBowtie2Index, MultiBowtie2IndexDirFmt
SingleBowtie2Index, MultiBowtie2Index, MultiBowtie2IndexDirFmt, BAMDirFmt,
MultiBAMDirFmt
)
from q2_types_genomics.per_sample_data._type import (AlignmentMap,
MultiAlignmentMap)


class TestTypes(TestPluginBase):
Expand All @@ -39,15 +43,42 @@ def test_contigs_semantic_type_to_format_registration(self):
ContigSequencesDirFmt
)

def test_multibowtie_semantic_type_registration(self):
def test_singlebowtie_semantic_type_registration(self):
    """SingleBowtie2Index must be a registered semantic type."""
    self.assertRegisteredSemanticType(SingleBowtie2Index)

def test_singlebowtie_semantic_type_to_format_registration(self):
    """Check SampleData[SingleBowtie2Index] maps to Bowtie2IndexDirFmt."""
    semantic_type = SampleData[SingleBowtie2Index]
    self.assertSemanticTypeRegisteredToFormat(
        semantic_type, Bowtie2IndexDirFmt)

def test_multibowtie_index_semantic_type_registration(self):
    """MultiBowtie2Index must be a registered semantic type."""
    self.assertRegisteredSemanticType(MultiBowtie2Index)

def test_multibowtie_semantic_type_to_format_registration(self):
def test_multibowtie_index_semantic_type_to_format_registration(self):
    """Check SampleData[MultiBowtie2Index] maps to MultiBowtie2IndexDirFmt."""
    semantic_type = SampleData[MultiBowtie2Index]
    self.assertSemanticTypeRegisteredToFormat(
        semantic_type, MultiBowtie2IndexDirFmt)

def test_aln_map_semantic_type_registration(self):
    """AlignmentMap must be a registered semantic type."""
    self.assertRegisteredSemanticType(AlignmentMap)

def test_aln_map_semantic_type_to_format_registration(self):
    """Check SampleData[AlignmentMap] maps to BAMDirFmt."""
    semantic_type = SampleData[AlignmentMap]
    self.assertSemanticTypeRegisteredToFormat(semantic_type, BAMDirFmt)

def test_multi_aln_map_semantic_type_registration(self):
    """MultiAlignmentMap must be a registered semantic type."""
    self.assertRegisteredSemanticType(MultiAlignmentMap)

def test_multi_aln_map_semantic_type_to_format_registration(self):
    """Check SampleData[MultiAlignmentMap] maps to MultiBAMDirFmt."""
    semantic_type = SampleData[MultiAlignmentMap]
    self.assertSemanticTypeRegisteredToFormat(
        semantic_type, MultiBAMDirFmt)


if __name__ == '__main__':
unittest.main()
9 changes: 2 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,9 @@
'q2_types_genomics.tests': ['data/*'],
'q2_types_genomics.per_sample_data.tests':
['data/*',
'data/mags/mags-fa/sample1/*', 'data/mags/mags-fa/sample2/*',
'data/mags/mags-fasta/sample1/*',
'data/mags/mags-fasta/sample2/*',
'data/mags/mags-unorganized/*',
'data/mags/*/*', 'data/mags/*/*/*',
'data/manifests/*', 'data/contigs/*',
'data/bowtie/unorganized/*',
'data/bowtie/valid/sample1/mag1/*',
'data/bowtie/valid/sample2/mag1/*'],
'data/bowtie/*/*', 'data/bowtie/*/*/*/*', 'data/bowtie/*/*/*'],
'q2_types_genomics.feature_data.tests':
['data/*', 'data/mags-fa/*', 'data/mags-fasta/*'],
'q2_types_genomics.genome_data.tests':
Expand Down

0 comments on commit 04372be

Please sign in to comment.