Skip to content
This repository has been archived by the owner on May 21, 2024. It is now read-only.

Commit

Permalink
ENH: support for multidirectory bowtie index (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
misialq authored May 12, 2021
1 parent cadc9ce commit 1c150c6
Show file tree
Hide file tree
Showing 26 changed files with 84 additions and 16 deletions.
4 changes: 2 additions & 2 deletions ci/recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# revert to proper versioning when ready for the first release
{% set data = load_setup_py_data() %}
{% set version = '2021.4.0.dev0' %}
{% set release = '2021.4' %}
{% set version = '2021.8.0.dev0' %}
{% set release = '2021.8' %}

package:
name: q2-types-genomics
Expand Down
8 changes: 5 additions & 3 deletions q2_types_genomics/per_sample_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@
import importlib

from ._format import (
MAGSequencesDirFmt, MultiMAGManifestFormat, ContigSequencesDirFmt
MAGSequencesDirFmt, MultiMAGManifestFormat,
ContigSequencesDirFmt, MultiBowtie2IndexDirFmt
)
from ._type import (
MAGs, Contigs
MAGs, Contigs, MultiBowtie2Index
)

__all__ = [
'MAGs', 'MAGSequencesDirFmt', 'MultiMAGManifestFormat',
'ContigSequencesDirFmt', 'Contigs'
'ContigSequencesDirFmt', 'Contigs', 'MultiBowtie2Index',
'MultiBowtie2IndexDirFmt'
]

importlib.import_module('q2_types_genomics.per_sample_data._transformer')
10 changes: 8 additions & 2 deletions q2_types_genomics/per_sample_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import os

from q2_types.bowtie2 import Bowtie2IndexDirFmt
from q2_types.feature_data import DNAFASTAFormat
from qiime2.core.exceptions import ValidationError
from qiime2.plugin import model
Expand Down Expand Up @@ -93,7 +94,7 @@ def _validate_(self, level):
class MultiDirValidationMixin:
    """Mixin that checks a directory format is organised in sub-directories.

    Intended to be listed before a directory-format base class so that this
    ``_validate_`` is the one found first in the method resolution order.
    The host class must expose the directory being validated as
    ``self.path`` (an object providing ``iterdir()``).
    """

    def _validate_(self, level):
        """Reject loose files placed directly in the top-level directory.

        Every immediate child of ``self.path`` must be a directory; the only
        plain file tolerated at the top level is one named ``MANIFEST``.

        Parameters
        ----------
        level
            Validation level passed in by the caller. It is not consulted
            here: the same check runs regardless of its value.

        Raises
        ------
        ValidationError
            If any top-level entry is not a directory and is not named
            ``MANIFEST``.
        """
        # Only the post-change allow-list is kept: 'metadata.yml' is no
        # longer accepted as a top-level file.
        allowed_files = ('MANIFEST',)
        for entry in self.path.iterdir():
            if entry.is_dir() or entry.name in allowed_files:
                continue
            raise ValidationError(
                "Files should be organised in per-sample directories")

Expand All @@ -112,12 +113,17 @@ class MAGSequencesDirFmt(MultiFASTADirectoryFormat):
manifest = model.File('MANIFEST', format=MultiMAGManifestFormat)


class MultiBowtie2IndexDirFmt(MultiDirValidationMixin, Bowtie2IndexDirFmt):
    """Bowtie2 index directory format validated by MultiDirValidationMixin.

    Defines no members of its own. The mixin is listed first, so its
    ``_validate_`` is the one resolved for instances of this class.
    """


ContigSequencesDirFmt = model.SingleFileDirectoryFormat(
'ContigSequencesDirFmt', 'contigs.fasta', DNAFASTAFormat
)

plugin.register_formats(
MultiFASTADirectoryFormat,
MAGSequencesDirFmt,
ContigSequencesDirFmt
ContigSequencesDirFmt,
MultiBowtie2IndexDirFmt
)
18 changes: 14 additions & 4 deletions q2_types_genomics/per_sample_data/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@
from q2_types.sample_data import SampleData
from qiime2.core.type import SemanticType

from . import MAGSequencesDirFmt, ContigSequencesDirFmt
from . import (
MAGSequencesDirFmt, ContigSequencesDirFmt, MultiBowtie2IndexDirFmt
)
from ..plugin_setup import plugin

MAGs = SemanticType('MAGs', variant_of=SampleData.field['type'])
Contigs = SemanticType('Contigs', variant_of=SampleData.field['type'])
MAGs = SemanticType(
'MAGs', variant_of=SampleData.field['type'])
Contigs = SemanticType(
'Contigs', variant_of=SampleData.field['type'])
MultiBowtie2Index = SemanticType(
'MultiBowtie2Index', variant_of=SampleData.field['type'])

plugin.register_semantic_types(MAGs, Contigs)
plugin.register_semantic_types(MAGs, Contigs, MultiBowtie2Index)

plugin.register_semantic_type_to_format(
SampleData[MAGs],
Expand All @@ -25,3 +31,7 @@
SampleData[Contigs],
artifact_format=ContigSequencesDirFmt
)
plugin.register_semantic_type_to_format(
SampleData[MultiBowtie2Index],
artifact_format=MultiBowtie2IndexDirFmt
)
20 changes: 20 additions & 0 deletions q2_types_genomics/per_sample_data/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,23 @@ def _mag_manifest_helper(dirfmt, output_cls, manifest_fmt,
result.manifest.write_data(manifest, manifest_fmt)

return result


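# TODO(review): disabled draft, apparently adapted from _mag_manifest_helper.
# As written it uses manifest_fh, manifest and manifest_fmt without defining
# them; finish or remove it before enabling.
# def _bowtie2_fmt_helper(dirfmt, output_cls, bowtie_fmt):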
# result = output_cls()
# for path, view in dirfmt.sequences.iter_views(bowtie_fmt):
# sample_id, mag_id = _parse_mag_filename(path)
# result.sequences.write_data(view, bowtie_fmt,
# sample_id=sample_id,
# mag_id=mag_id)
#
# filepath = result.sequences.path_maker(sample_id=sample_id,
# mag_id=mag_id)
# name = f"{filepath.parent.name}/{filepath.name}"
#
# manifest_fh.write('%s,%s,%s\n' % (sample_id, mag_id, name))
#
# manifest_fh.close()
# result.manifest.write_data(manifest, manifest_fmt)
#
# return result
17 changes: 16 additions & 1 deletion q2_types_genomics/per_sample_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from qiime2.plugin.testing import TestPluginBase

from q2_types_genomics.per_sample_data._format import (
MultiFASTADirectoryFormat, MultiMAGManifestFormat, ContigSequencesDirFmt
MultiFASTADirectoryFormat, MultiMAGManifestFormat,
ContigSequencesDirFmt, MultiBowtie2IndexDirFmt
)


Expand Down Expand Up @@ -93,6 +94,20 @@ def test_multifasta_dirfmt_unorganized(self):
ValidationError, 'should be .* per-sample directories'):
format.validate()

def test_multibowtie_dirfmt(self):
    """Validation of the 'bowtie/valid' test data completes without error."""
    index_dir = MultiBowtie2IndexDirFmt(
        self.get_data_path('bowtie/valid'), mode='r')

    index_dir.validate()

def test_multibowtie_dirfmt_unorganized(self):
    """Validating the 'bowtie/unorganized' test data raises ValidationError.

    The error message must match the per-sample-directories wording.
    """
    index_dir = MultiBowtie2IndexDirFmt(
        self.get_data_path('bowtie/unorganized'), mode='r')
    expected_message = 'should be .* per-sample directories'

    with self.assertRaisesRegex(ValidationError, expected_message):
        index_dir.validate()

def test_contig_seqs_dirfmt(self):
filepath = self.get_data_path('mags/mags-fasta/sample2/mag1.fasta')
shutil.copy(filepath, os.path.join(
Expand Down
13 changes: 12 additions & 1 deletion q2_types_genomics/per_sample_data/tests/test_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from qiime2.plugin.testing import TestPluginBase

from q2_types_genomics.per_sample_data import (
MAGs, MAGSequencesDirFmt, Contigs, ContigSequencesDirFmt
MAGs, MAGSequencesDirFmt,
Contigs, ContigSequencesDirFmt,
MultiBowtie2Index, MultiBowtie2IndexDirFmt
)


Expand All @@ -37,6 +39,15 @@ def test_contigs_semantic_type_to_format_registration(self):
ContigSequencesDirFmt
)

def test_multibowtie_semantic_type_registration(self):
    """MultiBowtie2Index is registered as a semantic type."""
    semantic_type = MultiBowtie2Index
    self.assertRegisteredSemanticType(semantic_type)

def test_multibowtie_semantic_type_to_format_registration(self):
    """SampleData[MultiBowtie2Index] is registered to its directory format."""
    semantic_type = SampleData[MultiBowtie2Index]
    expected_format = MultiBowtie2IndexDirFmt
    self.assertSemanticTypeRegisteredToFormat(semantic_type, expected_format)


if __name__ == '__main__':
unittest.main()
10 changes: 7 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@
'q2_types_genomics.tests': ['data/*'],
'q2_types_genomics.per_sample_data.tests':
['data/*',
'data/mags/mags-fa/*',
'data/mags/mags-fasta/*',
'data/mags/mags-fa/sample1/*', 'data/mags/mags-fa/sample2/*',
'data/mags/mags-fasta/sample1/*',
'data/mags/mags-fasta/sample2/*',
'data/mags/mags-unorganized/*',
'data/manifests/*']
'data/manifests/*',
'data/bowtie/unorganized/*',
'data/bowtie/valid/sample1/mag1/*',
'data/bowtie/valid/sample2/mag1/*']
},
zip_safe=False,
)

0 comments on commit 1c150c6

Please sign in to comment.