diff --git a/pyfastaq/sequences.py b/pyfastaq/sequences.py index d7e2c07..207e334 100644 --- a/pyfastaq/sequences.py +++ b/pyfastaq/sequences.py @@ -151,7 +151,9 @@ class Fasta: '''Class to store and manipulate FASTA sequences. They have two things: a name and a sequence''' # this defines the line length when printing sequences line_length = 60 - + # cached translation object for reverse-complementation (with ambiguous codes) + _revcomp_trans = str.maketrans("ATCGatcgMRWSYKVHDBNmrwsykvhdbn", + "TAGCtagcKYWSRMBDHVNkywsrmbdhvn") def _get_id_from_header_line(self, line): if line.startswith('>'): return line.rstrip()[1:] @@ -214,7 +216,7 @@ def strip_illumina_suffix(self): def revcomp(self): '''Reverse complements the sequence''' - self.seq = self.seq.translate(str.maketrans("ATCGatcg", "TAGCtagc"))[::-1] + self.seq = self.seq.translate(self._revcomp_trans)[::-1] def is_all_Ns(self, start=0, end=None): '''Returns true if the sequence is all Ns (upper or lower case)''' diff --git a/pyfastaq/tests/sequences_test.py b/pyfastaq/tests/sequences_test.py index 2663df7..6b1740e 100644 --- a/pyfastaq/tests/sequences_test.py +++ b/pyfastaq/tests/sequences_test.py @@ -125,6 +125,12 @@ def test_revcomp(self): fa.revcomp() self.assertEqual(fa, sequences.Fasta('ID', 'nacgtNACGT')) + def test_revcomp_ambig(self): + '''revcomp() should correctly reverse complement a sequence with ambiguous bases''' + fa = sequences.Fasta('ID', 'ACTGMRWSYKVHDBNmrwsykvhdbn') + fa.revcomp() + self.assertEqual(fa, sequences.Fasta('ID', 'nvhdbmrswykNVHDBMRSWYKCAGT')) + def test_gaps(self): '''gaps() should find the gaps in a sequence correctly''' test_seqs = [sequences.Fasta('ID', 'ACGT'),