diff --git a/ex11_1.sh b/ex11_1.sh
new file mode 100644
index 0000000..d27b786
--- /dev/null
+++ b/ex11_1.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#Marya Poterek
+
+#Question 1
+#
+# Concatenates every input file whose name starts with a gene family name
+# into FAMILY_all.fasta, then aligns that file with muscle into FAMILY.align.
+# Set MUSCLE to point at a muscle binary other than the default below.
+
+set -euo pipefail
+
+MUSCLE=${MUSCLE:-/Users/mlpoterek/Biocomp/muscle3.8.31_i86darwin32}
+
+for gene in spore transporter
+do
+  combined="${gene}_all.fasta"
+  aligned="${gene}.align"
+
+  # Start from an empty file so a rerun does not append a second copy.
+  : > "$combined"
+
+  for file in "$gene"*
+  do
+    # The glob also matches the two files this script writes. Skip them.
+    [[ "$file" == "$combined" || "$file" == "$aligned" ]] && continue
+    # Skip an unmatched literal pattern and anything that is not a file.
+    [[ -f "$file" ]] || continue
+    cat "$file" >> "$combined"
+    # Blank line keeps records apart when a file lacks a final newline.
+    echo >> "$combined"
+  done
+
+  "$MUSCLE" -in "$combined" -out "$aligned"
+done
diff --git a/ex11_2.sh b/ex11_2.sh
new file mode 100644
index 0000000..785d588
--- /dev/null
+++ b/ex11_2.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#Marya Poterek
+
+#Question 2
+#
+# Builds a profile HMM from transporter.align, searches every proteome FASTA
+# file with it, and writes one name line plus one hit count line per
+# proteome to hmm_hits.txt.
+# HMMER_BIN and PROTEOME_DIR override the default locations below.
+
+set -euo pipefail
+
+HMMER_BIN=${HMMER_BIN:-/afs/nd.edu/user8/mpoterek/local/bin}
+PROTEOME_DIR=${PROTEOME_DIR:-/afs/nd.edu/user8/mpoterek/proteomes}
+
+"$HMMER_BIN/hmmbuild" transporter.hmm transporter.align
+
+# Start from an empty report so a rerun does not append duplicate rows.
+: > hmm_hits.txt
+
+for file in "$PROTEOME_DIR"/*.fasta
+do
+  # Skip the literal pattern when the directory holds no FASTA files.
+  [[ -f "$file" ]] || continue
+  table="$file.hmm.out"
+  "$HMMER_BIN/hmmsearch" --tblout "$table" transporter.hmm "$file"
+
+  basename "$file" .fasta >> hmm_hits.txt
+  # Count the table lines that do not start with the comment marker.
+  awk '!/^#/ { hits++ } END { print hits + 0 }' "$table" >> hmm_hits.txt
+done
diff --git a/gene_sequences/A9CH19_A b/gene_sequences/A9CH19_A
new file mode 100644
index 0000000..e69de29
diff --git a/gene_sequences/ex11.sh b/gene_sequences/ex11.sh
new file mode 100644
index 0000000..d27b786
--- /dev/null
+++ b/gene_sequences/ex11.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#Marya Poterek
+
+#Question 1
+#
+# Concatenates every input file whose name starts with a gene family name
+# into FAMILY_all.fasta, then aligns that file with muscle into FAMILY.align.
+# Set MUSCLE to point at a muscle binary other than the default below.
+
+set -euo pipefail
+
+MUSCLE=${MUSCLE:-/Users/mlpoterek/Biocomp/muscle3.8.31_i86darwin32}
+
+for gene in spore transporter
+do
+  combined="${gene}_all.fasta"
+  aligned="${gene}.align"
+
+  # Start from an empty file so a rerun does not append a second copy.
+  : > "$combined"
+
+  for file in "$gene"*
+  do
+    # The glob also matches the two files this script writes. Skip them.
+    [[ "$file" == "$combined" || "$file" == "$aligned" ]] && continue
+    # Skip an unmatched literal pattern and anything that is not a file.
+    [[ -f "$file" ]] || continue
+    cat "$file" >> "$combined"
+    # Blank line keeps records apart when a file lacks a final newline.
+    echo >> "$combined"
+  done
+
+  "$MUSCLE" -in "$combined" -out "$aligned"
+done
diff --git a/gene_sequences/finalfile.txt b/gene_sequences/finalfile.txt new file mode 100644 index 0000000..4731d62 --- /dev/null +++ 
b/gene_sequences/finalfile.txt @@ -0,0 +1,103 @@ +>Q8XVY0_RALSO/26164 +TTTTFSVSTTVNATCVINSASALTFAAFDPSQGAQASTSSISVNCTNTTPFNIGLNAGTGTGATVASRVMTSGANTLTYSLYQDSGHASVWGNTVGTNTVAGTGAGMAAGNAITKTVYGLIPSQPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVTATVQASCIIQATNLSFGNYSGSQTDATSTIQVTCTNSTPYNVGLSAGTGSGATVSNRKMSLNSTSALPYALYSDASRSTNWGNTPNQDTVSGTGNGSAQSLTVYGRIQTGNYPTPGSYADTITAT +>Q0AAK3_ALKEH/24160 +DTATFDVTATVDPTCTVDADNLVFGTYDPFSDTPLDENSEIRVQCTSDTPYDIGLDDGDNTGAEGERRMALADESDFLEYDLYHDNHGGTSWGDIDSGAELTGLSGTGSEQSYVVYGRIFAEQSVAVGNYVDTIEVT +>Q8XPY6_RALSO/36170 +KTTTFTVSLTLQADCSISANALNFGTQGVLAANVDQTATLSVTCSNTAPYNVGFDAGTTTGSTIAARLLAGSGAATVGFQLYSDSARTQIWGNTVGTDTVSGTGSGTAQVLTVYGRVPSQNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVSLTIQANCTISANALSFGTNGVLATAVNQQTTLSVTCSNTTSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGNTVGTNTVSGTGNGTAQTLSVYGQVPAQTTPKPDTYESTVTAT>Q7CVQ4_AGRFC/27162 +ATTNFNVQITIQAACQINSAGNLDFGTNGVIGAPIDVTSQIVVQCTASTPFSLGLSAGAGSGATVANRLMTSAAGATISYSLYTTAAHSTVWGNTVGTDRQTGTGTGAPQNFTVFGRVPAQTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMTVRITIQAECKVQTATDMDFGTNGVIDANVDQTSTISVQCTNSTPYNVGLSAGVGAGATVAVRKMTGPAAAVLNYSLYRDVARAQLWGTTIGTDTVAGTGNGAAQPLTVYGRVPPQTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMNVRITIQAECKIVTATDLDFGTKGVIDVNVDQTSTISVQCTNGTPYTVGLSAGGGAGATVAMRKMTGAASATINYTIYRDAARTQVWGVTAGTDVVSGTGNGNAQSITAYGRVPAQTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVSAAVAPTCIISAQNINFGSHGVLNTAVDANGAINLTCTNGLNYSVALNGGLSNSPPAARQMVQGAASIIYGLYRDVSRTNVWGSAAGQIATGTGNGSLQTLTVFGRVPAQNTPAPGNYADTVVVT +>Q985D8_RHILO/205338 +DRPTFTINAIVPANCLLAIQNIDFGSNGILGANVDATGGVSITCTPGTPYTVSLSNGTTGSAPTARKMSKGVETVTYGLYKDNARSQVWGDAAMPGSTVAGSGSGAAQNLTIYGRVPAQTTPSAGVYTDTVVVT +>A7H7Q6_ANADF/25150 +ATAQFQVTATVVKKCKISATTIAFGNYDPATILSAEGTLTLKCTKGTLYSVALDGGSTGSRQMTQAAEVLDYELYSDAGHTAVWPSTAAAPSVAAAGADEALIIFAQVPADQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVTATVVPSCTIAATPVAFGSYDPLVTNAATALDAQGTVTVTCTTGTAYTVGLGAGNSGSGSRAMQHASIAGAQLPYELYQEAARTTVWDSTVMQAGTAASITPVQYTVYGRIPAAQNVPTGNYADAVVAT +>Q0C623_HYPNA/23151 
+ANGTLDVQATVVNTCVVLTAPVVFASVGLDEVTANGSITVNCTNTSAFTVALDGGDSGDISARSLTHASLPASFNYQLYTDAGLTTVWGDGVTGSQANGSGPSQTLTVYGRTTSTPDTAGAYADEVQVT +>Q63W78_BURPS/193326 +TFAFTASATVVNDCFINATNVAFGSTGVIQGALTATGTISAQCTNGDAFRIALNGGASGNVAARAMQRTGGGGAVNYQLYLDAAHSTIWGDGTAGTSTATGTGSGLSQSLTVYGQVPAQTTPAPGTYSDTITAT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSVSANALSFGAYNTTSNLTGTTTVTITCGAWGGASSINYTLSASVGSGTYANRQVLNGSNVIAYNLYTTSADTSIWGDGNGDGTVTLSGTVTKQVGTVNLTIYGKINGGQNVVPGSYATTIPIT +>Q8XPY9_RALSO/25163 +ASAQSCSVASASLNFGSISPVQAGNTDTSTTLTVSCSGFLLQGTVARACLNLGVGSGDTGISPRVLSAGANQLQYNLYADSARSVVWGGRTTPATPAIQVDVSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGT>A9CH19_AGRFC/11158 +IAAAFVASPVLAQSCTFSMSDMNFGFVNLAGGAAVDTTATLSVTCNNPLSLALSIRICPNINAGGGGQSGGIRRMLQGSNILNYQLYQTSARTTAWGSVTQPALGAPPPIDMALPLLINSTTRTVYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPTVAWAQSCSFGVSAMNFGLVDTLSGSSSNSTATLSVNCTGLLLQRILVCPNLGTGSGGATASARQMLSGANDLNYQLYSDSARSVVWGSYAWPYPPTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS +>Q3A2W0_PELCD/10150 +IIVLLFAVDAYAFHCEVTTTPVSFGAYDVFSSFSLDTTGRISVSCNNPEKKRMPVTISISRGAANSFSPRQMRRIGGSDRMDYYLFVDASRTAVWGDGTGGSSTYVGMIDRTSPLNVPIYGRIPARQNLRAGSYQDILVVT>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSVSAGSVAFGAYDPLSPTHLDSTGTIGLTCAVRQLVTISLGTGQSGTFARELRGPGGAALRYDLYTDATRTQVWGDGTAGTATWPFETERGRYVPVYARVLAGQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLSPVPLTTTGMLQYRCSRGQPIRITFTAGSSGDVYARTLRQGPWTLAYNLYADAGFGTVWGDGTGGTAAAPAVTTLSNGLTVAYVFGRIPARQEPPVGPYSDTIVVT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTDLLPRDSAGSITYRCEGQITPITIDFSAGGSGTPLARSMAGPGAQRLEYNLYVDATRLIVWGNGTSGTGRYGPVVPLFGVEVTVPIFGRIPAGQAIPAGAYADTVVMT +>Q60C08_METCA/20167 +LLACPKISDADPYQCDIGNISVPHAVYDPTDSNPNSSGVGTVGITCHLKNAKQTQQVQYTIALSRGSSGSYNPRRMSGGRGSLGYNLYLDAARVTIWGDGSGGTFPLRGTLLLNPTTPVQQVIHNIYGLIPPLQDVYAGTYTDTVTIT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSISADPLAFGQYDPITGAQVDGASEVSVSCSLLGLVSLLVSYEISLDPGTGGSYHPRALSSATDTLDYNLYVDTARTEIWGDGTDDTATVTDSYTLGVLTVTRYYPVYGRVFADQNVAAGVYDDTITAT +>Q12FX3_POLSJ/50194 
+TWGVLLAAGTAHATISCSVSGNGFTSVYDPISTVPNDNVSSVTINCSRASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLKYDIYKDAAYSSRWGPGGSAPFTGTLNFGSGTSASLTLPYYNRVAAQQSAVAADYTDTMTAT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT + +>Q8XVY0_RALSO/26164 +TTTTFSVSTTVNATCVINSASALTFAAFDPSQGAQASTSSISVNCTNTTPFNIGLNAGTGTGATVASRVMTSGANTLTYSLYQDSGHASVWGNTVGTNTVAGTGAGMAAGNAITKTVYGLIPSQPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVTATVQASCIIQATNLSFGNYSGSQTDATSTIQVTCTNSTPYNVGLSAGTGSGATVSNRKMSLNSTSALPYALYSDASRSTNWGNTPNQDTVSGTGNGSAQSLTVYGRIQTGNYPTPGSYADTITAT +>Q0AAK3_ALKEH/24160 +DTATFDVTATVDPTCTVDADNLVFGTYDPFSDTPLDENSEIRVQCTSDTPYDIGLDDGDNTGAEGERRMALADESDFLEYDLYHDNHGGTSWGDIDSGAELTGLSGTGSEQSYVVYGRIFAEQSVAVGNYVDTIEVT +>Q8XPY6_RALSO/36170 +KTTTFTVSLTLQADCSISANALNFGTQGVLAANVDQTATLSVTCSNTAPYNVGFDAGTTTGSTIAARLLAGSGAATVGFQLYSDSARTQIWGNTVGTDTVSGTGSGTAQVLTVYGRVPSQNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVSLTIQANCTISANALSFGTNGVLATAVNQQTTLSVTCSNTTSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGNTVGTNTVSGTGNGTAQTLSVYGQVPAQTTPKPDTYESTVTAT +>Q7CVQ4_AGRFC/27162 +ATTNFNVQITIQAACQINSAGNLDFGTNGVIGAPIDVTSQIVVQCTASTPFSLGLSAGAGSGATVANRLMTSAAGATISYSLYTTAAHSTVWGNTVGTDRQTGTGTGAPQNFTVFGRVPAQTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMTVRITIQAECKVQTATDMDFGTNGVIDANVDQTSTISVQCTNSTPYNVGLSAGVGAGATVAVRKMTGPAAAVLNYSLYRDVARAQLWGTTIGTDTVAGTGNGAAQPLTVYGRVPPQTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMNVRITIQAECKIVTATDLDFGTKGVIDVNVDQTSTISVQCTNGTPYTVGLSAGGGAGATVAMRKMTGAASATINYTIYRDAARTQVWGVTAGTDVVSGTGNGNAQSITAYGRVPAQTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVSAAVAPTCIISAQNINFGSHGVLNTAVDANGAINLTCTNGLNYSVALNGGLSNSPPAARQMVQGAASIIYGLYRDVSRTNVWGSAAGQIATGTGNGSLQTLTVFGRVPAQNTPAPGNYADTVVVT +>Q985D8_RHILO/205338 +DRPTFTINAIVPANCLLAIQNIDFGSNGILGANVDATGGVSITCTPGTPYTVSLSNGTTGSAPTARKMSKGVETVTYGLYKDNARSQVWGDAAMPGSTVAGSGSGAAQNLTIYGRVPAQTTPSAGVYTDTVVVT +>A7H7Q6_ANADF/25150 
+ATAQFQVTATVVKKCKISATTIAFGNYDPATILSAEGTLTLKCTKGTLYSVALDGGSTGSRQMTQAAEVLDYELYSDAGHTAVWPSTAAAPSVAAAGADEALIIFAQVPADQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVTATVVPSCTIAATPVAFGSYDPLVTNAATALDAQGTVTVTCTTGTAYTVGLGAGNSGSGSRAMQHASIAGAQLPYELYQEAARTTVWDSTVMQAGTAASITPVQYTVYGRIPAAQNVPTGNYADAVVAT +>Q0C623_HYPNA/23151 +ANGTLDVQATVVNTCVVLTAPVVFASVGLDEVTANGSITVNCTNTSAFTVALDGGDSGDISARSLTHASLPASFNYQLYTDAGLTTVWGDGVTGSQANGSGPSQTLTVYGRTTSTPDTAGAYADEVQVT +>Q63W78_BURPS/193326 +TFAFTASATVVNDCFINATNVAFGSTGVIQGALTATGTISAQCTNGDAFRIALNGGASGNVAARAMQRTGGGGAVNYQLYLDAAHSTIWGDGTAGTSTATGTGSGLSQSLTVYGQVPAQTTPAPGTYSDTITAT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSVSANALSFGAYNTTSNLTGTTTVTITCGAWGGASSINYTLSASVGSGTYANRQVLNGSNVIAYNLYTTSADTSIWGDGNGDGTVTLSGTVTKQVGTVNLTIYGKINGGQNVVPGSYATTIPIT +>Q8XPY9_RALSO/25163 +ASAQSCSVASASLNFGSISPVQAGNTDTSTTLTVSCSGFLLQGTVARACLNLGVGSGDTGISPRVLSAGANQLQYNLYADSARSVVWGGRTTPATPAIQVDVSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGT +>A9CH19_AGRFC/11158 +IAAAFVASPVLAQSCTFSMSDMNFGFVNLAGGAAVDTTATLSVTCNNPLSLALSIRICPNINAGGGGQSGGIRRMLQGSNILNYQLYQTSARTTAWGSVTQPALGAPPPIDMALPLLINSTTRTVYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPTVAWAQSCSFGVSAMNFGLVDTLSGSSSNSTATLSVNCTGLLLQRILVCPNLGTGSGGATASARQMLSGANDLNYQLYSDSARSVVWGSYAWPYPPTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS +>Q3A2W0_PELCD/10150 +IIVLLFAVDAYAFHCEVTTTPVSFGAYDVFSSFSLDTTGRISVSCNNPEKKRMPVTISISRGAANSFSPRQMRRIGGSDRMDYYLFVDASRTAVWGDGTGGSSTYVGMIDRTSPLNVPIYGRIPARQNLRAGSYQDILVVT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSVSAGSVAFGAYDPLSPTHLDSTGTIGLTCAVRQLVTISLGTGQSGTFARELRGPGGAALRYDLYTDATRTQVWGDGTAGTATWPFETERGRYVPVYARVLAGQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLSPVPLTTTGMLQYRCSRGQPIRITFTAGSSGDVYARTLRQGPWTLAYNLYADAGFGTVWGDGTGGTAAAPAVTTLSNGLTVAYVFGRIPARQEPPVGPYSDTIVVT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTDLLPRDSAGSITYRCEGQITPITIDFSAGGSGTPLARSMAGPGAQRLEYNLYVDATRLIVWGNGTSGTGRYGPVVPLFGVEVTVPIFGRIPAGQAIPAGAYADTVVMT +>Q60C08_METCA/20167 
+LLACPKISDADPYQCDIGNISVPHAVYDPTDSNPNSSGVGTVGITCHLKNAKQTQQVQYTIALSRGSSGSYNPRRMSGGRGSLGYNLYLDAARVTIWGDGSGGTFPLRGTLLLNPTTPVQQVIHNIYGLIPPLQDVYAGTYTDTVTIT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSISADPLAFGQYDPITGAQVDGASEVSVSCSLLGLVSLLVSYEISLDPGTGGSYHPRALSSATDTLDYNLYVDTARTEIWGDGTDDTATVTDSYTLGVLTVTRYYPVYGRVFADQNVAAGVYDDTITAT +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSVSGNGFTSVYDPISTVPNDNVSSVTINCSRASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLKYDIYKDAAYSSRWGPGGSAPFTGTLNFGSGTSASLTLPYYNRVAAQQSAVAADYTDTMTAT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT + diff --git a/gene_sequences/newfile b/gene_sequences/newfile new file mode 100644 index 0000000..d06aa1e --- /dev/null +++ b/gene_sequences/newfile @@ -0,0 +1,101 @@ +>Q8XVY0_RALSO/26164 +TTTTFSVSTTVNATCVINSASALTFAAFDPSQGAQASTSSISVNCTNTTPFNIGLNAGTGTGATVASRVMTSGANTLTYSLYQDSGHASVWGNTVGTNTVAGTGAGMAAGNAITKTVYGLIPSQPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVTATVQASCIIQATNLSFGNYSGSQTDATSTIQVTCTNSTPYNVGLSAGTGSGATVSNRKMSLNSTSALPYALYSDASRSTNWGNTPNQDTVSGTGNGSAQSLTVYGRIQTGNYPTPGSYADTITAT +>Q0AAK3_ALKEH/24160 +DTATFDVTATVDPTCTVDADNLVFGTYDPFSDTPLDENSEIRVQCTSDTPYDIGLDDGDNTGAEGERRMALADESDFLEYDLYHDNHGGTSWGDIDSGAELTGLSGTGSEQSYVVYGRIFAEQSVAVGNYVDTIEVT +>Q8XPY6_RALSO/36170 +KTTTFTVSLTLQADCSISANALNFGTQGVLAANVDQTATLSVTCSNTAPYNVGFDAGTTTGSTIAARLLAGSGAATVGFQLYSDSARTQIWGNTVGTDTVSGTGSGTAQVLTVYGRVPSQNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVSLTIQANCTISANALSFGTNGVLATAVNQQTTLSVTCSNTTSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGNTVGTNTVSGTGNGTAQTLSVYGQVPAQTTPKPDTYESTVTAT>Q7CVQ4_AGRFC/27162 +ATTNFNVQITIQAACQINSAGNLDFGTNGVIGAPIDVTSQIVVQCTASTPFSLGLSAGAGSGATVANRLMTSAAGATISYSLYTTAAHSTVWGNTVGTDRQTGTGTGAPQNFTVFGRVPAQTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMTVRITIQAECKVQTATDMDFGTNGVIDANVDQTSTISVQCTNSTPYNVGLSAGVGAGATVAVRKMTGPAAAVLNYSLYRDVARAQLWGTTIGTDTVAGTGNGAAQPLTVYGRVPPQTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 
+ATGNMNVRITIQAECKIVTATDLDFGTKGVIDVNVDQTSTISVQCTNGTPYTVGLSAGGGAGATVAMRKMTGAASATINYTIYRDAARTQVWGVTAGTDVVSGTGNGNAQSITAYGRVPAQTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVSAAVAPTCIISAQNINFGSHGVLNTAVDANGAINLTCTNGLNYSVALNGGLSNSPPAARQMVQGAASIIYGLYRDVSRTNVWGSAAGQIATGTGNGSLQTLTVFGRVPAQNTPAPGNYADTVVVT +>Q985D8_RHILO/205338 +DRPTFTINAIVPANCLLAIQNIDFGSNGILGANVDATGGVSITCTPGTPYTVSLSNGTTGSAPTARKMSKGVETVTYGLYKDNARSQVWGDAAMPGSTVAGSGSGAAQNLTIYGRVPAQTTPSAGVYTDTVVVT +>A7H7Q6_ANADF/25150 +ATAQFQVTATVVKKCKISATTIAFGNYDPATILSAEGTLTLKCTKGTLYSVALDGGSTGSRQMTQAAEVLDYELYSDAGHTAVWPSTAAAPSVAAAGADEALIIFAQVPADQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVTATVVPSCTIAATPVAFGSYDPLVTNAATALDAQGTVTVTCTTGTAYTVGLGAGNSGSGSRAMQHASIAGAQLPYELYQEAARTTVWDSTVMQAGTAASITPVQYTVYGRIPAAQNVPTGNYADAVVAT +>Q0C623_HYPNA/23151 +ANGTLDVQATVVNTCVVLTAPVVFASVGLDEVTANGSITVNCTNTSAFTVALDGGDSGDISARSLTHASLPASFNYQLYTDAGLTTVWGDGVTGSQANGSGPSQTLTVYGRTTSTPDTAGAYADEVQVT +>Q63W78_BURPS/193326 +TFAFTASATVVNDCFINATNVAFGSTGVIQGALTATGTISAQCTNGDAFRIALNGGASGNVAARAMQRTGGGGAVNYQLYLDAAHSTIWGDGTAGTSTATGTGSGLSQSLTVYGQVPAQTTPAPGTYSDTITAT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSVSANALSFGAYNTTSNLTGTTTVTITCGAWGGASSINYTLSASVGSGTYANRQVLNGSNVIAYNLYTTSADTSIWGDGNGDGTVTLSGTVTKQVGTVNLTIYGKINGGQNVVPGSYATTIPIT +>Q8XPY9_RALSO/25163 +ASAQSCSVASASLNFGSISPVQAGNTDTSTTLTVSCSGFLLQGTVARACLNLGVGSGDTGISPRVLSAGANQLQYNLYADSARSVVWGGRTTPATPAIQVDVSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGT>A9CH19_AGRFC/11158 +IAAAFVASPVLAQSCTFSMSDMNFGFVNLAGGAAVDTTATLSVTCNNPLSLALSIRICPNINAGGGGQSGGIRRMLQGSNILNYQLYQTSARTTAWGSVTQPALGAPPPIDMALPLLINSTTRTVYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPTVAWAQSCSFGVSAMNFGLVDTLSGSSSNSTATLSVNCTGLLLQRILVCPNLGTGSGGATASARQMLSGANDLNYQLYSDSARSVVWGSYAWPYPPTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS +>Q3A2W0_PELCD/10150 +IIVLLFAVDAYAFHCEVTTTPVSFGAYDVFSSFSLDTTGRISVSCNNPEKKRMPVTISISRGAANSFSPRQMRRIGGSDRMDYYLFVDASRTAVWGDGTGGSSTYVGMIDRTSPLNVPIYGRIPARQNLRAGSYQDILVVT>A7H7Q5_ANADF/16150 
+APRAVDAAQPPSPGPSCSVSAGSVAFGAYDPLSPTHLDSTGTIGLTCAVRQLVTISLGTGQSGTFARELRGPGGAALRYDLYTDATRTQVWGDGTAGTATWPFETERGRYVPVYARVLAGQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLSPVPLTTTGMLQYRCSRGQPIRITFTAGSSGDVYARTLRQGPWTLAYNLYADAGFGTVWGDGTGGTAAAPAVTTLSNGLTVAYVFGRIPARQEPPVGPYSDTIVVT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTDLLPRDSAGSITYRCEGQITPITIDFSAGGSGTPLARSMAGPGAQRLEYNLYVDATRLIVWGNGTSGTGRYGPVVPLFGVEVTVPIFGRIPAGQAIPAGAYADTVVMT +>Q60C08_METCA/20167 +LLACPKISDADPYQCDIGNISVPHAVYDPTDSNPNSSGVGTVGITCHLKNAKQTQQVQYTIALSRGSSGSYNPRRMSGGRGSLGYNLYLDAARVTIWGDGSGGTFPLRGTLLLNPTTPVQQVIHNIYGLIPPLQDVYAGTYTDTVTIT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSISADPLAFGQYDPITGAQVDGASEVSVSCSLLGLVSLLVSYEISLDPGTGGSYHPRALSSATDTLDYNLYVDTARTEIWGDGTDDTATVTDSYTLGVLTVTRYYPVYGRVFADQNVAAGVYDDTITAT +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSVSGNGFTSVYDPISTVPNDNVSSVTINCSRASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLKYDIYKDAAYSSRWGPGGSAPFTGTLNFGSGTSASLTLPYYNRVAAQQSAVAADYTDTMTAT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT +>Q8XVY0_RALSO/26164 +TTTTFSVSTTVNATCVINSASALTFAAFDPSQGAQASTSSISVNCTNTTPFNIGLNAGTGTGATVASRVMTSGANTLTYSLYQDSGHASVWGNTVGTNTVAGTGAGMAAGNAITKTVYGLIPSQPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVTATVQASCIIQATNLSFGNYSGSQTDATSTIQVTCTNSTPYNVGLSAGTGSGATVSNRKMSLNSTSALPYALYSDASRSTNWGNTPNQDTVSGTGNGSAQSLTVYGRIQTGNYPTPGSYADTITAT +>Q0AAK3_ALKEH/24160 +DTATFDVTATVDPTCTVDADNLVFGTYDPFSDTPLDENSEIRVQCTSDTPYDIGLDDGDNTGAEGERRMALADESDFLEYDLYHDNHGGTSWGDIDSGAELTGLSGTGSEQSYVVYGRIFAEQSVAVGNYVDTIEVT +>Q8XPY6_RALSO/36170 +KTTTFTVSLTLQADCSISANALNFGTQGVLAANVDQTATLSVTCSNTAPYNVGFDAGTTTGSTIAARLLAGSGAATVGFQLYSDSARTQIWGNTVGTDTVSGTGSGTAQVLTVYGRVPSQNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVSLTIQANCTISANALSFGTNGVLATAVNQQTTLSVTCSNTTSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGNTVGTNTVSGTGNGTAQTLSVYGQVPAQTTPKPDTYESTVTAT +>Q7CVQ4_AGRFC/27162 
+ATTNFNVQITIQAACQINSAGNLDFGTNGVIGAPIDVTSQIVVQCTASTPFSLGLSAGAGSGATVANRLMTSAAGATISYSLYTTAAHSTVWGNTVGTDRQTGTGTGAPQNFTVFGRVPAQTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMTVRITIQAECKVQTATDMDFGTNGVIDANVDQTSTISVQCTNSTPYNVGLSAGVGAGATVAVRKMTGPAAAVLNYSLYRDVARAQLWGTTIGTDTVAGTGNGAAQPLTVYGRVPPQTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMNVRITIQAECKIVTATDLDFGTKGVIDVNVDQTSTISVQCTNGTPYTVGLSAGGGAGATVAMRKMTGAASATINYTIYRDAARTQVWGVTAGTDVVSGTGNGNAQSITAYGRVPAQTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVSAAVAPTCIISAQNINFGSHGVLNTAVDANGAINLTCTNGLNYSVALNGGLSNSPPAARQMVQGAASIIYGLYRDVSRTNVWGSAAGQIATGTGNGSLQTLTVFGRVPAQNTPAPGNYADTVVVT +>Q985D8_RHILO/205338 +DRPTFTINAIVPANCLLAIQNIDFGSNGILGANVDATGGVSITCTPGTPYTVSLSNGTTGSAPTARKMSKGVETVTYGLYKDNARSQVWGDAAMPGSTVAGSGSGAAQNLTIYGRVPAQTTPSAGVYTDTVVVT +>A7H7Q6_ANADF/25150 +ATAQFQVTATVVKKCKISATTIAFGNYDPATILSAEGTLTLKCTKGTLYSVALDGGSTGSRQMTQAAEVLDYELYSDAGHTAVWPSTAAAPSVAAAGADEALIIFAQVPADQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVTATVVPSCTIAATPVAFGSYDPLVTNAATALDAQGTVTVTCTTGTAYTVGLGAGNSGSGSRAMQHASIAGAQLPYELYQEAARTTVWDSTVMQAGTAASITPVQYTVYGRIPAAQNVPTGNYADAVVAT +>Q0C623_HYPNA/23151 +ANGTLDVQATVVNTCVVLTAPVVFASVGLDEVTANGSITVNCTNTSAFTVALDGGDSGDISARSLTHASLPASFNYQLYTDAGLTTVWGDGVTGSQANGSGPSQTLTVYGRTTSTPDTAGAYADEVQVT +>Q63W78_BURPS/193326 +TFAFTASATVVNDCFINATNVAFGSTGVIQGALTATGTISAQCTNGDAFRIALNGGASGNVAARAMQRTGGGGAVNYQLYLDAAHSTIWGDGTAGTSTATGTGSGLSQSLTVYGQVPAQTTPAPGTYSDTITAT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSVSANALSFGAYNTTSNLTGTTTVTITCGAWGGASSINYTLSASVGSGTYANRQVLNGSNVIAYNLYTTSADTSIWGDGNGDGTVTLSGTVTKQVGTVNLTIYGKINGGQNVVPGSYATTIPIT +>Q8XPY9_RALSO/25163 +ASAQSCSVASASLNFGSISPVQAGNTDTSTTLTVSCSGFLLQGTVARACLNLGVGSGDTGISPRVLSAGANQLQYNLYADSARSVVWGGRTTPATPAIQVDVSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGT +>A9CH19_AGRFC/11158 +IAAAFVASPVLAQSCTFSMSDMNFGFVNLAGGAAVDTTATLSVTCNNPLSLALSIRICPNINAGGGGQSGGIRRMLQGSNILNYQLYQTSARTTAWGSVTQPALGAPPPIDMALPLLINSTTRTVYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 
+SAALLLPTVAWAQSCSFGVSAMNFGLVDTLSGSSSNSTATLSVNCTGLLLQRILVCPNLGTGSGGATASARQMLSGANDLNYQLYSDSARSVVWGSYAWPYPPTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS +>Q3A2W0_PELCD/10150 +IIVLLFAVDAYAFHCEVTTTPVSFGAYDVFSSFSLDTTGRISVSCNNPEKKRMPVTISISRGAANSFSPRQMRRIGGSDRMDYYLFVDASRTAVWGDGTGGSSTYVGMIDRTSPLNVPIYGRIPARQNLRAGSYQDILVVT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSVSAGSVAFGAYDPLSPTHLDSTGTIGLTCAVRQLVTISLGTGQSGTFARELRGPGGAALRYDLYTDATRTQVWGDGTAGTATWPFETERGRYVPVYARVLAGQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLSPVPLTTTGMLQYRCSRGQPIRITFTAGSSGDVYARTLRQGPWTLAYNLYADAGFGTVWGDGTGGTAAAPAVTTLSNGLTVAYVFGRIPARQEPPVGPYSDTIVVT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTDLLPRDSAGSITYRCEGQITPITIDFSAGGSGTPLARSMAGPGAQRLEYNLYVDATRLIVWGNGTSGTGRYGPVVPLFGVEVTVPIFGRIPAGQAIPAGAYADTVVMT +>Q60C08_METCA/20167 +LLACPKISDADPYQCDIGNISVPHAVYDPTDSNPNSSGVGTVGITCHLKNAKQTQQVQYTIALSRGSSGSYNPRRMSGGRGSLGYNLYLDAARVTIWGDGSGGTFPLRGTLLLNPTTPVQQVIHNIYGLIPPLQDVYAGTYTDTVTIT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSISADPLAFGQYDPITGAQVDGASEVSVSCSLLGLVSLLVSYEISLDPGTGGSYHPRALSSATDTLDYNLYVDTARTEIWGDGTDDTATVTDSYTLGVLTVTRYYPVYGRVFADQNVAAGVYDDTITAT +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSVSGNGFTSVYDPISTVPNDNVSSVTINCSRASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLKYDIYKDAAYSSRWGPGGSAPFTGTLNFGSGTSASLTLPYYNRVAAQQSAVAADYTDTMTAT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT diff --git a/gene_sequences/spore.align b/gene_sequences/spore.align new file mode 100644 index 0000000..9ef8b9d --- /dev/null +++ b/gene_sequences/spore.align @@ -0,0 +1,208 @@ +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSV-SGNGFT-SVYDPIS------TVPNDNVSSVTINCS-R--- +-ASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLK--YDIYKDAAYSSRWGPG---GS +APFTGTLNFGSGT-SASLTLP-YYNRVAA-QQSAVAADYTDTMTAT +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSV-SGNGFT-SVYDPIS------TVPNDNVSSVTINCS-R--- +-ASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLK--YDIYKDAAYSSRWGPG---GS 
+APFTGTLNFGSGT-SASLTLP-YYNRVAA-QQSAVAADYTDTMTAT +>Q60C08_METCA/20167 +LLACPKIS--DADPYQCDI-GNISVPHAVYDPTDS-----NPNSSGVGTVGITCHLKNAK +QTQQVQYTIALSRGSSGSYNP--RRMSG---GRGSLG--YNLYLDAARVTIWGDGSG-GT +FPLRGTLLLNPTTPVQQVIHN-IYGLIPP-LQDVYAGTYTDTVTIT +>Q60C08_METCA/20167 +LLACPKIS--DADPYQCDI-GNISVPHAVYDPTDS-----NPNSSGVGTVGITCHLKNAK +QTQQVQYTIALSRGSSGSYNP--RRMSG---GRGSLG--YNLYLDAARVTIWGDGSG-GT +FPLRGTLLLNPTTPVQQVIHN-IYGLIPP-LQDVYAGTYTDTVTIT +>A9CH19_AGRFC/11158 +IAAAFVAS--PVLAQSCTF-SMSDMNFGFVNLAG------GAAVDTTATLSVTCN-NPLS +LALSIRICPNINAGGGGQSGGIRRMLQ----GSNILN--YQLYQTSARTTAWGSVTQ-PA +LGAPPPIDMALPLLINSTTRT-VYGRINAGQASAARGLYLSSFAGG +>A9CH19_AGRFC/11158 +IAAAFVAS--PVLAQSCTF-SMSDMNFGFVNLAG------GAAVDTTATLSVTCN-NPLS +LALSIRICPNINAGGGGQSGGIRRMLQ----GSNILN--YQLYQTSARTTAWGSVTQ-PA +LGAPPPIDMALPLLINSTTRT-VYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPT--VAWAQSCSF-GVSAMNFGLVDTLS------GSSSNSTATLSVNCT-GLLL +QR--ILVCPNLGTGSGGATASARQMLS----GANDLN--YQLYSDSARSVVWGSYAWPYP +PTAPGFALTLNVLGSGSASQT-IYGAILGGQATAVPSTYLSTFSGS +>Q985D8_RHILO/37183 +SAALLLPT--VAWAQSCSF-GVSAMNFGLVDTLS------GSSSNSTATLSVNCT-GLLL +QR--ILVCPNLGTGSGGATASARQMLS----GANDLN--YQLYSDSARSVVWGSYAWPYP +PTAPGFALTLNVLGSGSASQT-IYGAILGGQATAVPSTYLSTFSGS +>Q8XPY9_RALSO/25163 +-----------ASAQSCSV-ASASLNFGSISPVQ------AGNTDTSTTLTVSCS-GFLL +QG--TVARACLNLGVGSGDTGISPRVLSA--GANQLQ--YNLYADSARSVVWGGRTTPAT +PAIQVDVSLGLLGFGSA-TVT-VYGRVPGGQTTVPAGAYTQSFSGT +>Q8XPY9_RALSO/25163 +-----------ASAQSCSV-ASASLNFGSISPVQ------AGNTDTSTTLTVSCS-GFLL +QG--TVARACLNLGVGSGDTGISPRVLSA--GANQLQ--YNLYADSARSVVWGGRTTPAT +PAIQVDVSLGLLGFGSA-TVT-VYGRVPGGQTTVPAGAYTQSFSGT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSV-SANALSFGAYNT--------TSNLTGTTTVTITCG-A--- +WGGASSINYTLSASVGSGTYANRQVLN----GSNVIA--YNLYTTSADTSIWGDGNGDGT +VTLSGTVTKQ----VGTVNLT-IYGKING-GQNVVPGSYATTIPIT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSV-SANALSFGAYNT--------TSNLTGTTTVTITCG-A--- +WGGASSINYTLSASVGSGTYANRQVLN----GSNVIA--YNLYTTSADTSIWGDGNGDGT +VTLSGTVTKQ----VGTVNLT-IYGKING-GQNVVPGSYATTIPIT +>A1VIJ0_POLNA/19170 
+ALFLLLATAGPAQAGSCTV-GSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCT-GIAS +GG---AYSIALGPSTTGSGDRISTRYLGNSNGGDDMS--FNIYTSASYSTVWGNGTT--G +GLVGGSIPVG----DSNQSQP-VYGRIAASQNTLRAGSYSGSLTMT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTV-GSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCT-GIAS +GG---AYSIALGPSTTGSGDRISTRYLGNSNGGDDMS--FNIYTSASYSTVWGNGTT--G +GLVGGSIPVG----DSNQSQP-VYGRIAASQNTLRAGSYSGSLTMT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSI-SADPLAFGQYDPIT------GAQVDGASEVSVSCS-LLGL +VSLLVSYEISLDPGTGGSYHPRALSS-----ATDTLD--YNLYVDTARTEIWGDGTD-DT +ATVTDSYTLGVL--TVTRYYP-VYGRVFA-DQNVAAGVYDDTITAT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSI-SADPLAFGQYDPIT------GAQVDGASEVSVSCS-LLGL +VSLLVSYEISLDPGTGGSYHPRALSS-----ATDTLD--YNLYVDTARTEIWGDGTD-DT +ATVTDSYTLGVL--TVTRYYP-VYGRVFA-DQNVAAGVYDDTITAT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTD------LLPRDSAGSITYRCE-G--- +QI--TPITIDFSAGGSG--TPLARSMAGP--GAQRLE--YNLYVDATRLIVWGNGTS-GT +GRYGPVVPLF----GVEVTVP-IFGRIPA-GQAIPAGAYADTVVMT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTD------LLPRDSAGSITYRCE-G--- +QI--TPITIDFSAGGSG--TPLARSMAGP--GAQRLE--YNLYVDATRLIVWGNGTS-GT +GRYGPVVPLF----GVEVTVP-IFGRIPA-GQAIPAGAYADTVVMT +>Q3A2W0_PELCD/10150 +IIVLLFAV--DAYAFHCEV-TTTPVSFGAYDVFS------SFSLDTTGRISVSCN-NPEK +KR--MPVTISISRGAANSFSPRQMRRIG---GSDRMD--YYLFVDASRTAVWGDGTG-GS +STYVGMIDR-----TSPLNVP-IYGRIPA-RQNLRAGSYQDILVVT +>Q3A2W0_PELCD/10150 +IIVLLFAV--DAYAFHCEV-TTTPVSFGAYDVFS------SFSLDTTGRISVSCN-NPEK +KR--MPVTISISRGAANSFSPRQMRRIG---GSDRMD--YYLFVDASRTAVWGDGTG-GS +STYVGMIDR-----TSPLNVP-IYGRIPA-RQNLRAGSYQDILVVT +>Q0C623_HYPNA/23151 +ANGTLDVQ--ATVVNTCVV-LTAPVVFASVG---------LDEVTANGSITVNCT-N--- +-T--SAFTVALDGGDSGDISARSLTHAS---LPASFN--YQLYTDAGLTTVWGDGVT-GS +QANGS---------GPSQTLT-VYGRTTS-TPDTA-GAYADEVQVT +>Q0C623_HYPNA/23151 +ANGTLDVQ--ATVVNTCVV-LTAPVVFASVG---------LDEVTANGSITVNCT-N--- +-T--SAFTVALDGGDSGDISARSLTHAS---LPASFN--YQLYTDAGLTTVWGDGVT-GS +QANGS---------GPSQTLT-VYGRTTS-TPDTA-GAYADEVQVT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSV-SAGSVAFGAYDPLS------PTHLDSTGTIGLTCA-V--- 
+-R--QLVTISLGTGQSG---TFARELRGP--GGAALR--YDLYTDATRTQVWGDGTA-GT +ATWPFET-------ERGRYVP-VYARVLA-GQDVPAGPYSDTIVVT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSV-SAGSVAFGAYDPLS------PTHLDSTGTIGLTCA-V--- +-R--QLVTISLGTGQSG---TFARELRGP--GGAALR--YDLYTDATRTQVWGDGTA-GT +ATWPFET-------ERGRYVP-VYARVLA-GQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLS------PVPLTTTGMLQYRCS-R--- +-G--QPIRITFTAGSSGDVYARTLRQ-----GPWTLA--YNLYADAGFGTVWGDGTG-GT +AAAPAVTTL-----SNGLTVAYVFGRIPA-RQEPPVGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLS------PVPLTTTGMLQYRCS-R--- +-G--QPIRITFTAGSSGDVYARTLRQ-----GPWTLA--YNLYADAGFGTVWGDGTG-GT +AAAPAVTTL-----SNGLTVAYVFGRIPA-RQEPPVGPYSDTIVVT +>Q0AAK3_ALKEH/24160 +DTATFDVT--ATVDPTCTV-DADNLVFGTYDPFS------DTPLDENSEIRVQCT-S--- +-D--TPYDIGLDDGDNTGAEGERRMALAD--ESDFLE--YDLYHDNHGGTSWGDIDS-GA +ELTGLSGT------GSEQSYV-VYGRIFA-EQSVAVGNYVDTIEVT +>Q0AAK3_ALKEH/24160 +DTATFDVT--ATVDPTCTV-DADNLVFGTYDPFS------DTPLDENSEIRVQCT-S--- +-D--TPYDIGLDDGDNTGAEGERRMALAD--ESDFLE--YDLYHDNHGGTSWGDIDS-GA +ELTGLSGT------GSEQSYV-VYGRIFA-EQSVAVGNYVDTIEVT +>A7H7Q6_ANADF/25150 +ATAQFQVT--ATVVKKCKI-SATTIAFGNYDP--------ATILSAEGTLTLKCT-K--- +-G--TLYSVALDGGSTG-----SRQMTQ---AAEVLD--YELYSDAGHTAVWPSTAA-AP +SVAAA---------GADEALI-IFAQVPA-DQYPAPGAYADTVTAT +>A7H7Q6_ANADF/25150 +ATAQFQVT--ATVVKKCKI-SATTIAFGNYDP--------ATILSAEGTLTLKCT-K--- +-G--TLYSVALDGGSTG-----SRQMTQ---AAEVLD--YELYSDAGHTAVWPSTAA-AP +SVAAA---------GADEALI-IFAQVPA-DQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVT--ATVVPSCTI-AATPVAFGSYDPLVTN---AATALDAQGTVTVTCT-T--- +-G--TAYTVGLGAGNSGSGSRAMQHASI---AGAQLP--YELYQEAARTTVWDSTVM-QA +GTAAS---------ITPVQYT-VYGRIPA-AQNVPTGNYADAVVAT +>Q2IFL0_ANADE/27162 +ATATLDVT--ATVVPSCTI-AATPVAFGSYDPLVTN---AATALDAQGTVTVTCT-T--- +-G--TAYTVGLGAGNSGSGSRAMQHASI---AGAQLP--YELYQEAARTTVWDSTVM-QA +GTAAS---------ITPVQYT-VYGRIPA-AQNVPTGNYADAVVAT +>Q985D8_RHILO/205338 +DRPTFTIN--AIVPANCLL-AIQNIDFGSNGIL-------GANVDATGGVSITCT-P--- +-G--TPYTVSLSNGT-TGSAPTARKMSK---GVETVT--YGLYKDNARSQVWGDAAM-PG 
+STVAGSGS------GAAQNLT-IYGRVPA-QTTPSAGVYTDTVVVT +>Q985D8_RHILO/205338 +DRPTFTIN--AIVPANCLL-AIQNIDFGSNGIL-------GANVDATGGVSITCT-P--- +-G--TPYTVSLSNGT-TGSAPTARKMSK---GVETVT--YGLYKDNARSQVWGDAAM-PG +STVAGSGS------GAAQNLT-IYGRVPA-QTTPSAGVYTDTVVVT +>Q8XVY0_RALSO/26164 +TTTTFSVS--TTVNATCVINSASALTFAAFDPS-------QGAQASTSSISVNCT-N--- +-T--TPFNIGLNAGTGTGATVASRVMTS---GANTLT--YSLYQDSGHASVWGN-TV-GT +NTVAGTGAGMAA--GNAITKT-VYGLIPS-QPNTVPGNYADTVTVT +>Q8XVY0_RALSO/26164 +TTTTFSVS--TTVNATCVINSASALTFAAFDPS-------QGAQASTSSISVNCT-N--- +-T--TPFNIGLNAGTGTGATVASRVMTS---GANTLT--YSLYQDSGHASVWGN-TV-GT +NTVAGTGAGMAA--GNAITKT-VYGLIPS-QPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVT--ATVQASCII-QATNLSFGNYS---------GSQTDATSTIQVTCT-N--- +-S--TPYNVGLSAGTGSGATVSNRKMSLN--STSALP--YALYSDASRSTNWGN-TP-NQ +DTVSGTGN------GSAQSLT-VYGRIQT-GNYPTPGSYADTITAT +>Q0BSH7_GRABC/37169 +TTTTFQVT--ATVQASCII-QATNLSFGNYS---------GSQTDATSTIQVTCT-N--- +-S--TPYNVGLSAGTGSGATVSNRKMSLN--STSALP--YALYSDASRSTNWGN-TP-NQ +DTVSGTGN------GSAQSLT-VYGRIQT-GNYPTPGSYADTITAT +>Q8XPY6_RALSO/36170 +KTTTFTVS--LTLQADCSI-SANALNFGTQGVL-------AANVDQTATLSVTCS-N--- +-T--APYNVGFDAGTTTGSTIAARLLAGS--GAATVG--FQLYSDSARTQIWGN-TV-GT +DTVSGTGS------GTAQVLT-VYGRVPS-QNSPAAGTYSTTITAT +>Q8XPY6_RALSO/36170 +KTTTFTVS--LTLQADCSI-SANALNFGTQGVL-------AANVDQTATLSVTCS-N--- +-T--APYNVGFDAGTTTGSTIAARLLAGS--GAATVG--FQLYSDSARTQIWGN-TV-GT +DTVSGTGS------GTAQVLT-VYGRVPS-QNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVS--LTIQANCTI-SANALSFGTNGVL-------ATAVNQQTTLSVTCS-N--- +-T--TSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGN-TV-GT +NTVSGTGN------GTAQTLS-VYGQVPA-QTTPKPDTYESTVTAT +>Q63W79_BURPS/31169 +ATATFTVS--LTIQANCTI-SANALSFGTNGVL-------ATAVNQQTTLSVTCS-N--- +-T--TSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGN-TV-GT +NTVSGTGN------GTAQTLS-VYGQVPA-QTTPKPDTYESTVTAT +>Q7CVQ4_AGRFC/27162 +ATTNFNVQ--ITIQAACQINSAGNLDFGTNGVI-------GAPIDVTSQIVVQCT-A--- +-S--TPFSLGLSAGAGSGATVANRLMTSA--AGATIS--YSLYTTAAHSTVWGN-TV-GT +DRQTGTGT------GAPQNFT-VFGRVPA-QTTPAVGVYTDTVTAT +>Q7CVQ4_AGRFC/27162 
+ATTNFNVQ--ITIQAACQINSAGNLDFGTNGVI-------GAPIDVTSQIVVQCT-A--- +-S--TPFSLGLSAGAGSGATVANRLMTSA--AGATIS--YSLYTTAAHSTVWGN-TV-GT +DRQTGTGT------GAPQNFT-VFGRVPA-QTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMTVR--ITIQAECKVQTATDMDFGTNGVI-------DANVDQTSTISVQCT-N--- +-S--TPYNVGLSAGVGAGATVAVRKMTGP--AAAVLN--YSLYRDVARAQLWGT-TI-GT +DTVAGTGN------GAAQPLT-VYGRVPP-QTTPGAGVYTDTVAIT +>Q985D5_RHILO/27162 +ATGNMTVR--ITIQAECKVQTATDMDFGTNGVI-------DANVDQTSTISVQCT-N--- +-S--TPYNVGLSAGVGAGATVAVRKMTGP--AAAVLN--YSLYRDVARAQLWGT-TI-GT +DTVAGTGN------GAAQPLT-VYGRVPP-QTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMNVR--ITIQAECKIVTATDLDFGTKGVI-------DVNVDQTSTISVQCT-N--- +-G--TPYTVGLSAGGGAGATVAMRKMTGA--ASATIN--YTIYRDAARTQVWGV-TA-GT +DVVSGTGN------GNAQSIT-AYGRVPA-QTTPAPGVYSDVVSVT +>A6X8A6_OCHA4/27162 +ATGNMNVR--ITIQAECKIVTATDLDFGTKGVI-------DVNVDQTSTISVQCT-N--- +-G--TPYTVGLSAGGGAGATVAMRKMTGA--ASATIN--YTIYRDAARTQVWGV-TA-GT +DVVSGTGN------GNAQSIT-AYGRVPA-QTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVS--AAVAPTCII-SAQNINFGSHGVL-------NTAVDANGAINLTCT-N--- +-G--LNYSVALNGGLSNS-PPAARQMVQ---GAASII--YGLYRDVSRTNVWGSAAG--- +QIATGTGN------GSLQTLT-VFGRVPA-QNTPAPGNYADTVVVT +>A9CH19_AGRFC/182313 +TQVPFTVS--AAVAPTCII-SAQNINFGSHGVL-------NTAVDANGAINLTCT-N--- +-G--LNYSVALNGGLSNS-PPAARQMVQ---GAASII--YGLYRDVSRTNVWGSAAG--- +QIATGTGN------GSLQTLT-VFGRVPA-QNTPAPGNYADTVVVT +>Q63W78_BURPS/193326 +-TFAFTAS--ATVVNDCFI-NATNVAFGSTGVI-------QGALTATGTISAQCT-N--- +-G--DAFRIALNGGASGNVAARAMQRTG---GGGAVN--YQLYLDAAHSTIWGDGTA-GT +STATGTGS------GLSQSLT-VYGQVPA-QTTPAPGTYSDTITAT +>Q63W78_BURPS/193326 +-TFAFTAS--ATVVNDCFI-NATNVAFGSTGVI-------QGALTATGTISAQCT-N--- +-G--DAFRIALNGGASGNVAARAMQRTG---GGGAVN--YQLYLDAAHSTIWGDGTA-GT +STATGTGS------GLSQSLT-VYGQVPA-QTTPAPGTYSDTITAT diff --git a/gene_sequences/spore_all.fasta b/gene_sequences/spore_all.fasta new file mode 100644 index 0000000..5433a24 --- /dev/null +++ b/gene_sequences/spore_all.fasta @@ -0,0 +1,158 @@ +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSV-SGNGFT-SVYDPIS------TVPNDNVSSVTINCS-R--- 
+-ASGDPTTTTYSLASTNGLYPQGQNNRAY--YPTNKYLKYDIYKDAAYSSRWG--PGGSA +PFTG--TLNFGS-GTSASLTLPYYNRVAA-QQSAVAADYTDTMTAT +>Q60C08_METCA/20167 +LLACPKIS--DADPYQCDI-GNISVPHAVYDPTDS-----NPNSSGVGTVGITCHLKNAK +QTQQVQYTIALSRGSSGSYNP--RRMSG-----GRGSLGYNLYLDAARVTIWGDGSGGTF +PLRGTLLLNPTTPVQQVIHN-IY-GLIPP-LQDVYAGTYTDTVTIT +>A9CH19_AGRFC/11158 +IAAAFVAS--PVLAQSCTF-SMSDMNFGFVNLAG------GAAVDTTATLSVTCN-NPLS +LA--LSIRICPNINAGGGGQSGGIRRMLQ----GSNILNYQLYQTSARTTAWGSVTQPAL +GAPPPIDMALPLLINSTTRT-VY-GRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPT--VAWAQSCSF-GVSAMNFGLVDTLS------GSSSNSTATLSVNCT-G--- +-LLLQRILVCPNLGTGSGGATASARQMLS----GANDLNYQLYSDSARSVVWGSYAWPYP +PTAPGFALTLNVLGSGSASQTIY-GAILGGQATAVPSTYLSTFSGS +>Q8XPY9_RALSO/25163 +-----------ASAQSCSV-ASASLNFGSISPVQ------AGNTDTSTTLTVSCS-GFLL +QG--TVARACLNLGVGSGDTGISPRVLSA----GANQLQYNLYADSARSVVWGGRTTPAT +PAIQ-VDVSLGLLGFGSATVTVY-GRVPGGQTTVPAGAYTQSFSGT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSV-SANALSFGAYNT--------TSNLTGTTTVTITCG-A--- +WGGASSINYTLSASVGSGTYANRQVLN------GSNVIAYNLYTTSADTSIWGD-GNGDG +TVTL--SGTVTKQVGTVNLT-IY-GKING-GQNVVPGSYATTIPIT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTV-GSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCT-GIAS +GG---AYSIALGPSTTGSGDRISTRYLGN--SNGGDDMSFNIYTSASYSTVWGNGTTG-G +LVGG--SIPVG--DSNQSQP-VY-GRIAASQNTLRAGSYSGSLTMT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSI-SADPLAFGQYDPIT------GAQVDGASEVSVSCS-LLGL +VSLLVSYEISLDPGTGGSYHPRALSS-------ATDTLDYNLYVDTARTEIWGDGTDDTA +TVTD--SYTLGVLTVTRYYP-VY-GRVFA-DQNVAAGVYDDTITAT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTD------LLPRDSAGSITYRCE-G--- +QI--TPITIDFSAGGSG--TPLARSMAGP----GAQRLEYNLYVDATRLIVWGNGTSGTG +RYGP--VVPLF--GVEVTVP-IF-GRIPA-GQAIPAGAYADTVVMT +>Q3A2W0_PELCD/10150 +IIVLLFAV--DAYAFHCEV-TTTPVSFGAYDVFS------SFSLDTTGRISVSCN-NPEK +KR--MPVTISISRGAANSFSPRQMRRIG-----GSDRMDYYLFVDASRTAVWGDGTGGSS +TYVG--MIDR---TSPLNVP-IY-GRIPA-RQNLRAGSYQDILVVT +>Q0C623_HYPNA/23151 +ANGTLDVQ--ATVVNTCVV-LTAPVVFASVG---------LDEVTANGSITVNCT-N--- +-T--SAFTVALDGGDSGDISARSLTHAS-----LPASFNYQLYTDAGLTTVWGDGVTGSQ 
+ANGS---------GPSQTLT-VY-GRTTS--TPDTAGAYADEVQVT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSV-SAGSVAFGAYDPLS------PTHLDSTGTIGLTCA-V--- +-R--QLVTISLGTGQSG---TFARELRGP----GGAALRYDLYTDATRTQVWGDGTAGTA +TWPF--ET-----ERGRYVP-VY-ARVLA-GQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLS------PVPLTTTGMLQYRCS-R--- +-G--QPIRITFTAGSSGDVYARTLRQ-------GPWTLAYNLYADAGFGTVWGDGTGGTA +AAPA--VTTL---SNGLTVAYVF-GRIPA-RQEPPVGPYSDTIVVT +>Q0AAK3_ALKEH/24160 +DTATFDVT--ATVDPTCTV-DADNLVFGTYDPFS------DTPLDENSEIRVQCT-S--- +-D--TPYDIGLDDGDNTGAEGERRMALAD----ESDFLEYDLYHDNHGGTSWGDIDSGAE +LTGL--SGT----GSEQSYV-VY-GRIFA-EQSVAVGNYVDTIEVT +>A7H7Q6_ANADF/25150 +ATAQFQVT--ATVVKKCKI-SATTIAFGNYDP--------ATILSAEGTLTLKCT-K--- +-G--TLYSVALDGGSTGS-----RQMTQ-----AAEVLDYELYSDAGHTAVWPSTAAAPS +VAAA---------GADEALI-IF-AQVPA-DQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVT--ATVVPSCTI-AATPVAFGSYDPLVTN---AATALDAQGTVTVTCT-T--- +-G--TAYTVGLGAGNSGSGSRAMQHASI-----AGAQLPYELYQEAARTTVWDSTVMQAG +TAAS---------ITPVQYT-VY-GRIPA-AQNVPTGNYADAVVAT +>Q985D8_RHILO/205338 +DRPTFTIN--AIVPANCLL-AIQNIDFGSNGIL-------GANVDATGGVSITCT-P--- +-G--TPYTVSLSNGT-TGSAPTARKMSK-----GVETVTYGLYKDNARSQVWGDAAMPGS +TVAG--SGS----GAAQNLT-IY-GRVPA-QTTPSAGVYTDTVVVT +>Q8XVY0_RALSO/26164 +TTTTFSVS--TTVNATCVINSASALTFAAFDPS-------QGAQASTSSISVNCT-N--- +-T--TPFNIGLNAGTGTGATVASRVMTS-----GANTLTYSLYQDSGHASVWGN-TVGTN +TVAG--TGAGMAAGNAITKT-VY-GLIPS-QPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVT--ATVQASCII-QATNLSFGNYS---------GSQTDATSTIQVTCT-N--- +-S--TPYNVGLSAGTGSGATVSNRKMSLN----STSALPYALYSDASRSTNWGN-TPNQD +TVSG--TGN----GSAQSLT-VY-GRIQT-GNYPTPGSYADTITAT +>Q8XPY6_RALSO/36170 +KTTTFTVS--LTLQADCSI-SANALNFGTQGVL-------AANVDQTATLSVTCS-N--- +-T--APYNVGFDAGTTTGSTIAARLLAGS----GAATVGFQLYSDSARTQIWGN-TVGTD +TVSG--TGS----GTAQVLT-VY-GRVPS-QNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVS--LTIQANCTI-SANALSFGTNGVL-------ATAVNQQTTLSVTCS-N--- +-T--TSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGN-TVGTN +TVSG--TGN----GTAQTLS-VY-GQVPA-QTTPKPDTYESTVTAT +>Q7CVQ4_AGRFC/27162 
+ATTNFNVQ--ITIQAACQINSAGNLDFGTNGVI-------GAPIDVTSQIVVQCT-A--- +-S--TPFSLGLSAGAGSGATVANRLMTSA----AGATISYSLYTTAAHSTVWGN-TVGTD +RQTG--TGT----GAPQNFT-VF-GRVPA-QTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMTVR--ITIQAECKVQTATDMDFGTNGVI-------DANVDQTSTISVQCT-N--- +-S--TPYNVGLSAGVGAGATVAVRKMTGP----AAAVLNYSLYRDVARAQLWGT-TIGTD +TVAG--TGN----GAAQPLT-VY-GRVPP-QTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMNVR--ITIQAECKIVTATDLDFGTKGVI-------DVNVDQTSTISVQCT-N--- +-G--TPYTVGLSAGGGAGATVAMRKMTGA----ASATINYTIYRDAARTQVWGV-TAGTD +VVSG--TGN----GNAQSIT-AY-GRVPA-QTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVS--AAVAPTCII-SAQNINFGSHGVL-------NTAVDANGAINLTCT-N--- +-G--LNYSVALNGGLSNS-PPAARQMVQ-----GAASIIYGLYRDVSRTNVWG--SAAGQ +IATG--TGN----GSLQTLT-VF-GRVPA-QNTPAPGNYADTVVVT +>Q63W78_BURPS/193326 +-TFAFTAS--ATVVNDCFI-NATNVAFGSTGVI-------QGALTATGTISAQCT-N--- +-G--DAFRIALNGGASGNVAARAMQRTG-----GGGAVNYQLYLDAAHSTIWGDGTAGTS +TATG--TGS----GLSQSLT-VY-GQVPA-QTTPAPGTYSDTITAT + +>Q8XVY0_RALSO/26164 +TTTTFSVSTTVNATCVINSASALTFAAFDPSQGAQASTSSISVNCTNTTPFNIGLNAGTGTGATVASRVMTSGANTLTYSLYQDSGHASVWGNTVGTNTVAGTGAGMAAGNAITKTVYGLIPSQPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVTATVQASCIIQATNLSFGNYSGSQTDATSTIQVTCTNSTPYNVGLSAGTGSGATVSNRKMSLNSTSALPYALYSDASRSTNWGNTPNQDTVSGTGNGSAQSLTVYGRIQTGNYPTPGSYADTITAT +>Q0AAK3_ALKEH/24160 +DTATFDVTATVDPTCTVDADNLVFGTYDPFSDTPLDENSEIRVQCTSDTPYDIGLDDGDNTGAEGERRMALADESDFLEYDLYHDNHGGTSWGDIDSGAELTGLSGTGSEQSYVVYGRIFAEQSVAVGNYVDTIEVT +>Q8XPY6_RALSO/36170 +KTTTFTVSLTLQADCSISANALNFGTQGVLAANVDQTATLSVTCSNTAPYNVGFDAGTTTGSTIAARLLAGSGAATVGFQLYSDSARTQIWGNTVGTDTVSGTGSGTAQVLTVYGRVPSQNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVSLTIQANCTISANALSFGTNGVLATAVNQQTTLSVTCSNTTSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGNTVGTNTVSGTGNGTAQTLSVYGQVPAQTTPKPDTYESTVTAT +>Q7CVQ4_AGRFC/27162 +ATTNFNVQITIQAACQINSAGNLDFGTNGVIGAPIDVTSQIVVQCTASTPFSLGLSAGAGSGATVANRLMTSAAGATISYSLYTTAAHSTVWGNTVGTDRQTGTGTGAPQNFTVFGRVPAQTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 
+ATGNMTVRITIQAECKVQTATDMDFGTNGVIDANVDQTSTISVQCTNSTPYNVGLSAGVGAGATVAVRKMTGPAAAVLNYSLYRDVARAQLWGTTIGTDTVAGTGNGAAQPLTVYGRVPPQTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMNVRITIQAECKIVTATDLDFGTKGVIDVNVDQTSTISVQCTNGTPYTVGLSAGGGAGATVAMRKMTGAASATINYTIYRDAARTQVWGVTAGTDVVSGTGNGNAQSITAYGRVPAQTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVSAAVAPTCIISAQNINFGSHGVLNTAVDANGAINLTCTNGLNYSVALNGGLSNSPPAARQMVQGAASIIYGLYRDVSRTNVWGSAAGQIATGTGNGSLQTLTVFGRVPAQNTPAPGNYADTVVVT +>Q985D8_RHILO/205338 +DRPTFTINAIVPANCLLAIQNIDFGSNGILGANVDATGGVSITCTPGTPYTVSLSNGTTGSAPTARKMSKGVETVTYGLYKDNARSQVWGDAAMPGSTVAGSGSGAAQNLTIYGRVPAQTTPSAGVYTDTVVVT +>A7H7Q6_ANADF/25150 +ATAQFQVTATVVKKCKISATTIAFGNYDPATILSAEGTLTLKCTKGTLYSVALDGGSTGSRQMTQAAEVLDYELYSDAGHTAVWPSTAAAPSVAAAGADEALIIFAQVPADQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVTATVVPSCTIAATPVAFGSYDPLVTNAATALDAQGTVTVTCTTGTAYTVGLGAGNSGSGSRAMQHASIAGAQLPYELYQEAARTTVWDSTVMQAGTAASITPVQYTVYGRIPAAQNVPTGNYADAVVAT +>Q0C623_HYPNA/23151 +ANGTLDVQATVVNTCVVLTAPVVFASVGLDEVTANGSITVNCTNTSAFTVALDGGDSGDISARSLTHASLPASFNYQLYTDAGLTTVWGDGVTGSQANGSGPSQTLTVYGRTTSTPDTAGAYADEVQVT +>Q63W78_BURPS/193326 +TFAFTASATVVNDCFINATNVAFGSTGVIQGALTATGTISAQCTNGDAFRIALNGGASGNVAARAMQRTGGGGAVNYQLYLDAAHSTIWGDGTAGTSTATGTGSGLSQSLTVYGQVPAQTTPAPGTYSDTITAT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSVSANALSFGAYNTTSNLTGTTTVTITCGAWGGASSINYTLSASVGSGTYANRQVLNGSNVIAYNLYTTSADTSIWGDGNGDGTVTLSGTVTKQVGTVNLTIYGKINGGQNVVPGSYATTIPIT +>Q8XPY9_RALSO/25163 +ASAQSCSVASASLNFGSISPVQAGNTDTSTTLTVSCSGFLLQGTVARACLNLGVGSGDTGISPRVLSAGANQLQYNLYADSARSVVWGGRTTPATPAIQVDVSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGT +>A9CH19_AGRFC/11158 +IAAAFVASPVLAQSCTFSMSDMNFGFVNLAGGAAVDTTATLSVTCNNPLSLALSIRICPNINAGGGGQSGGIRRMLQGSNILNYQLYQTSARTTAWGSVTQPALGAPPPIDMALPLLINSTTRTVYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPTVAWAQSCSFGVSAMNFGLVDTLSGSSSNSTATLSVNCTGLLLQRILVCPNLGTGSGGATASARQMLSGANDLNYQLYSDSARSVVWGSYAWPYPPTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS +>Q3A2W0_PELCD/10150 
+IIVLLFAVDAYAFHCEVTTTPVSFGAYDVFSSFSLDTTGRISVSCNNPEKKRMPVTISISRGAANSFSPRQMRRIGGSDRMDYYLFVDASRTAVWGDGTGGSSTYVGMIDRTSPLNVPIYGRIPARQNLRAGSYQDILVVT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSVSAGSVAFGAYDPLSPTHLDSTGTIGLTCAVRQLVTISLGTGQSGTFARELRGPGGAALRYDLYTDATRTQVWGDGTAGTATWPFETERGRYVPVYARVLAGQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLSPVPLTTTGMLQYRCSRGQPIRITFTAGSSGDVYARTLRQGPWTLAYNLYADAGFGTVWGDGTGGTAAAPAVTTLSNGLTVAYVFGRIPARQEPPVGPYSDTIVVT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTDLLPRDSAGSITYRCEGQITPITIDFSAGGSGTPLARSMAGPGAQRLEYNLYVDATRLIVWGNGTSGTGRYGPVVPLFGVEVTVPIFGRIPAGQAIPAGAYADTVVMT +>Q60C08_METCA/20167 +LLACPKISDADPYQCDIGNISVPHAVYDPTDSNPNSSGVGTVGITCHLKNAKQTQQVQYTIALSRGSSGSYNPRRMSGGRGSLGYNLYLDAARVTIWGDGSGGTFPLRGTLLLNPTTPVQQVIHNIYGLIPPLQDVYAGTYTDTVTIT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSISADPLAFGQYDPITGAQVDGASEVSVSCSLLGLVSLLVSYEISLDPGTGGSYHPRALSSATDTLDYNLYVDTARTEIWGDGTDDTATVTDSYTLGVLTVTRYYPVYGRVFADQNVAAGVYDDTITAT +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSVSGNGFTSVYDPISTVPNDNVSSVTINCSRASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLKYDIYKDAAYSSRWGPGGSAPFTGTLNFGSGTSASLTLPYYNRVAAQQSAVAADYTDTMTAT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT + diff --git a/gene_sequences/sporecoat04.fasta b/gene_sequences/sporecoat04.fasta index 809ace1..0c368ad 100755 --- a/gene_sequences/sporecoat04.fasta +++ b/gene_sequences/sporecoat04.fasta @@ -11,4 +11,4 @@ SLFLVAAGSGSAQAYTCSISADPLAFGQYDPITGAQVDGASEVSVSCSLLGLVSLLVSYEISLDPGTGGSYHPRALSSAT >Q12FX3_POLSJ/50194 TWGVLLAAGTAHATISCSVSGNGFTSVYDPISTVPNDNVSSVTINCSRASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLKYDIYKDAAYSSRWGPGGSAPFTGTLNFGSGTSASLTLPYYNRVAAQQSAVAADYTDTMTAT >A1VIJ0_POLNA/19170 -ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT \ No newline at end of file 
+ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT diff --git a/gene_sequences/transporter.align b/gene_sequences/transporter.align new file mode 100644 index 0000000..2f47110 --- /dev/null +++ b/gene_sequences/transporter.align @@ -0,0 +1,32 @@ +>A0A026RKY7_ECOLX/4411 +NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFM +MFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLA +GCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVF +-MMINMRFKDKDHQCIAADAGGVKKEDF-----IAVFKDRNFWVFVIFIVGTWSFYNIFD +QQLFPVFYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIM +ALRILSCALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASS +LGIVLLSTPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPV +>RAFB_ECOLX/4415 +ASTHKNTDFWIFGLFFFLYFFIMATCFPFLPVWLSDVVGLSKTDTGIVFSCLSLFAISFQ +PLLGVISDRLGLKKNLIWSISLLLVFFAPFFLYVFAPLLHLNIWAGALTGGVFIGFVFSA +GAGAIEAYIERVSRSSGFEYGKARMFGCLGWALCATMAGILFNVDPSLVFWMGSGGALLL +LLLLYLARPSTSQTAMVMNALGANSSLISTRMVFSLFRMRQMWMFVLYTIGVACVYDVFD +QQ-FAIFFRSFFDTPQAGIKAFGFATTAGEICNAIIMFCTPWIINRIGAKNTLLVAGGIM +TIRITGSAFATTMTEVVILKMLHALEVPFLLVGAFKYITGVFDTRLSATVYLIGFQFSKQ +LAAILLSTFAGHLYDRMGFQNTYFVLGMIVLTVTVISAFTLS-SSPGIVHPSVE +>LACY_CITFR/1412 +MYYLKNTNFWMFGFFFFFYFFIMGAYFPFFPIWLHEVNHISKGDTGIIFACISLFSLLFQ +PIFGLLSDKLGLRKHLLWVITGMLVMFAPFFIYVFGPLLQVNILLGSIVGGIYLGFIYNA +GAPAIEAYIEKASRRSNFEFGRARMFGCVGWALCASIAGIMFTINNQFVFWLGSGCAVIL +ALLLLFSKTDVPSSAKVADAVGANNSAFSLKLALELFKQPKLWLISLYVVGVSCTYDVFD +QQ-FANFFTSFFATGEQGTRVFGYVTTMGELLNASIMFFAPLIVNRIGGKNALLLAGTIM +SVRIIGS-HSHTALEVVILKTLHMFEIPFLIVGCFKYITSQFEVRFSATIYLVCFCFFKQ +LAMIFMSVLAGKMYESIGFQGAYLVLGIIRVSFTLISVFTLSGPGPFSLLRRRE +>LACY_KLEOX/6416 +LAPRERHNFIYFMLFFFFYYFIMSAYFPFFPVWLAEVNHLTKTETGIVFSCISLFAIIFQ +PVFGLISDKLGLRKHLLWTITILLILFAPFFIFVFSPLLQMNIMAGALVGGVYLGIVFSS +RSGAVEAYIERVSRANRFEYGKVRVSGCVGWALCASITGILFSIDPNITFWIASGFALIL +GVLLWVSKPESSNSAEVIDALGANRQAFSMRTAAELFRMPRFWGFIIYVVGVASVYDVFD 
+QQ-FANFFKGFFSSPQRGTEVFGFVTTGGELLNALIMFCAPAIINRIGAKNALLIAGLIM +SVRILGSSFATSAVEVIILKMLHMFEIPFLLVGTFKYISSAFKGKLSATLFLIGFNLSKQ +LSSVVLSAWVGRMYDTVGFHQAYLILGCITLSFTVISLFTL--KGSKTLLPATA diff --git a/gene_sequences/transporter_all.fasta b/gene_sequences/transporter_all.fasta new file mode 100644 index 0000000..c75277a --- /dev/null +++ b/gene_sequences/transporter_all.fasta @@ -0,0 +1,8 @@ +>LACY_CITFR/1412 +MYYLKNTNFWMFGFFFFFYFFIMGAYFPFFPIWLHEVNHISKGDTGIIFACISLFSLLFQPIFGLLSDKLGLRKHLLWVITGMLVMFAPFFIYVFGPLLQVNILLGSIVGGIYLGFIYNAGAPAIEAYIEKASRRSNFEFGRARMFGCVGWALCASIAGIMFTINNQFVFWLGSGCAVILALLLLFSKTDVPSSAKVADAVGANNSAFSLKLALELFKQPKLWLISLYVVGVSCTYDVFDQQFANFFTSFFATGEQGTRVFGYVTTMGELLNASIMFFAPLIVNRIGGKNALLLAGTIMSVRIIGSHSHTALEVVILKTLHMFEIPFLIVGCFKYITSQFEVRFSATIYLVCFCFFKQLAMIFMSVLAGKMYESIGFQGAYLVLGIIRVSFTLISVFTLSGPGPFSLLRRRE +>LACY_KLEOX/6416 +LAPRERHNFIYFMLFFFFYYFIMSAYFPFFPVWLAEVNHLTKTETGIVFSCISLFAIIFQPVFGLISDKLGLRKHLLWTITILLILFAPFFIFVFSPLLQMNIMAGALVGGVYLGIVFSSRSGAVEAYIERVSRANRFEYGKVRVSGCVGWALCASITGILFSIDPNITFWIASGFALILGVLLWVSKPESSNSAEVIDALGANRQAFSMRTAAELFRMPRFWGFIIYVVGVASVYDVFDQQFANFFKGFFSSPQRGTEVFGFVTTGGELLNALIMFCAPAIINRIGAKNALLIAGLIMSVRILGSSFATSAVEVIILKMLHMFEIPFLLVGTFKYISSAFKGKLSATLFLIGFNLSKQLSSVVLSAWVGRMYDTVGFHQAYLILGCITLSFTVISLFTLKGSKTLLPATA +>RAFB_ECOLX/4415 +ASTHKNTDFWIFGLFFFLYFFIMATCFPFLPVWLSDVVGLSKTDTGIVFSCLSLFAISFQPLLGVISDRLGLKKNLIWSISLLLVFFAPFFLYVFAPLLHLNIWAGALTGGVFIGFVFSAGAGAIEAYIERVSRSSGFEYGKARMFGCLGWALCATMAGILFNVDPSLVFWMGSGGALLLLLLLYLARPSTSQTAMVMNALGANSSLISTRMVFSLFRMRQMWMFVLYTIGVACVYDVFDQQFAIFFRSFFDTPQAGIKAFGFATTAGEICNAIIMFCTPWIINRIGAKNTLLVAGGIMTIRITGSAFATTMTEVVILKMLHALEVPFLLVGAFKYITGVFDTRLSATVYLIGFQFSKQLAAILLSTFAGHLYDRMGFQNTYFVLGMIVLTVTVISAFTLSSSPGIVHPSVE +>A0A026RKY7_ECOLX/4411 
+NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFMMFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLAGCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVFMMINMRFKDKDHQCIAADAGGVKKEDFIAVFKDRNFWVFVIFIVGTWSFYNIFDQQLFPVFYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIMALRILSCALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASSLGIVLLSTPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPV