diff --git a/gene_sequences/script-problem1.sh b/gene_sequences/script-problem1.sh
new file mode 100644
index 0000000..88d5acf
--- /dev/null
+++ b/gene_sequences/script-problem1.sh
@@ -0,0 +1,46 @@
+#usage: bash script-problem1.sh
+#origin directory /afs/nd.edu/user30/poneil1/local/bin/IBC_EX11/gene_sequences
+#
+#For each gene family, transporter then sporecoat, run muscle on every input
+#file and append each result to the family combined fasta, then run muscle on
+#the combined fasta to write the family final fasta.
+#Stops at the first muscle failure.
+
+#run each transporter file into an aligned sequence of their own and...
+#...combine these sequences into a single fasta file
+#start from an empty combined file so a re-run does not append duplicates
+: > transporter-combined.fasta
+for transporter in transporter*.fasta
+do
+    #the glob also matches this script's own output files; skip them
+    if [ "$transporter" = transporterout.fasta ] ||
+       [ "$transporter" = transporter-combined.fasta ] ||
+       [ "$transporter" = transporter-final.fasta ]
+    then
+        continue
+    fi
+    ../../muscle -in "$transporter" -out transporterout.fasta || exit 1
+    cat transporterout.fasta >> transporter-combined.fasta
+done
+#run the combined fasta file through muscle to align for final sequence
+../../muscle -in transporter-combined.fasta -out transporter-final.fasta || exit 1
+
+#run each sporecoat file into an aligned sequence of their own and...
+#...combine these sequences into a single fasta file
+#start from an empty combined file so a re-run does not append duplicates
+: > sporecoat-combined.fasta
+for sporecoat in sporecoat*
+do
+    #the glob also matches this script's own output files; skip them
+    if [ "$sporecoat" = sporecoatout.fasta ] ||
+       [ "$sporecoat" = sporecoat-combined.fasta ] ||
+       [ "$sporecoat" = sporecoat-final.fasta ]
+    then
+        continue
+    fi
+    ../../muscle -in "$sporecoat" -out sporecoatout.fasta || exit 1
+    cat sporecoatout.fasta >> sporecoat-combined.fasta
+done
+
+#run the combined fasta file through muscle to align for final sequence
+../../muscle -in sporecoat-combined.fasta -out sporecoat-final.fasta || exit 1
diff --git a/gene_sequences/script-problem2.sh b/gene_sequences/script-problem2.sh
new file mode 100644
index 0000000..7e17039
--- /dev/null
+++ b/gene_sequences/script-problem2.sh
@@ -0,0 +1,22 @@
+#Usage: bash script-problem2.sh
+#HMMer searches each proteome fasta with a profile built from each transporter fasta
+#initial working directory: gene_sequences
+#Output: sequence-hits.txt gets three lines per transporter and proteome pair:
+#the transporter file, the proteome file, and the count printed by grep -c
+
+#start from an empty results file so repeated runs do not pile up duplicate records
+: > sequence-hits.txt
+for transporter in transporter0*.fasta
+do
+    #the profile depends only on the transporter, so build it once per transporter
+    ../../hmmbuild transporter.hmm "$transporter" || exit 1
+    for proteome in ../proteomes/*
+    do
+        echo "$transporter" >> sequence-hits.txt
+        echo "$proteome" >> sequence-hits.txt
+        #NOTE(review): the committed command stopped at a bare "grep$"; the count
+        #lines in sequence-hits.txt suggest a hit count was appended here. The
+        #">> " pattern is a reconstruction, not the original - confirm before use.
+        ../../hmmsearch transporter.hmm "$proteome" | grep -c '^>> ' >> sequence-hits.txt
+    done
+done
diff --git a/gene_sequences/sequence-hits.txt b/gene_sequences/sequence-hits.txt
new file mode 100644
index 0000000..4b069b3
--- /dev/null
+++ b/gene_sequences/sequence-hits.txt
@@ -0,0 +1,352 @@
+transporter01.fasta +../proteomes/Arthrobacter.fasta +1 +transporter01.fasta +../proteomes/Bacillus.fasta +0 +transporter01.fasta +../proteomes/Clostridium.fasta +0 +transporter01.fasta +../proteomes/Flavobacterium.fasta +0 +transporter01.fasta +../proteomes/Limnohabitans.fasta +0 +transporter01.fasta +../proteomes/Rhizobium.fasta +0 +transporter01.fasta +../proteomes/Roseobacter.fasta +1 +transporter01.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter02.fasta +../proteomes/Arthrobacter.fasta +0 +transporter02.fasta +../proteomes/Bacillus.fasta +0 +transporter02.fasta +../proteomes/Clostridium.fasta +0 +transporter02.fasta +../proteomes/Flavobacterium.fasta +0 +transporter02.fasta +../proteomes/Limnohabitans.fasta +0 +transporter02.fasta +../proteomes/Rhizobium.fasta +0 +transporter02.fasta +../proteomes/Roseobacter.fasta +0 +transporter02.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter03.fasta +../proteomes/Arthrobacter.fasta +0 +transporter03.fasta +../proteomes/Bacillus.fasta +0 +transporter03.fasta +../proteomes/Clostridium.fasta +0 +transporter03.fasta +../proteomes/Flavobacterium.fasta +0 +transporter03.fasta +../proteomes/Limnohabitans.fasta +0 +transporter03.fasta +../proteomes/Rhizobium.fasta +0 +transporter03.fasta +../proteomes/Roseobacter.fasta +0 +transporter03.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter04.fasta +../proteomes/Arthrobacter.fasta +0 +transporter04.fasta +../proteomes/Bacillus.fasta +1 +transporter04.fasta +../proteomes/Clostridium.fasta +0 +transporter04.fasta +../proteomes/Flavobacterium.fasta +1 +transporter04.fasta +../proteomes/Limnohabitans.fasta +0 +transporter04.fasta +../proteomes/Rhizobium.fasta +2 +transporter04.fasta +../proteomes/Roseobacter.fasta +0 +transporter04.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter01.fasta +../proteomes/Arthrobacter.fasta +1 +transporter01.fasta +../proteomes/Bacillus.fasta +0 +transporter01.fasta +../proteomes/Clostridium.fasta +0 +transporter01.fasta 
+../proteomes/Flavobacterium.fasta +0 +transporter01.fasta +../proteomes/Limnohabitans.fasta +0 +transporter01.fasta +../proteomes/Rhizobium.fasta +0 +transporter01.fasta +../proteomes/Roseobacter.fasta +1 +transporter01.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter02.fasta +../proteomes/Arthrobacter.fasta +0 +transporter02.fasta +../proteomes/Bacillus.fasta +0 +transporter02.fasta +../proteomes/Clostridium.fasta +0 +transporter02.fasta +../proteomes/Flavobacterium.fasta +0 +transporter02.fasta +../proteomes/Limnohabitans.fasta +0 +transporter02.fasta +../proteomes/Rhizobium.fasta +0 +transporter02.fasta +../proteomes/Roseobacter.fasta +0 +transporter02.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter03.fasta +../proteomes/Arthrobacter.fasta +0 +transporter03.fasta +../proteomes/Bacillus.fasta +0 +transporter03.fasta +../proteomes/Clostridium.fasta +0 +transporter03.fasta +../proteomes/Flavobacterium.fasta +0 +transporter03.fasta +../proteomes/Limnohabitans.fasta +0 +transporter03.fasta +../proteomes/Rhizobium.fasta +0 +transporter03.fasta +../proteomes/Roseobacter.fasta +0 +transporter03.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter04.fasta +../proteomes/Arthrobacter.fasta +0 +transporter04.fasta +../proteomes/Bacillus.fasta +1 +transporter04.fasta +../proteomes/Clostridium.fasta +0 +transporter04.fasta +../proteomes/Flavobacterium.fasta +1 +transporter04.fasta +../proteomes/Limnohabitans.fasta +0 +transporter04.fasta +../proteomes/Rhizobium.fasta +2 +transporter04.fasta +../proteomes/Roseobacter.fasta +0 +transporter04.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter-combined.fasta +../proteomes/Arthrobacter.fasta +transporter-combined.fasta +../proteomes/Bacillus.fasta +transporter-combined.fasta +../proteomes/Clostridium.fasta +transporter-combined.fasta +../proteomes/Flavobacterium.fasta +transporter-combined.fasta +../proteomes/Limnohabitans.fasta +transporter-combined.fasta +../proteomes/Rhizobium.fasta 
+transporter-combined.fasta +../proteomes/Roseobacter.fasta +transporter-combined.fasta +../proteomes/Verrucomicrobia.fasta +transporter-final.fasta +../proteomes/Arthrobacter.fasta +0 +transporter-final.fasta +../proteomes/Bacillus.fasta +1 +transporter-final.fasta +../proteomes/Clostridium.fasta +0 +transporter-final.fasta +../proteomes/Flavobacterium.fasta +1 +transporter-final.fasta +../proteomes/Limnohabitans.fasta +1 +transporter-final.fasta +../proteomes/Rhizobium.fasta +0 +transporter-final.fasta +../proteomes/Roseobacter.fasta +1 +transporter-final.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporterout.fasta +../proteomes/Arthrobacter.fasta +0 +transporterout.fasta +../proteomes/Bacillus.fasta +1 +transporterout.fasta +../proteomes/Clostridium.fasta +0 +transporterout.fasta +../proteomes/Flavobacterium.fasta +1 +transporterout.fasta +../proteomes/Limnohabitans.fasta +0 +transporterout.fasta +../proteomes/Rhizobium.fasta +2 +transporterout.fasta +../proteomes/Roseobacter.fasta +0 +transporterout.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter01.fasta +../proteomes/Arthrobacter.fasta +1 +transporter01.fasta +../proteomes/Bacillus.fasta +0 +transporter01.fasta +../proteomes/Clostridium.fasta +0 +transporter01.fasta +../proteomes/Flavobacterium.fasta +0 +transporter01.fasta +../proteomes/Limnohabitans.fasta +0 +transporter01.fasta +../proteomes/Rhizobium.fasta +0 +transporter01.fasta +../proteomes/Roseobacter.fasta +1 +transporter01.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter02.fasta +../proteomes/Arthrobacter.fasta +0 +transporter02.fasta +../proteomes/Bacillus.fasta +0 +transporter02.fasta +../proteomes/Clostridium.fasta +0 +transporter02.fasta +../proteomes/Flavobacterium.fasta +0 +transporter02.fasta +../proteomes/Limnohabitans.fasta +0 +transporter02.fasta +../proteomes/Rhizobium.fasta +0 +transporter02.fasta +../proteomes/Roseobacter.fasta +0 +transporter02.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter03.fasta 
+../proteomes/Arthrobacter.fasta +0 +transporter03.fasta +../proteomes/Bacillus.fasta +0 +transporter03.fasta +../proteomes/Clostridium.fasta +0 +transporter03.fasta +../proteomes/Flavobacterium.fasta +0 +transporter03.fasta +../proteomes/Limnohabitans.fasta +0 +transporter03.fasta +../proteomes/Rhizobium.fasta +0 +transporter03.fasta +../proteomes/Roseobacter.fasta +0 +transporter03.fasta +../proteomes/Verrucomicrobia.fasta +0 +transporter04.fasta +../proteomes/Arthrobacter.fasta +0 +transporter04.fasta +../proteomes/Bacillus.fasta +1 +transporter04.fasta +../proteomes/Clostridium.fasta +0 +transporter04.fasta +../proteomes/Flavobacterium.fasta +1 +transporter04.fasta +../proteomes/Limnohabitans.fasta +0 +transporter04.fasta +../proteomes/Rhizobium.fasta +2 +transporter04.fasta +../proteomes/Roseobacter.fasta +0 +transporter04.fasta +../proteomes/Verrucomicrobia.fasta +0 diff --git a/gene_sequences/sporecoat-combined.fasta b/gene_sequences/sporecoat-combined.fasta new file mode 100644 index 0000000..f9fa1da --- /dev/null +++ b/gene_sequences/sporecoat-combined.fasta @@ -0,0 +1,104 @@ +>Q0AAK3_ALKEH/24160 +DTATFDVTATVDPTCTV-DADNLVFGTYDPFSDTPLDENSEIRVQCTSDTPYDIGLDDGD +NTGAEGERRMAL----ADESDFLEYDLYHDNHGGTSWGDIDSGAELTGLSG----TGSEQ +SYVVYGRIFAEQSVAVGNYVDTIEVT +>Q8XPY6_RALSO/36170 +KTTTFTVSLTLQADCSI-SANALNFGTQGV-LAANVDQTATLSVTCSNTAPYNVGFDAGT +TTGSTIAARLLA----GSGAATVGFQLYSDSARTQIWGNTVGTDTVSG-TG----SGTAQ +VLTVYGRVPSQNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 +ATATFTVSLTIQANCTI-SANALSFGTNGV-LATAVNQQTTLSVTCSNTTSYNVGLDAGN +VSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGNTVGTNTVSG-TG----NGTAQ +TLSVYGQVPAQTTPKPDTYESTVTAT +>Q0BSH7_GRABC/37169 +TTTTFQVTATVQASCII-QATNLSFGNY---SGSQTDATSTIQVTCTNSTPYNVGLSAGT +GSGATVSNRKMS----LNSTSALPYALYSDASRSTNWGNTPNQDTVSG-TG----NGSAQ +SLTVYGRIQTGNYPTPGSYADTITAT +>Q8XVY0_RALSO/26164 +TTTTFSVSTTVNATCVINSASALTFAAFDP-SQGAQASTSSISVNCTNTTPFNIGLNAGT +GTGATVASRVMT-----SGANTLTYSLYQDSGHASVWGNTVGTNTVAG-TGAGMAAGNAI +TKTVYGLIPSQPNTVPGNYADTVTVT +>Q8XVY3_RALSO/23163 
+VLVALASWAPGALAVSCSV-SANALSFGAYNT-----TSNLTGTTTVTITCGAWG-GASS +INYTLSASVGSGTYA-NRQVLN-GSNVIAYNLYTTSADTSIWGDGN-GD------GTVTL +SGTVTKQVGTVNLTIYGKIN-GGQNVVPGSYATTIPIT +>Q8XPY9_RALSO/25163 +-----------ASAQSCSV-ASASLNFGSISPVQ---AGNTDTSTTLTVSCSGFLLQGTV +ARACLNLGVGSGDTGISPRVLSAGANQLQYNLYADSARSVVWGGRTTPATPAIQVDVSLG +LLGFGSA----TVTVYGRVPGGQTTVPAGAYTQSFSGT +>A7H7Q6_ANADF/25150 +ATAQFQ--VTATVVKKCKI-SATTIAFGNYDP-----ATILSAEGTLTLKCT----KGTL +YSVALDGGSTGS-----RQMTQ-AAEVLDYELYSDAGHTAVWPSTA---------AAPSV +AAAGADE----ALIIFAQVP-ADQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLD--VTATVVPSCTI-AATPVAFGSYDPLVTNAATALDAQGTVTVTCT----TGTA +YTVGLGAGNSGSGSRAMQHASI-AGAQLPYELYQEAARTTVWDSTV----------MQAG +TAASITP---VQYTVYGRIP-AAQNVPTGNYADAVVAT +>Q0C623_HYPNA/23151 +ANGTLD--VQATVVNTCVV-LTAPVVFASVGL------DEVTANGSITVNCT----NTSA +FTVALDGGDSGDISARSLTHAS-LPASFNYQLYTDAGLTTVWGDGVTGS------Q---A +NGSGPSQ----TLTVYGRT--TSTPDTAGAYADEVQVT +>Q7CVQ4_AGRFC/27162 +ATTNFN--VQITIQAACQINSAGNLDFGTNGVI----GAPIDVTSQIVVQCT----ASTP +FSLGLSAGAGSGATVANRLMTSAAGATISYSLYTTAAHSTVWGNTV-GT------DRQTG +TGTGAPQ----NFTVFGRVP-AQTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMT--VRITIQAECKVQTATDMDFGTNGVI----DANVDQTSTISVQCT----NSTP +YNVGLSAGVGAGATVAVRKMTGPAAAVLNYSLYRDVARAQLWGTTI-GT------DTVAG +TGNGAAQ----PLTVYGRVP-PQTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMN--VRITIQAECKIVTATDLDFGTKGVI----DVNVDQTSTISVQCT----NGTP +YTVGLSAGGGAGATVAMRKMTGAASATINYTIYRDAARTQVWGVTA-GT------DVVSG +TGNGNAQ----SITAYGRVP-AQTTPAPGVYSDVVSVT +>Q63W78_BURPS/193326 +-TFAFT--ASATVVNDCFI-NATNVAFGSTGVI----QGALTATGTISAQCT----NGDA +FRIALNGGASGNVAARAMQRTG-GGGAVNYQLYLDAAHSTIWGDGTAGT------STATG +TGSGLSQ----SLTVYGQVP-AQTTPAPGTYSDTITAT +>A9CH19_AGRFC/182313 +TQVPFT--VSAAVAPTCII-SAQNINFGSHGVL----NTAVDANGAINLTCT----NGLN +YSVALNGGLSNSPPA-ARQMVQ-GAASIIYGLYRDVSRTNVWGSAA-G-------QIATG +TGNGSLQ----TLTVFGRVP-AQNTPAPGNYADTVVVT +>Q985D8_RHILO/205338 +DRPTFT--INAIVPANCLL-AIQNIDFGSNGIL----GANVDATGGVSITCT----PGTP +YTVSLSNGTTGSAPT-ARKMSK-GVETVTYGLYKDNARSQVWGDAAMPG------STVAG 
+SGSGAAQ----NLTIYGRVP-AQTTPSAGVYTDTVVVT +>Q3A2W0_PELCD/10150 +IIVLLFAVDAYAFHCEVTTTPVSFGAYDVFSSFSLDTTGRISVSCNNPEKKRMPVTISIS +RGAANSFSPRQMRR-IGGSDRMDYYLFVDASRTAVWGDGTGGSSTYVGMIDRTSPL---- +--NVPIYGRIPARQNLRA-GSYQDILVVT +>A9CH19_AGRFC/11158 +IAAAFVASPVLAQSCTFSMSDMNFGFVNLAGGAAVDTTATLSVTCNNPLSLALSIRICPN +INAGGGGQSGGIRRMLQGSNILNYQLYQTSARTTAWGSVTQPALGAPPPIDMALPLL-IN +STTRTVYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPTVAWAQSCSFGVSAMNFGLVDTLSGSSSNSTATLSVNCTGLLLQR--ILVCPN +LGTGSGGATASARQMLSGANDLNYQLYSDSARSVVWGSYAWPYPPTAPGFALTLNVLGSG +SASQTIYGAILGGQATAVPSTYLSTFSGS +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSVS---GNGFTSVYDPIS-----TVP-NDNVSSVTINC-SRAS +GDPTTTTYSLASTNGLYPQGQ-NNRAYY---PTNKYLKYDIYKDAAYSSRWGPG--GSAP +FTGTLNFGSGTSASLTLP-YYNRVAAQQS-AVAADYTDTMTAT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTVGSS-GLAFGA-YQPLTFAGKLTSSAVTSNASISVVCTGIAS +GGA----YSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLV +GG---SI-PVGDSNQSQP-VYGRIAASQNTLRAGSYSGSLTMT +>Q60C08_METCA/20167 +--LLACPKISDADPYQCDIGNI-SVPHAV-YDPTD-----SNPNSSGVGTVGITCHLKNA +KQTQQVQYTIALSRG--SSGSYNPRRMS---GGRGSLGYNLYLDAARVTIWGDGSGGTFP +LRGTLLLNPTTPVQQVIHNIYGLIPPLQD-VYAGTYTDTVTIT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGT-YLTTD-----LLP-RDSAGSITYRCEGQIT +PIT------IDFSAG--GSGTPLARSMAG--PGAQRLEYNLYVDATRLIVWGNGTSGTGR +YG---PVVPLFGVEVTVP-IFGRIPAGQA-IPAGAYADTVVMT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSISAD-PLAFGQ-YDPIT-----GAQ-VDGASEVSVSC-SLLG +LVSLLVSYEISLDPG--TGGSYHPRALS---SATDTLDYNLYVDTARTEIWGDGTDDTAT +VTDSYTL-GVLTVTRYYP-VYGRVFADQN-VAAGVYDDTITAT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSVSAG-SVAFGA-YDPLS-----PTH-LDSTGTIGLTC-AVRQ +LVT------ISLGTG--QSGT-FARELRG--PGGAALRYDLYTDATRTQVWGDGTAGTAT +WP------FETERGRYVP-VYARVLAGQD-VPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGA-YDPLS-----PVP-LTTTGMLQYRC-SRGQ +PIR------ITFTAG--SSGDVYARTLR---QGPWTLAYNLYADAGFGTVWGDGTGGTAA +AP---AV-TTLSNGLTVAYVFGRIPARQE-PPVGPYSDTIVVT diff --git a/gene_sequences/sporecoat-final.fasta b/gene_sequences/sporecoat-final.fasta new file mode 
100644 index 0000000..1f2fec3 --- /dev/null +++ b/gene_sequences/sporecoat-final.fasta @@ -0,0 +1,104 @@ +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSV-SGNGFT-SVYDPIS------TVPNDNVSSVTINCS-R--- +-ASGDPTTTTYSLASTNGLYPQGQNNRAY--YPTNKYLKYDIYKDAAYSSRWGPG--GSA +PFTG--TLNFGSGTSASLTLPYYNRVAA-QQSAVAADYTDTMTAT +>Q60C08_METCA/20167 +LLACPKIS--DADPYQCDI-GNISVPHAVYDPTDS-----NPNSSGVGTVGITCHLKNAK +QTQQVQYTIALSRGSSGSYNP--RRMSG-----GRGSLGYNLYLDAARVTIWGDGSGGTF +PLRGTLLLNPTTPVQQVIHN-IYGLIPP-LQDVYAGTYTDTVTIT +>A9CH19_AGRFC/11158 +IAAAFVAS--PVLAQSCTF-SMSDMNFGFVNLAG------GAAVDTTATLSVTCN-NPLS +LALSIRICPNINAGGGGQSGGIRRMLQ------GSNILNYQLYQTSARTTAWGSVTQPAL +GAPPPIDMALPLLINSTTRT-VYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPT--VAWAQSCSF-GVSAMNFGLVDTLS------GSSSNSTATLSVNCT-GLLL +QR--ILVCPNLGTGSGGATASARQMLS------GANDLNYQLYSDSARSVVWGSYAWPYP +PTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS +>Q8XPY9_RALSO/25163 +-----------ASAQSCSV-ASASLNFGSISPVQ------AGNTDTSTTLTVSCS-GFLL +QG--TVARACLNLGVGSGDTGISPRVLSA----GANQLQYNLYADSARSVVWGGRTTPAT +PAIQ-VDVSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSV-SANALSFGAYNT--------TSNLTGTTTVTITCG-A--- +WGGASSINYTLSASVGSGTYANRQVLN------GSNVIAYNLYTTSADTSIWGDGN-GDG +TVTL--SGTVTKQVGTVNLT-IYGKING-GQNVVPGSYATTIPIT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTV-GSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCT-GIAS +GG---AYSIALGPSTTGSGDRISTRYLGN--SNGGDDMSFNIYTSASYSTVWGNGTTG-G +LVGG--SIPVG--DSNQSQP-VYGRIAASQNTLRAGSYSGSLTMT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSI-SADPLAFGQYDPIT------GAQVDGASEVSVSCS-LLGL +VSLLVSYEISLDPGTGGSYHPRALSS-------ATDTLDYNLYVDTARTEIWGDGTDDTA +TVTD--SYTLGVLTVTRYYP-VYGRVFA-DQNVAAGVYDDTITAT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTD------LLPRDSAGSITYRCE-G--- +QI--TPITIDFSAGGSG--TPLARSMAGP----GAQRLEYNLYVDATRLIVWGNGTSGTG +RYGP--VVPLF--GVEVTVP-IFGRIPA-GQAIPAGAYADTVVMT +>Q3A2W0_PELCD/10150 +IIVLLFAV--DAYAFHCEV-TTTPVSFGAYDVFS------SFSLDTTGRISVSCN-NPEK +KR--MPVTISISRGAANSFSPRQMRRIG-----GSDRMDYYLFVDASRTAVWGDGTGGSS 
+TYVG--MIDR---TSPLNVP-IYGRIPA-RQNLRAGSYQDILVVT +>Q0C623_HYPNA/23151 +ANGTLDVQ--ATVVNTCVV-LTAPVVFASVG---------LDEVTANGSITVNCT-N--- +-T--SAFTVALDGGDSGDISARSLTHAS-----LPASFNYQLYTDAGLTTVWGDGVTGSQ +ANGS---------GPSQTLT-VYGRTTS-TPDTA-GAYADEVQVT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSV-SAGSVAFGAYDPLS------PTHLDSTGTIGLTCA-V--- +-R--QLVTISLGTGQSG---TFARELRGP----GGAALRYDLYTDATRTQVWGDGTAGTA +TWPF--ET-----ERGRYVP-VYARVLA-GQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLS------PVPLTTTGMLQYRCS-R--- +-G--QPIRITFTAGSSGDVYARTLRQ-------GPWTLAYNLYADAGFGTVWGDGTGGTA +AAPA--VTTL---SNGLTVAYVFGRIPA-RQEPPVGPYSDTIVVT +>Q0AAK3_ALKEH/24160 +DTATFDVT--ATVDPTCTV-DADNLVFGTYDPFS------DTPLDENSEIRVQCT-S--- +-D--TPYDIGLDDGDNTGAEGERRMALAD----ESDFLEYDLYHDNHGGTSWGDIDSGAE +LTGL--SGT----GSEQSYV-VYGRIFA-EQSVAVGNYVDTIEVT +>A7H7Q6_ANADF/25150 +ATAQFQVT--ATVVKKCKI-SATTIAFGNYDP--------ATILSAEGTLTLKCT-K--- +-G--TLYSVALDGGSTG-----SRQMTQ-----AAEVLDYELYSDAGHTAVWPSTAAAPS +VAAA---------GADEALI-IFAQVPA-DQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVT--ATVVPSCTI-AATPVAFGSYDPLVTN---AATALDAQGTVTVTCT-T--- +-G--TAYTVGLGAGNSGSGSRAMQHASI-----AGAQLPYELYQEAARTTVWDSTVMQAG +TAAS---------ITPVQYT-VYGRIPA-AQNVPTGNYADAVVAT +>Q985D8_RHILO/205338 +DRPTFTIN--AIVPANCLL-AIQNIDFGSNGIL-------GANVDATGGVSITCT-P--- +-G--TPYTVSLSNGT-TGSAPTARKMSK-----GVETVTYGLYKDNARSQVWGDAAMPGS +TVAG--SGS----GAAQNLT-IYGRVPA-QTTPSAGVYTDTVVVT +>Q8XVY0_RALSO/26164 +TTTTFSVS--TTVNATCVINSASALTFAAFDPS-------QGAQASTSSISVNCT-N--- +-T--TPFNIGLNAGTGTGATVASRVMTS-----GANTLTYSLYQDSGHASVWGN-TVGTN +TVAG--TGAGMAAGNAITKT-VYGLIPS-QPNTVPGNYADTVTVT +>Q0BSH7_GRABC/37169 +TTTTFQVT--ATVQASCII-QATNLSFGNYS---------GSQTDATSTIQVTCT-N--- +-S--TPYNVGLSAGTGSGATVSNRKMSLN----STSALPYALYSDASRSTNWGN-TPNQD +TVSG--TGN----GSAQSLT-VYGRIQT-GNYPTPGSYADTITAT +>Q8XPY6_RALSO/36170 +KTTTFTVS--LTLQADCSI-SANALNFGTQGVL-------AANVDQTATLSVTCS-N--- +-T--APYNVGFDAGTTTGSTIAARLLAGS----GAATVGFQLYSDSARTQIWGN-TVGTD +TVSG--TGS----GTAQVLT-VYGRVPS-QNSPAAGTYSTTITAT +>Q63W79_BURPS/31169 
+ATATFTVS--LTIQANCTI-SANALSFGTNGVL-------ATAVNQQTTLSVTCS-N--- +-T--TSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGN-TVGTN +TVSG--TGN----GTAQTLS-VYGQVPA-QTTPKPDTYESTVTAT +>Q7CVQ4_AGRFC/27162 +ATTNFNVQ--ITIQAACQINSAGNLDFGTNGVI-------GAPIDVTSQIVVQCT-A--- +-S--TPFSLGLSAGAGSGATVANRLMTSA----AGATISYSLYTTAAHSTVWGN-TVGTD +RQTG--TGT----GAPQNFT-VFGRVPA-QTTPAVGVYTDTVTAT +>Q985D5_RHILO/27162 +ATGNMTVR--ITIQAECKVQTATDMDFGTNGVI-------DANVDQTSTISVQCT-N--- +-S--TPYNVGLSAGVGAGATVAVRKMTGP----AAAVLNYSLYRDVARAQLWGT-TIGTD +TVAG--TGN----GAAQPLT-VYGRVPP-QTTPGAGVYTDTVAIT +>A6X8A6_OCHA4/27162 +ATGNMNVR--ITIQAECKIVTATDLDFGTKGVI-------DVNVDQTSTISVQCT-N--- +-G--TPYTVGLSAGGGAGATVAMRKMTGA----ASATINYTIYRDAARTQVWGV-TAGTD +VVSG--TGN----GNAQSIT-AYGRVPA-QTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVS--AAVAPTCII-SAQNINFGSHGVL-------NTAVDANGAINLTCT-N--- +-G--LNYSVALNGGLSNS-PPAARQMVQ-----GAASIIYGLYRDVSRTNVW--GSAAGQ +IATG--TGN----GSLQTLT-VFGRVPA-QNTPAPGNYADTVVVT +>Q63W78_BURPS/193326 +-TFAFTAS--ATVVNDCFI-NATNVAFGSTGVI-------QGALTATGTISAQCT-N--- +-G--DAFRIALNGGASGNVAARAMQRTG-----GGGAVNYQLYLDAAHSTIWGDGTAGTS +TATG--TGS----GLSQSLT-VYGQVPA-QTTPAPGTYSDTITAT diff --git a/gene_sequences/sporecoatout.fasta b/gene_sequences/sporecoatout.fasta new file mode 100644 index 0000000..d38c594 --- /dev/null +++ b/gene_sequences/sporecoatout.fasta @@ -0,0 +1,10 @@ +LVSLLVSYEISLDPG--TGGSYHPRALS---SATDTLDYNLYVDTARTEIWGDGTDDTAT +VTDSYTL-GVLTVTRYYP-VYGRVFADQN-VAAGVYDDTITAT +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSVSAG-SVAFGA-YDPLS-----PTH-LDSTGTIGLTC-AVRQ +LVT------ISLGTG--QSGT-FARELRG--PGGAALRYDLYTDATRTQVWGDGTAGTAT +WP------FETERGRYVP-VYARVLAGQD-VPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGA-YDPLS-----PVP-LTTTGMLQYRC-SRGQ +PIR------ITFTAG--SSGDVYARTLR---QGPWTLAYNLYADAGFGTVWGDGTGGTAA +AP---AV-TTLSNGLTVAYVFGRIPARQE-PPVGPYSDTIVVT diff --git a/gene_sequences/transporter-combined.fasta b/gene_sequences/transporter-combined.fasta new file mode 100644 index 0000000..70f4629 --- /dev/null +++ 
b/gene_sequences/transporter-combined.fasta @@ -0,0 +1,32 @@ +>LACY_CITFR/1412 +MYYLKNTNFWMFGFFFFFYFFIMGAYFPFFPIWLHEVNHISKGDTGIIFACISLFSLLFQ +PIFGLLSDKLGLRKHLLWVITGMLVMFAPFFIYVFGPLLQVNILLGSIVGGIYLGFIYNA +GAPAIEAYIEKASRRSNFEFGRARMFGCVGWALCASIAGIMFTINNQFVFWLGSGCAVIL +ALLLLFSKTDVPSSAKVADAVGANNSAFSLKLALELFKQPKLWLISLYVVGVSCTYDVFD +QQFANFFTSFFATGEQGTRVFGYVTTMGELLNASIMFFAPLIVNRIGGKNALLLAGTIMS +VRIIGSHSHTALEVVILKTLHMFEIPFLIVGCFKYITSQFEVRFSATIYLVCFCFFKQLA +MIFMSVLAGKMYESIGFQGAYLVLGIIRVSFTLISVFTLSGPGPFSLLRRRE +>LACY_KLEOX/6416 +LAPRERHNFIYFMLFFFFYYFIMSAYFPFFPVWLAEVNHLTKTETGIVFSCISLFAIIFQ +PVFGLISDKLGLRKHLLWTITILLILFAPFFIFVFSPLLQMNIMAGALVGGVYLGIVFSS +RSGAVEAYIERVSRANRFEYGKVRVSGCVGWALCASITGILFSIDPNITFWIASGFALIL +GVLLWVSKPESSNSAEVIDALGANRQAFSMRTAAELFRMPRFWGFIIYVVGVASVYDVFD +QQFANFFKGFFSSPQRGTEVFGFVTTGGELLNALIMFCAPAIINRIGAKNALLIAGLIMS +VRILGSSFATSAVEVIILKMLHMFEIPFLLVGTFKYISSAFKGKLSATLFLIGFNLSKQL +SSVVLSAWVGRMYDTVGFHQAYLILGCITLSFTVISLFTLKGSKTLLPATA +>RAFB_ECOLX/4415 +ASTHKNTDFWIFGLFFFLYFFIMATCFPFLPVWLSDVVGLSKTDTGIVFSCLSLFAISFQ +PLLGVISDRLGLKKNLIWSISLLLVFFAPFFLYVFAPLLHLNIWAGALTGGVFIGFVFSA +GAGAIEAYIERVSRSSGFEYGKARMFGCLGWALCATMAGILFNVDPSLVFWMGSGGALLL +LLLLYLARPSTSQTAMVMNALGANSSLISTRMVFSLFRMRQMWMFVLYTIGVACVYDVFD +QQFAIFFRSFFDTPQAGIKAFGFATTAGEICNAIIMFCTPWIINRIGAKNTLLVAGGIMT +IRITGSAFATTMTEVVILKMLHALEVPFLLVGAFKYITGVFDTRLSATVYLIGFQFSKQL +AAILLSTFAGHLYDRMGFQNTYFVLGMIVLTVTVISAFTLSSSPGIVHPSVE +>A0A026RKY7_ECOLX/4411 +NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFM +MFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLA +GCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVF +MMINMRFKDKDHQCIAADAGGVKKEDFIAVFKDRNFWVFVIFIVGTWSFYNIFDQQLFPV +FYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIMALRILS +CALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASSLGIVLL +STPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPV diff --git a/gene_sequences/transporter-final.fasta b/gene_sequences/transporter-final.fasta new file mode 100644 index 0000000..2f47110 --- /dev/null +++ 
b/gene_sequences/transporter-final.fasta @@ -0,0 +1,32 @@ +>A0A026RKY7_ECOLX/4411 +NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFM +MFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLA +GCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVF +-MMINMRFKDKDHQCIAADAGGVKKEDF-----IAVFKDRNFWVFVIFIVGTWSFYNIFD +QQLFPVFYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIM +ALRILSCALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASS +LGIVLLSTPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPV +>RAFB_ECOLX/4415 +ASTHKNTDFWIFGLFFFLYFFIMATCFPFLPVWLSDVVGLSKTDTGIVFSCLSLFAISFQ +PLLGVISDRLGLKKNLIWSISLLLVFFAPFFLYVFAPLLHLNIWAGALTGGVFIGFVFSA +GAGAIEAYIERVSRSSGFEYGKARMFGCLGWALCATMAGILFNVDPSLVFWMGSGGALLL +LLLLYLARPSTSQTAMVMNALGANSSLISTRMVFSLFRMRQMWMFVLYTIGVACVYDVFD +QQ-FAIFFRSFFDTPQAGIKAFGFATTAGEICNAIIMFCTPWIINRIGAKNTLLVAGGIM +TIRITGSAFATTMTEVVILKMLHALEVPFLLVGAFKYITGVFDTRLSATVYLIGFQFSKQ +LAAILLSTFAGHLYDRMGFQNTYFVLGMIVLTVTVISAFTLS-SSPGIVHPSVE +>LACY_CITFR/1412 +MYYLKNTNFWMFGFFFFFYFFIMGAYFPFFPIWLHEVNHISKGDTGIIFACISLFSLLFQ +PIFGLLSDKLGLRKHLLWVITGMLVMFAPFFIYVFGPLLQVNILLGSIVGGIYLGFIYNA +GAPAIEAYIEKASRRSNFEFGRARMFGCVGWALCASIAGIMFTINNQFVFWLGSGCAVIL +ALLLLFSKTDVPSSAKVADAVGANNSAFSLKLALELFKQPKLWLISLYVVGVSCTYDVFD +QQ-FANFFTSFFATGEQGTRVFGYVTTMGELLNASIMFFAPLIVNRIGGKNALLLAGTIM +SVRIIGS-HSHTALEVVILKTLHMFEIPFLIVGCFKYITSQFEVRFSATIYLVCFCFFKQ +LAMIFMSVLAGKMYESIGFQGAYLVLGIIRVSFTLISVFTLSGPGPFSLLRRRE +>LACY_KLEOX/6416 +LAPRERHNFIYFMLFFFFYYFIMSAYFPFFPVWLAEVNHLTKTETGIVFSCISLFAIIFQ +PVFGLISDKLGLRKHLLWTITILLILFAPFFIFVFSPLLQMNIMAGALVGGVYLGIVFSS +RSGAVEAYIERVSRANRFEYGKVRVSGCVGWALCASITGILFSIDPNITFWIASGFALIL +GVLLWVSKPESSNSAEVIDALGANRQAFSMRTAAELFRMPRFWGFIIYVVGVASVYDVFD +QQ-FANFFKGFFSSPQRGTEVFGFVTTGGELLNALIMFCAPAIINRIGAKNALLIAGLIM +SVRILGSSFATSAVEVIILKMLHMFEIPFLLVGTFKYISSAFKGKLSATLFLIGFNLSKQ +LSSVVLSAWVGRMYDTVGFHQAYLILGCITLSFTVISLFTL--KGSKTLLPATA diff --git a/gene_sequences/transporterout.fasta b/gene_sequences/transporterout.fasta new file mode 100644 index 0000000..5649f92 --- /dev/null +++ 
b/gene_sequences/transporterout.fasta @@ -0,0 +1,8 @@ +>A0A026RKY7_ECOLX/4411 +NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFM +MFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLA +GCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVF +MMINMRFKDKDHQCIAADAGGVKKEDFIAVFKDRNFWVFVIFIVGTWSFYNIFDQQLFPV +FYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIMALRILS +CALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASSLGIVLL +STPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPV