diff --git a/align.sh b/align.sh
new file mode 100644
index 0000000..8a21782
--- /dev/null
+++ b/align.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+#usage: bash align.sh
+#
+#Concatenates the sporecoat*.fasta and transporter*.fasta files in the current
+#directory into one combined FASTA file per gene family, then aligns each
+#combined file with MUSCLE.
+#
+#Set MUSCLE to override the path of the muscle executable.
+
+MUSCLE=${MUSCLE:-/afs/nd.edu/user9/svandext/local/bin/muscle3.8.31_i86linux64}
+
+#combine_and_align PREFIX COMBINED ALIGNED
+#Joins every PREFIX*.fasta file into COMBINED and aligns it into ALIGNED.
+#Returns non-zero if no input matches or if sed or muscle fails.
+combine_and_align() {
+  local prefix=$1 combined=$2 aligned=$3
+  local -a inputs=("$prefix"*.fasta)
+
+  #An unmatched glob stays literal, so test that the first entry is a real file.
+  if [[ ! -e "${inputs[0]}" ]]; then
+    printf 'align.sh: no %s*.fasta files found\n' "$prefix" >&2
+    return 1
+  fi
+
+  #GNU sed: -s treats each file separately, so '$' is the last line of every
+  #file; appending \n there puts a blank line after each input file.
+  sed -e '$s/$/\n/' -s -- "${inputs[@]}" > "$combined" || return
+  "$MUSCLE" -in "$combined" -out "$aligned"
+}
+
+status=0
+combine_and_align sporecoat combinedspores.fasta sporesaligned.afa || status=1
+combine_and_align transporter combinedtransporter.fasta transporteraligned.afa || status=1
+exit "$status"
diff --git a/gene_sequences/combined.fasta b/gene_sequences/combined.fasta
new file mode 100644
index 0000000..3d09656
--- /dev/null
+++ b/gene_sequences/combined.fasta
@@ -0,0 +1,52 @@
+>Q8XVY0_RALSO/26164
+TTTTFSVSTTVNATCVINSASALTFAAFDPSQGAQASTSSISVNCTNTTPFNIGLNAGTGTGATVASRVMTSGANTLTYSLYQDSGHASVWGNTVGTNTVAGTGAGMAAGNAITKTVYGLIPSQPNTVPGNYADTVTVT
+>Q0BSH7_GRABC/37169
+TTTTFQVTATVQASCIIQATNLSFGNYSGSQTDATSTIQVTCTNSTPYNVGLSAGTGSGATVSNRKMSLNSTSALPYALYSDASRSTNWGNTPNQDTVSGTGNGSAQSLTVYGRIQTGNYPTPGSYADTITAT
+>Q0AAK3_ALKEH/24160
+DTATFDVTATVDPTCTVDADNLVFGTYDPFSDTPLDENSEIRVQCTSDTPYDIGLDDGDNTGAEGERRMALADESDFLEYDLYHDNHGGTSWGDIDSGAELTGLSGTGSEQSYVVYGRIFAEQSVAVGNYVDTIEVT
+>Q8XPY6_RALSO/36170
+KTTTFTVSLTLQADCSISANALNFGTQGVLAANVDQTATLSVTCSNTAPYNVGFDAGTTTGSTIAARLLAGSGAATVGFQLYSDSARTQIWGNTVGTDTVSGTGSGTAQVLTVYGRVPSQNSPAAGTYSTTITAT
+>Q63W79_BURPS/31169
+ATATFTVSLTIQANCTISANALSFGTNGVLATAVNQQTTLSVTCSNTTSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGNTVGTNTVSGTGNGTAQTLSVYGQVPAQTTPKPDTYESTVTAT
+>Q7CVQ4_AGRFC/27162
+ATTNFNVQITIQAACQINSAGNLDFGTNGVIGAPIDVTSQIVVQCTASTPFSLGLSAGAGSGATVANRLMTSAAGATISYSLYTTAAHSTVWGNTVGTDRQTGTGTGAPQNFTVFGRVPAQTTPAVGVYTDTVTAT
+>Q985D5_RHILO/27162
+ATGNMTVRITIQAECKVQTATDMDFGTNGVIDANVDQTSTISVQCTNSTPYNVGLSAGVGAGATVAVRKMTGPAAAVLNYSLYRDVARAQLWGTTIGTDTVAGTGNGAAQPLTVYGRVPPQTTPGAGVYTDTVAIT
+>A6X8A6_OCHA4/27162
+ATGNMNVRITIQAECKIVTATDLDFGTKGVIDVNVDQTSTISVQCTNGTPYTVGLSAGGGAGATVAMRKMTGAASATINYTIYRDAARTQVWGVTAGTDVVSGTGNGNAQSITAYGRVPAQTTPAPGVYSDVVSVT +>A9CH19_AGRFC/182313 +TQVPFTVSAAVAPTCIISAQNINFGSHGVLNTAVDANGAINLTCTNGLNYSVALNGGLSNSPPAARQMVQGAASIIYGLYRDVSRTNVWGSAAGQIATGTGNGSLQTLTVFGRVPAQNTPAPGNYADTVVVT +>Q985D8_RHILO/205338 +DRPTFTINAIVPANCLLAIQNIDFGSNGILGANVDATGGVSITCTPGTPYTVSLSNGTTGSAPTARKMSKGVETVTYGLYKDNARSQVWGDAAMPGSTVAGSGSGAAQNLTIYGRVPAQTTPSAGVYTDTVVVT +>A7H7Q6_ANADF/25150 +ATAQFQVTATVVKKCKISATTIAFGNYDPATILSAEGTLTLKCTKGTLYSVALDGGSTGSRQMTQAAEVLDYELYSDAGHTAVWPSTAAAPSVAAAGADEALIIFAQVPADQYPAPGAYADTVTAT +>Q2IFL0_ANADE/27162 +ATATLDVTATVVPSCTIAATPVAFGSYDPLVTNAATALDAQGTVTVTCTTGTAYTVGLGAGNSGSGSRAMQHASIAGAQLPYELYQEAARTTVWDSTVMQAGTAASITPVQYTVYGRIPAAQNVPTGNYADAVVAT +>Q0C623_HYPNA/23151 +ANGTLDVQATVVNTCVVLTAPVVFASVGLDEVTANGSITVNCTNTSAFTVALDGGDSGDISARSLTHASLPASFNYQLYTDAGLTTVWGDGVTGSQANGSGPSQTLTVYGRTTSTPDTAGAYADEVQVT +>Q63W78_BURPS/193326 +TFAFTASATVVNDCFINATNVAFGSTGVIQGALTATGTISAQCTNGDAFRIALNGGASGNVAARAMQRTGGGGAVNYQLYLDAAHSTIWGDGTAGTSTATGTGSGLSQSLTVYGQVPAQTTPAPGTYSDTITAT +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSVSANALSFGAYNTTSNLTGTTTVTITCGAWGGASSINYTLSASVGSGTYANRQVLNGSNVIAYNLYTTSADTSIWGDGNGDGTVTLSGTVTKQVGTVNLTIYGKINGGQNVVPGSYATTIPIT +>Q8XPY9_RALSO/25163 +ASAQSCSVASASLNFGSISPVQAGNTDTSTTLTVSCSGFLLQGTVARACLNLGVGSGDTGISPRVLSAGANQLQYNLYADSARSVVWGGRTTPATPAIQVDVSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGT +>A9CH19_AGRFC/11158 +IAAAFVASPVLAQSCTFSMSDMNFGFVNLAGGAAVDTTATLSVTCNNPLSLALSIRICPNINAGGGGQSGGIRRMLQGSNILNYQLYQTSARTTAWGSVTQPALGAPPPIDMALPLLINSTTRTVYGRINAGQASAARGLYLSSFAGG +>Q985D8_RHILO/37183 +SAALLLPTVAWAQSCSFGVSAMNFGLVDTLSGSSSNSTATLSVNCTGLLLQRILVCPNLGTGSGGATASARQMLSGANDLNYQLYSDSARSVVWGSYAWPYPPTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS +>Q3A2W0_PELCD/10150 +IIVLLFAVDAYAFHCEVTTTPVSFGAYDVFSSFSLDTTGRISVSCNNPEKKRMPVTISISRGAANSFSPRQMRRIGGSDRMDYYLFVDASRTAVWGDGTGGSSTYVGMIDRTSPLNVPIYGRIPARQNLRAGSYQDILVVT +>A7H7Q5_ANADF/16150 
+APRAVDAAQPPSPGPSCSVSAGSVAFGAYDPLSPTHLDSTGTIGLTCAVRQLVTISLGTGQSGTFARELRGPGGAALRYDLYTDATRTQVWGDGTAGTATWPFETERGRYVPVYARVLAGQDVPAGPYSDTIVVT +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLSPVPLTTTGMLQYRCSRGQPIRITFTAGSSGDVYARTLRQGPWTLAYNLYADAGFGTVWGDGTGGTAAAPAVTTLSNGLTVAYVFGRIPARQEPPVGPYSDTIVVT +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTDLLPRDSAGSITYRCEGQITPITIDFSAGGSGTPLARSMAGPGAQRLEYNLYVDATRLIVWGNGTSGTGRYGPVVPLFGVEVTVPIFGRIPAGQAIPAGAYADTVVMT +>Q60C08_METCA/20167 +LLACPKISDADPYQCDIGNISVPHAVYDPTDSNPNSSGVGTVGITCHLKNAKQTQQVQYTIALSRGSSGSYNPRRMSGGRGSLGYNLYLDAARVTIWGDGSGGTFPLRGTLLLNPTTPVQQVIHNIYGLIPPLQDVYAGTYTDTVTIT +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSISADPLAFGQYDPITGAQVDGASEVSVSCSLLGLVSLLVSYEISLDPGTGGSYHPRALSSATDTLDYNLYVDTARTEIWGDGTDDTATVTDSYTLGVLTVTRYYPVYGRVFADQNVAAGVYDDTITAT +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSVSGNGFTSVYDPISTVPNDNVSSVTINCSRASGDPTTTTYSLASTNGLYPQGQNNRAYYPTNKYLKYDIYKDAAYSSRWGPGGSAPFTGTLNFGSGTSASLTLPYYNRVAAQQSAVAADYTDTMTAT +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTVGSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCTGIASGGAYSIALGPSTTGSGDRISTRYLGNSNGGDDMSFNIYTSASYSTVWGNGTTGGLVGGSIPVGDSNQSQPVYGRIAASQNTLRAGSYSGSLTMT diff --git a/gene_sequences/combined2.fasta b/gene_sequences/combined2.fasta new file mode 100644 index 0000000..c75277a --- /dev/null +++ b/gene_sequences/combined2.fasta @@ -0,0 +1,8 @@ +>LACY_CITFR/1412 +MYYLKNTNFWMFGFFFFFYFFIMGAYFPFFPIWLHEVNHISKGDTGIIFACISLFSLLFQPIFGLLSDKLGLRKHLLWVITGMLVMFAPFFIYVFGPLLQVNILLGSIVGGIYLGFIYNAGAPAIEAYIEKASRRSNFEFGRARMFGCVGWALCASIAGIMFTINNQFVFWLGSGCAVILALLLLFSKTDVPSSAKVADAVGANNSAFSLKLALELFKQPKLWLISLYVVGVSCTYDVFDQQFANFFTSFFATGEQGTRVFGYVTTMGELLNASIMFFAPLIVNRIGGKNALLLAGTIMSVRIIGSHSHTALEVVILKTLHMFEIPFLIVGCFKYITSQFEVRFSATIYLVCFCFFKQLAMIFMSVLAGKMYESIGFQGAYLVLGIIRVSFTLISVFTLSGPGPFSLLRRRE +>LACY_KLEOX/6416 
+LAPRERHNFIYFMLFFFFYYFIMSAYFPFFPVWLAEVNHLTKTETGIVFSCISLFAIIFQPVFGLISDKLGLRKHLLWTITILLILFAPFFIFVFSPLLQMNIMAGALVGGVYLGIVFSSRSGAVEAYIERVSRANRFEYGKVRVSGCVGWALCASITGILFSIDPNITFWIASGFALILGVLLWVSKPESSNSAEVIDALGANRQAFSMRTAAELFRMPRFWGFIIYVVGVASVYDVFDQQFANFFKGFFSSPQRGTEVFGFVTTGGELLNALIMFCAPAIINRIGAKNALLIAGLIMSVRILGSSFATSAVEVIILKMLHMFEIPFLLVGTFKYISSAFKGKLSATLFLIGFNLSKQLSSVVLSAWVGRMYDTVGFHQAYLILGCITLSFTVISLFTLKGSKTLLPATA +>RAFB_ECOLX/4415 +ASTHKNTDFWIFGLFFFLYFFIMATCFPFLPVWLSDVVGLSKTDTGIVFSCLSLFAISFQPLLGVISDRLGLKKNLIWSISLLLVFFAPFFLYVFAPLLHLNIWAGALTGGVFIGFVFSAGAGAIEAYIERVSRSSGFEYGKARMFGCLGWALCATMAGILFNVDPSLVFWMGSGGALLLLLLLYLARPSTSQTAMVMNALGANSSLISTRMVFSLFRMRQMWMFVLYTIGVACVYDVFDQQFAIFFRSFFDTPQAGIKAFGFATTAGEICNAIIMFCTPWIINRIGAKNTLLVAGGIMTIRITGSAFATTMTEVVILKMLHALEVPFLLVGAFKYITGVFDTRLSATVYLIGFQFSKQLAAILLSTFAGHLYDRMGFQNTYFVLGMIVLTVTVISAFTLSSSPGIVHPSVE +>A0A026RKY7_ECOLX/4411 +NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFMMFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLAGCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVFMMINMRFKDKDHQCIAADAGGVKKEDFIAVFKDRNFWVFVIFIVGTWSFYNIFDQQLFPVFYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIMALRILSCALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASSLGIVLLSTPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPV