From be3f134d4141f6e538a392109454eba7727cf3b1 Mon Sep 17 00:00:00 2001 From: vsanders Date: Tue, 27 Nov 2018 22:42:03 -0500 Subject: [PATCH 1/4] Added file for problem #1 --- #1.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 #1.sh diff --git a/#1.sh b/#1.sh new file mode 100644 index 0000000..c3fc410 --- /dev/null +++ b/#1.sh @@ -0,0 +1,16 @@ +for sc in gene_sequences/sporecoat0[1234].fasta +do +cat $sc >> gene_sequences/allsc.fasta +echo \n >> gene_sequences/allsc.fasta +done + +/afs/nd.edu/user25/vsanders/local/bin/muscle3.8.31_i86linux64 -in gene_sequences/allsc.fasta -out gene_sequences/allsc.align + +for trans in gene_sequences/transporter0[1234].fasta +do +cat $trans >> gene_sequences/alltrans.fasta +echo \n >> gene_sequences/alltrans.fasta +done + +/afs/nd.edu/user25/vsanders/local/bin/muscle3.8.31_i86linux64 -in gene_sequences/alltrans.fasta -out gene_sequences/alltrans.align + From af8742b2b393347c5d478e31ddf0c726322d81d5 Mon Sep 17 00:00:00 2001 From: vsanders Date: Tue, 27 Nov 2018 22:43:16 -0500 Subject: [PATCH 2/4] Added muscle align files --- allsc.align | 104 +++++++++++++++++++++++++++++++++++++++++++++++++ alltrans.align | 32 +++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 allsc.align create mode 100644 alltrans.align diff --git a/allsc.align b/allsc.align new file mode 100644 index 0000000..9696aff --- /dev/null +++ b/allsc.align @@ -0,0 +1,104 @@ +>Q12FX3_POLSJ/50194 +TWGVLLAAGTAHATISCSV-SGNGFT-SVYDPIS------TVPNDNVSSVTINCS-R--- +-ASGDPTTTTYSLASTNGLYPQGQNNRAY--YPTNKYLKYDIYKDAAYSSRWGPG--GSA +PFTGTLNFGSGT-SA-SLTLPYYNRVAA-QQSAVAADYTDTMTAT- +>Q60C08_METCA/20167 +LLACPKIS--DADPYQCDI-GNISVPHAVYDPTDS-----NPNSSGVGTVGITCHLKNAK +QTQQVQYTIALSRGSSGSYNP--RRMSG-----GRGSLGYNLYLDAARVTIWGDGSGGTF +PLRGTLLLNPTTPVQ-QVIHNIYGLIPP-LQDVYAGTYTDTVTIT- +>A9CH19_AGRFC/11158 +IAAAFVAS--PVLAQSCTF-SMSDMNFGFVNLAG------GAAVDTTATLSVTCN-NPLS +LA--LSIRICPNINAGGGGQSGGIRRMLQ----GSNILNYQLYQTSARTTAWGSVTQPAL +GAPPPIDMALPLLIN-STTRTVYGRINAGQASAARGLYLSSFAGG- +>Q985D8_RHILO/37183 +SAALLLPT--VAWAQSCSF-GVSAMNFGLVDTLS------GSSSNSTATLSVNCT-G--- +-LLLQRILVCPNLGTGSGGATASARQMLS----GANDLNYQLYSDSARSVVWGSYAWPYP +PTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS- +>Q8XPY9_RALSO/25163 +-----------ASAQSCSV-ASASLNFGSISPVQ------AGNTDTSTTLTVSCS-GFLL +QG--TVARACLNLGVGSGDTGISPRVLSA----GANQLQYNLYADSARSVVWGGRTTPAT +PAIQVD-VSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGTN +>Q8XVY3_RALSO/23163 +VLVALASWAPGALAVSCSV-SANALSFGAYNT--------TSNLTGTTTVTITCG-A--- +WGGASSINYTLSASVGSGTYANRQVLN------GSNVIAYNLYTTSADTSIWGDGN-GDG +TVTLSGTVTKQ--VG-TVNLTIYGKING-GQNVVPGSYATTIPIT- +>A1VIJ0_POLNA/19170 +ALFLLLATAGPAQAGSCTV-GSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCT-GIAS +GG---AYSIALGPSTTGSGDRISTRYLGN--SNGGDDMSFNIYTSASYSTVWGNGTTG-G +LVGGSIPV--G--DS-NQSQPVYGRIAASQNTLRAGSYSGSLTMTN +>Q0AAK6_ALKEH/8153 +SLFLVAAGSGSAQAYTCSI-SADPLAFGQYDPIT------GAQVDGASEVSVSCS-LLGL +VSLLVSYEISLDPGTGGSYHPRALSS-------ATDTLDYNLYVDTARTEIWGDGTDDTA +TVTDSYTLGVL--TV-TRYYPVYGRVFA-DQNVAAGVYDDTITAT- +>Q1D5L1_MYXXD/13153 +AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTD------LLPRDSAGSITYRCE-G--- +QI--TPITIDFSAGGSG--TPLARSMAGP----GAQRLEYNLYVDATRLIVWGNGTSGTG +RYGPVVPL--F--GV-EVTVPIFGRIPA-GQAIPAGAYADTVVMT- +>Q3A2W0_PELCD/10150 +IIVLLFAV--DAYAFHCEV-TTTPVSFGAYDVFS------SFSLDTTGRISVSCN-NPEK +KR--MPVTISISRGAANSFSPRQMRRIG-----GSDRMDYYLFVDASRTAVWGDGTGGSS +TYVGMID---R--TS-PLNVPIYGRIPA-RQNLRAGSYQDILVVTN +>Q0C623_HYPNA/23151 +ANGTLDVQ--ATVVNTCVV-LTAPVVFASVG---------LDEVTANGSITVNCT-N--- +-T--SAFTVALDGGDSGDISARSLTHAS-----LPASFNYQLYTDAGLTTVWGDGVTGSQ +ANGS---------GP-SQTLTVYGRTTS--TPDTAGAYADEVQVT- +>A7H7Q5_ANADF/16150 +APRAVDAAQPPSPGPSCSV-SAGSVAFGAYDPLS------PTHLDSTGTIGLTCA-V--- +-R--QLVTISLGTGQSG---TFARELRGP----GGAALRYDLYTDATRTQVWGDGTAGTA +TWPFET-------ER-GRYVPVYARVLA-GQDVPAGPYSDTIVVT- +>Q2IFK8_ANADE/19157 +ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLS------PVPLTTTGMLQYRCS-R--- +-G--QPIRITFTAGSSGDVYARTLRQ-------GPWTLAYNLYADAGFGTVWGDGTGGTA +AAPAVTTLS-N--GL-TVAY-VFGRIPA-RQEPPVGPYSDTIVVT- +>Q0AAK3_ALKEH/24160 +DTATFDVT--ATVDPTCTV-DADNLVFGTYDPFS------DTPLDENSEIRVQCT-S--- +-D--TPYDIGLDDGDNTGAEGERRMALAD----ESDFLEYDLYHDNHGGTSWGDIDSGAE +LTGLSG----T--GS-EQSYVVYGRIFA-EQSVAVGNYVDTIEVT- +>A7H7Q6_ANADF/25150 +ATAQFQVT--ATVVKKCKI-SATTIAFGNYDP--------ATILSAEGTLTLKCT-K--- +-G--TLYSVALDGGSTGS-----RQMTQ-----AAEVLDYELYSDAGHTAVWPSTAAAPS +VAAA---------GA-DEALIIFAQVPA-DQYPAPGAYADTVTAT- +>Q2IFL0_ANADE/27162 +ATATLDVT--ATVVPSCTI-AATPVAFGSYDPLVTN---AATALDAQGTVTVTCT-T--- +-G--TAYTVGLGAGNSGSGSRAMQHASI-----AGAQLPYELYQEAARTTVWDSTVMQAG +TAAS---------IT-PVQYTVYGRIPA-AQNVPTGNYADAVVAT- +>Q985D8_RHILO/205338 +DRPTFTIN--AIVPANCLL-AIQNIDFGSNGIL-------GANVDATGGVSITCT-P--- +-G--TPYTVSLSNGT-TGSAPTARKMSK-----GVETVTYGLYKDNARSQVWGDAAMPGS +TVAGSG----S--GA-AQNLTIYGRVPA-QTTPSAGVYTDTVVVT- +>Q8XVY0_RALSO/26164 +TTTTFSVS--TTVNATCVINSASALTFAAFDPS-------QGAQASTSSISVNCT-N--- +-T--TPFNIGLNAGTGTGATVASRVMTS-----GANTLTYSLYQDSGHASVWGN-TVGTN +TVAGTGAGMAA--GN-AITKTVYGLIPS-QPNTVPGNYADTVTVT- +>Q0BSH7_GRABC/37169 +TTTTFQVT--ATVQASCII-QATNLSFGNYS---------GSQTDATSTIQVTCT-N--- +-S--TPYNVGLSAGTGSGATVSNRKMSLN----STSALPYALYSDASRSTNWGN-TPNQD +TVSGTG----N--GS-AQSLTVYGRIQT-GNYPTPGSYADTITAT- +>Q8XPY6_RALSO/36170 +KTTTFTVS--LTLQADCSI-SANALNFGTQGVL-------AANVDQTATLSVTCS-N--- +-T--APYNVGFDAGTTTGSTIAARLLAGS----GAATVGFQLYSDSARTQIWGN-TVGTD +TVSGTG----S--GT-AQVLTVYGRVPS-QNSPAAGTYSTTITAT- +>Q63W79_BURPS/31169 +ATATFTVS--LTIQANCTI-SANALSFGTNGVL-------ATAVNQQTTLSVTCS-N--- +-T--TSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGN-TVGTN +TVSGTG----N--GT-AQTLSVYGQVPA-QTTPKPDTYESTVTATN +>Q7CVQ4_AGRFC/27162 +ATTNFNVQ--ITIQAACQINSAGNLDFGTNGVI-------GAPIDVTSQIVVQCT-A--- +-S--TPFSLGLSAGAGSGATVANRLMTSA----AGATISYSLYTTAAHSTVWGN-TVGTD +RQTGTG----T--GA-PQNFTVFGRVPA-QTTPAVGVYTDTVTAT- +>Q985D5_RHILO/27162 +ATGNMTVR--ITIQAECKVQTATDMDFGTNGVI-------DANVDQTSTISVQCT-N--- +-S--TPYNVGLSAGVGAGATVAVRKMTGP----AAAVLNYSLYRDVARAQLWGT-TIGTD +TVAGTG----N--GA-AQPLTVYGRVPP-QTTPGAGVYTDTVAIT- +>A6X8A6_OCHA4/27162 +ATGNMNVR--ITIQAECKIVTATDLDFGTKGVI-------DVNVDQTSTISVQCT-N--- +-G--TPYTVGLSAGGGAGATVAMRKMTGA----ASATINYTIYRDAARTQVWGV-TAGTD +VVSGTG----N--GN-AQSITAYGRVPA-QTTPAPGVYSDVVSVT- +>A9CH19_AGRFC/182313 +TQVPFTVS--AAVAPTCII-SAQNINFGSHGVL-------NTAVDANGAINLTCT-N--- +-G--LNYSVALNGGLSNS-PPAARQMVQ-----GAASIIYGLYRDVSRTNVWG--SAAGQ +IATGTG----N--GS-LQTLTVFGRVPA-QNTPAPGNYADTVVVT- +>Q63W78_BURPS/193326 +-TFAFTAS--ATVVNDCFI-NATNVAFGSTGVI-------QGALTATGTISAQCT-N--- +-G--DAFRIALNGGASGNVAARAMQRTG-----GGGAVNYQLYLDAAHSTIWGDGTAGTS +TATGTG----S--GL-SQSLTVYGQVPA-QTTPAPGTYSDTITAT- diff --git a/alltrans.align b/alltrans.align new file mode 100644 index 0000000..97234e6 --- /dev/null +++ b/alltrans.align @@ -0,0 +1,32 @@ +>A0A026RKY7_ECOLX/4411 +NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFM +MFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLA +GCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVF +-MMINMRFKDKDHQCIAADAGGVKKEDF-----IAVFKDRNFWVFVIFIVGTWSFYNIFD +QQLFPVFYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIM +ALRILSCALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASS +LGIVLLSTPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPVN +>RAFB_ECOLX/4415 +ASTHKNTDFWIFGLFFFLYFFIMATCFPFLPVWLSDVVGLSKTDTGIVFSCLSLFAISFQ +PLLGVISDRLGLKKNLIWSISLLLVFFAPFFLYVFAPLLHLNIWAGALTGGVFIGFVFSA +GAGAIEAYIERVSRSSGFEYGKARMFGCLGWALCATMAGILFNVDPSLVFWMGSGGALLL +LLLLYLARPSTSQTAMVMNALGANSSLISTRMVFSLFRMRQMWMFVLYTIGVACVYDVFD +QQ-FAIFFRSFFDTPQAGIKAFGFATTAGEICNAIIMFCTPWIINRIGAKNTLLVAGGIM +TIRITGSAFATTMTEVVILKMLHALEVPFLLVGAFKYITGVFDTRLSATVYLIGFQFSKQ +LAAILLSTFAGHLYDRMGFQNTYFVLGMIVLTVTVISAFTLS-SSPGIVHPSVEN +>LACY_CITFR/1412 +MYYLKNTNFWMFGFFFFFYFFIMGAYFPFFPIWLHEVNHISKGDTGIIFACISLFSLLFQ +PIFGLLSDKLGLRKHLLWVITGMLVMFAPFFIYVFGPLLQVNILLGSIVGGIYLGFIYNA +GAPAIEAYIEKASRRSNFEFGRARMFGCVGWALCASIAGIMFTINNQFVFWLGSGCAVIL +ALLLLFSKTDVPSSAKVADAVGANNSAFSLKLALELFKQPKLWLISLYVVGVSCTYDVFD +QQ-FANFFTSFFATGEQGTRVFGYVTTMGELLNASIMFFAPLIVNRIGGKNALLLAGTIM +SVRIIGS-HSHTALEVVILKTLHMFEIPFLIVGCFKYITSQFEVRFSATIYLVCFCFFKQ +LAMIFMSVLAGKMYESIGFQGAYLVLGIIRVSFTLISVFTLSGPGPFSLLRRREN +>LACY_KLEOX/6416 +LAPRERHNFIYFMLFFFFYYFIMSAYFPFFPVWLAEVNHLTKTETGIVFSCISLFAIIFQ +PVFGLISDKLGLRKHLLWTITILLILFAPFFIFVFSPLLQMNIMAGALVGGVYLGIVFSS +RSGAVEAYIERVSRANRFEYGKVRVSGCVGWALCASITGILFSIDPNITFWIASGFALIL +GVLLWVSKPESSNSAEVIDALGANRQAFSMRTAAELFRMPRFWGFIIYVVGVASVYDVFD +QQ-FANFFKGFFSSPQRGTEVFGFVTTGGELLNALIMFCAPAIINRIGAKNALLIAGLIM +SVRILGSSFATSAVEVIILKMLHMFEIPFLLVGTFKYISSAFKGKLSATLFLIGFNLSKQ +LSSVVLSAWVGRMYDTVGFHQAYLILGCITLSFTVISLFTL--KGSKTLLPATAN From 5926deabdf4f83e0db025ad313afa73998c0e87c Mon Sep 17 00:00:00 2001 From: vsanders Date: Tue, 27 Nov 2018 23:58:02 -0500 Subject: [PATCH 3/4] Added file for problem #2 --- #2.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 #2.sh diff --git a/#2.sh b/#2.sh new file mode 100644 index 0000000..5bc1b38 --- /dev/null +++ b/#2.sh @@ -0,0 +1,13 @@ + +/afs/nd.edu/user25/vsanders/local/bin/hmmbuild transporter.hmm gene_sequences/alltrans.align + +for file in proteomes/*.fasta +do +/afs/nd.edu/user25/vsanders/local/bin/hmmsearch --tblout $file.hits transporter.hmm $file +done + +for hits in proteomes/*.hits +do +echo $hits >> hmm_hits.txt +cat $hits | grep -v "#" | wc -l >> hmm_hits.txt +done From 7c88243af42d1991675b3110c8d5cb92c0065e6b Mon Sep 17 00:00:00 2001 From: vsanders Date: Wed, 28 Nov 2018 00:03:43 -0500 Subject: [PATCH 4/4] Added hmm_hits text file --- hmm_hits.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 hmm_hits.txt diff --git a/hmm_hits.txt b/hmm_hits.txt new file mode 100644 index 0000000..12c10c9 --- /dev/null +++ b/hmm_hits.txt @@ -0,0 +1,16 @@ +proteomes/Arthrobacter.fasta.hits +1 +proteomes/Bacillus.fasta.hits +1 +proteomes/Clostridium.fasta.hits +0 +proteomes/Flavobacterium.fasta.hits +1 +proteomes/Limnohabitans.fasta.hits +1 +proteomes/Rhizobium.fasta.hits +0 +proteomes/Roseobacter.fasta.hits +1 +proteomes/Verrucomicrobia.fasta.hits +0