Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions #1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Build one multi-FASTA file per gene family from the individual reference
# sequences, then align each combined file with MUSCLE.
#
# Environment:
#   MUSCLE - path to the muscle executable; defaults to the shared install.
#
# Outputs (overwritten on every run):
#   gene_sequences/allsc.fasta     gene_sequences/allsc.align
#   gene_sequences/alltrans.fasta  gene_sequences/alltrans.align
set -eu

MUSCLE=${MUSCLE:-/afs/nd.edu/user25/vsanders/local/bin/muscle3.8.31_i86linux64}

# combine_and_align PREFIX FASTA...
# Concatenate the given FASTA files into PREFIX.fasta, then align that file
# into PREFIX.align. Returns non-zero if an input is missing or muscle fails.
combine_and_align() {
  prefix=$1
  shift

  # Start from an empty file so that re-running the script does not append a
  # second copy of every sequence.
  : > "$prefix.fasta"

  for fasta in "$@"; do
    # An unmatched glob is passed through literally; stop here instead of
    # handing muscle an empty or partial input.
    if [ ! -f "$fasta" ]; then
      printf 'error: input file not found: %s\n' "$fasta" >&2
      return 1
    fi

    cat -- "$fasta" >> "$prefix.fasta"

    # Terminate the record with a real newline when the input lacks one, so
    # the next '>' header starts on its own line. The previous `echo \n`
    # printed a literal "n", which ended up as an extra residue on the last
    # sequence of each input file.
    if [ -n "$(tail -c 1 -- "$fasta")" ]; then
      printf '\n' >> "$prefix.fasta"
    fi
  done

  "$MUSCLE" -in "$prefix.fasta" -out "$prefix.align"
}

combine_and_align gene_sequences/allsc gene_sequences/sporecoat0[1234].fasta

combine_and_align gene_sequences/alltrans gene_sequences/transporter0[1234].fasta

13 changes: 13 additions & 0 deletions #2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

# Build a profile HMM from the aligned transporter sequences, search every
# proteome with it, and record the number of hits found in each proteome.
#
# Environment:
#   HMMER_BIN - directory containing hmmbuild and hmmsearch; defaults to the
#               shared install.
#
# Outputs (overwritten on every run):
#   transporter.hmm
#   proteomes/<name>.fasta.hits  - hmmsearch --tblout table for each proteome
#   hmm_hits.txt                 - hits-file name followed by its hit count
set -eu

HMMER_BIN=${HMMER_BIN:-/afs/nd.edu/user25/vsanders/local/bin}
summary=hmm_hits.txt

"$HMMER_BIN/hmmbuild" transporter.hmm gene_sequences/alltrans.align

# Truncate the summary so that a re-run replaces the old results instead of
# appending a duplicate set below them.
: > "$summary"

for proteome in proteomes/*.fasta; do
  # An unmatched glob is passed through literally; fail with a clear message.
  if [ ! -f "$proteome" ]; then
    printf 'error: no proteome FASTA files found in proteomes/\n' >&2
    exit 1
  fi

  hits=$proteome.hits
  "$HMMER_BIN/hmmsearch" --tblout "$hits" transporter.hmm "$proteome"

  # Count the table rows, i.e. every line that is not a '#' comment line.
  # Anchoring the pattern keeps rows whose text merely contains a '#'.
  # grep exits 1 when the count is 0, which is a valid result here; any other
  # non-zero status is a real error and aborts the script.
  count=$(grep -vc '^#' -- "$hits") || [ "$?" -eq 1 ]

  # Summarise only the table written just above, so stale *.hits files left
  # in proteomes/ by earlier runs are never counted.
  printf '%s\n%s\n' "$hits" "$count" >> "$summary"
done
104 changes: 104 additions & 0 deletions allsc.align
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
>Q12FX3_POLSJ/50194
TWGVLLAAGTAHATISCSV-SGNGFT-SVYDPIS------TVPNDNVSSVTINCS-R---
-ASGDPTTTTYSLASTNGLYPQGQNNRAY--YPTNKYLKYDIYKDAAYSSRWGPG--GSA
PFTGTLNFGSGT-SA-SLTLPYYNRVAA-QQSAVAADYTDTMTAT-
>Q60C08_METCA/20167
LLACPKIS--DADPYQCDI-GNISVPHAVYDPTDS-----NPNSSGVGTVGITCHLKNAK
QTQQVQYTIALSRGSSGSYNP--RRMSG-----GRGSLGYNLYLDAARVTIWGDGSGGTF
PLRGTLLLNPTTPVQ-QVIHNIYGLIPP-LQDVYAGTYTDTVTIT-
>A9CH19_AGRFC/11158
IAAAFVAS--PVLAQSCTF-SMSDMNFGFVNLAG------GAAVDTTATLSVTCN-NPLS
LA--LSIRICPNINAGGGGQSGGIRRMLQ----GSNILNYQLYQTSARTTAWGSVTQPAL
GAPPPIDMALPLLIN-STTRTVYGRINAGQASAARGLYLSSFAGG-
>Q985D8_RHILO/37183
SAALLLPT--VAWAQSCSF-GVSAMNFGLVDTLS------GSSSNSTATLSVNCT-G---
-LLLQRILVCPNLGTGSGGATASARQMLS----GANDLNYQLYSDSARSVVWGSYAWPYP
PTAPGFALTLNVLGSGSASQTIYGAILGGQATAVPSTYLSTFSGS-
>Q8XPY9_RALSO/25163
-----------ASAQSCSV-ASASLNFGSISPVQ------AGNTDTSTTLTVSCS-GFLL
QG--TVARACLNLGVGSGDTGISPRVLSA----GANQLQYNLYADSARSVVWGGRTTPAT
PAIQVD-VSLGLLGFGSATVTVYGRVPGGQTTVPAGAYTQSFSGTN
>Q8XVY3_RALSO/23163
VLVALASWAPGALAVSCSV-SANALSFGAYNT--------TSNLTGTTTVTITCG-A---
WGGASSINYTLSASVGSGTYANRQVLN------GSNVIAYNLYTTSADTSIWGDGN-GDG
TVTLSGTVTKQ--VG-TVNLTIYGKING-GQNVVPGSYATTIPIT-
>A1VIJ0_POLNA/19170
ALFLLLATAGPAQAGSCTV-GSSGLAFGAYQPLTFAGKLTSSAVTSNASISVVCT-GIAS
GG---AYSIALGPSTTGSGDRISTRYLGN--SNGGDDMSFNIYTSASYSTVWGNGTTG-G
LVGGSIPV--G--DS-NQSQPVYGRIAASQNTLRAGSYSGSLTMTN
>Q0AAK6_ALKEH/8153
SLFLVAAGSGSAQAYTCSI-SADPLAFGQYDPIT------GAQVDGASEVSVSCS-LLGL
VSLLVSYEISLDPGTGGSYHPRALSS-------ATDTLDYNLYVDTARTEIWGDGTDDTA
TVTDSYTLGVL--TV-TRYYPVYGRVFA-DQNVAAGVYDDTITAT-
>Q1D5L1_MYXXD/13153
AVAGVCGLLPGLAGAVCQIRSTIGVSFGTYLTTD------LLPRDSAGSITYRCE-G---
QI--TPITIDFSAGGSG--TPLARSMAGP----GAQRLEYNLYVDATRLIVWGNGTSGTG
RYGPVVPL--F--GV-EVTVPIFGRIPA-GQAIPAGAYADTVVMT-
>Q3A2W0_PELCD/10150
IIVLLFAV--DAYAFHCEV-TTTPVSFGAYDVFS------SFSLDTTGRISVSCN-NPEK
KR--MPVTISISRGAANSFSPRQMRRIG-----GSDRMDYYLFVDASRTAVWGDGTGGSS
TYVGMID---R--TS-PLNVPIYGRIPA-RQNLRAGSYQDILVVTN
>Q0C623_HYPNA/23151
ANGTLDVQ--ATVVNTCVV-LTAPVVFASVG---------LDEVTANGSITVNCT-N---
-T--SAFTVALDGGDSGDISARSLTHAS-----LPASFNYQLYTDAGLTTVWGDGVTGSQ
ANGS---------GP-SQTLTVYGRTTS--TPDTAGAYADEVQVT-
>A7H7Q5_ANADF/16150
APRAVDAAQPPSPGPSCSV-SAGSVAFGAYDPLS------PTHLDSTGTIGLTCA-V---
-R--QLVTISLGTGQSG---TFARELRGP----GGAALRYDLYTDATRTQVWGDGTAGTA
TWPFET-------ER-GRYVPVYARVLA-GQDVPAGPYSDTIVVT-
>Q2IFK8_ANADE/19157
ATALSLVAPAAARAASCSLTMGTSIAFGAYDPLS------PVPLTTTGMLQYRCS-R---
-G--QPIRITFTAGSSGDVYARTLRQ-------GPWTLAYNLYADAGFGTVWGDGTGGTA
AAPAVTTLS-N--GL-TVAY-VFGRIPA-RQEPPVGPYSDTIVVT-
>Q0AAK3_ALKEH/24160
DTATFDVT--ATVDPTCTV-DADNLVFGTYDPFS------DTPLDENSEIRVQCT-S---
-D--TPYDIGLDDGDNTGAEGERRMALAD----ESDFLEYDLYHDNHGGTSWGDIDSGAE
LTGLSG----T--GS-EQSYVVYGRIFA-EQSVAVGNYVDTIEVT-
>A7H7Q6_ANADF/25150
ATAQFQVT--ATVVKKCKI-SATTIAFGNYDP--------ATILSAEGTLTLKCT-K---
-G--TLYSVALDGGSTGS-----RQMTQ-----AAEVLDYELYSDAGHTAVWPSTAAAPS
VAAA---------GA-DEALIIFAQVPA-DQYPAPGAYADTVTAT-
>Q2IFL0_ANADE/27162
ATATLDVT--ATVVPSCTI-AATPVAFGSYDPLVTN---AATALDAQGTVTVTCT-T---
-G--TAYTVGLGAGNSGSGSRAMQHASI-----AGAQLPYELYQEAARTTVWDSTVMQAG
TAAS---------IT-PVQYTVYGRIPA-AQNVPTGNYADAVVAT-
>Q985D8_RHILO/205338
DRPTFTIN--AIVPANCLL-AIQNIDFGSNGIL-------GANVDATGGVSITCT-P---
-G--TPYTVSLSNGT-TGSAPTARKMSK-----GVETVTYGLYKDNARSQVWGDAAMPGS
TVAGSG----S--GA-AQNLTIYGRVPA-QTTPSAGVYTDTVVVT-
>Q8XVY0_RALSO/26164
TTTTFSVS--TTVNATCVINSASALTFAAFDPS-------QGAQASTSSISVNCT-N---
-T--TPFNIGLNAGTGTGATVASRVMTS-----GANTLTYSLYQDSGHASVWGN-TVGTN
TVAGTGAGMAA--GN-AITKTVYGLIPS-QPNTVPGNYADTVTVT-
>Q0BSH7_GRABC/37169
TTTTFQVT--ATVQASCII-QATNLSFGNYS---------GSQTDATSTIQVTCT-N---
-S--TPYNVGLSAGTGSGATVSNRKMSLN----STSALPYALYSDASRSTNWGN-TPNQD
TVSGTG----N--GS-AQSLTVYGRIQT-GNYPTPGSYADTITAT-
>Q8XPY6_RALSO/36170
KTTTFTVS--LTLQADCSI-SANALNFGTQGVL-------AANVDQTATLSVTCS-N---
-T--APYNVGFDAGTTTGSTIAARLLAGS----GAATVGFQLYSDSARTQIWGN-TVGTD
TVSGTG----S--GT-AQVLTVYGRVPS-QNSPAAGTYSTTITAT-
>Q63W79_BURPS/31169
ATATFTVS--LTIQANCTI-SANALSFGTNGVL-------ATAVNQQTTLSVTCS-N---
-T--TSYNVGLDAGNVSGSTVSSRLLAGTTTGNTSTTVSFQLYQDSGHTTIWGN-TVGTN
TVSGTG----N--GT-AQTLSVYGQVPA-QTTPKPDTYESTVTATN
>Q7CVQ4_AGRFC/27162
ATTNFNVQ--ITIQAACQINSAGNLDFGTNGVI-------GAPIDVTSQIVVQCT-A---
-S--TPFSLGLSAGAGSGATVANRLMTSA----AGATISYSLYTTAAHSTVWGN-TVGTD
RQTGTG----T--GA-PQNFTVFGRVPA-QTTPAVGVYTDTVTAT-
>Q985D5_RHILO/27162
ATGNMTVR--ITIQAECKVQTATDMDFGTNGVI-------DANVDQTSTISVQCT-N---
-S--TPYNVGLSAGVGAGATVAVRKMTGP----AAAVLNYSLYRDVARAQLWGT-TIGTD
TVAGTG----N--GA-AQPLTVYGRVPP-QTTPGAGVYTDTVAIT-
>A6X8A6_OCHA4/27162
ATGNMNVR--ITIQAECKIVTATDLDFGTKGVI-------DVNVDQTSTISVQCT-N---
-G--TPYTVGLSAGGGAGATVAMRKMTGA----ASATINYTIYRDAARTQVWGV-TAGTD
VVSGTG----N--GN-AQSITAYGRVPA-QTTPAPGVYSDVVSVT-
>A9CH19_AGRFC/182313
TQVPFTVS--AAVAPTCII-SAQNINFGSHGVL-------NTAVDANGAINLTCT-N---
-G--LNYSVALNGGLSNS-PPAARQMVQ-----GAASIIYGLYRDVSRTNVWG--SAAGQ
IATGTG----N--GS-LQTLTVFGRVPA-QNTPAPGNYADTVVVT-
>Q63W78_BURPS/193326
-TFAFTAS--ATVVNDCFI-NATNVAFGSTGVI-------QGALTATGTISAQCT-N---
-G--DAFRIALNGGASGNVAARAMQRTG-----GGGAVNYQLYLDAAHSTIWGDGTAGTS
TATGTG----S--GL-SQSLTVYGQVPA-QTTPAPGTYSDTITAT-
32 changes: 32 additions & 0 deletions alltrans.align
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
>A0A026RKY7_ECOLX/4411
NIPFRNAYYRFASSYSFLFFISWSLWWSLYAIWLKGHLGLTGTELGTLYSVNQFTSILFM
MFYGIVQDKLGLKKPLIWCMSFILVLTGPFMIYVYEPLLQSNFSVGLILGALFFGLGYLA
GCGLLDSFTEKMARNFHFEYGTARAWGSFGYAIGAFFAGIFFSISPHINFWLVSLFGAVF
-MMINMRFKDKDHQCIAADAGGVKKEDF-----IAVFKDRNFWVFVIFIVGTWSFYNIFD
QQLFPVFYAGLFESHDVGTRLYGYLNSFQVVLEALCMAIIPFFVNRVGPKNALLIGVVIM
ALRILSCALFVNPWIISLVKLLHAIEVPLCVISVFKYSVANFDKRLSSTIFLIGFQIASS
LGIVLLSTPTGILFDHAGYQTVFFAISGIVCLMLLFGIFFLSKKREQIVMETPVN
>RAFB_ECOLX/4415
ASTHKNTDFWIFGLFFFLYFFIMATCFPFLPVWLSDVVGLSKTDTGIVFSCLSLFAISFQ
PLLGVISDRLGLKKNLIWSISLLLVFFAPFFLYVFAPLLHLNIWAGALTGGVFIGFVFSA
GAGAIEAYIERVSRSSGFEYGKARMFGCLGWALCATMAGILFNVDPSLVFWMGSGGALLL
LLLLYLARPSTSQTAMVMNALGANSSLISTRMVFSLFRMRQMWMFVLYTIGVACVYDVFD
QQ-FAIFFRSFFDTPQAGIKAFGFATTAGEICNAIIMFCTPWIINRIGAKNTLLVAGGIM
TIRITGSAFATTMTEVVILKMLHALEVPFLLVGAFKYITGVFDTRLSATVYLIGFQFSKQ
LAAILLSTFAGHLYDRMGFQNTYFVLGMIVLTVTVISAFTLS-SSPGIVHPSVEN
>LACY_CITFR/1412
MYYLKNTNFWMFGFFFFFYFFIMGAYFPFFPIWLHEVNHISKGDTGIIFACISLFSLLFQ
PIFGLLSDKLGLRKHLLWVITGMLVMFAPFFIYVFGPLLQVNILLGSIVGGIYLGFIYNA
GAPAIEAYIEKASRRSNFEFGRARMFGCVGWALCASIAGIMFTINNQFVFWLGSGCAVIL
ALLLLFSKTDVPSSAKVADAVGANNSAFSLKLALELFKQPKLWLISLYVVGVSCTYDVFD
QQ-FANFFTSFFATGEQGTRVFGYVTTMGELLNASIMFFAPLIVNRIGGKNALLLAGTIM
SVRIIGS-HSHTALEVVILKTLHMFEIPFLIVGCFKYITSQFEVRFSATIYLVCFCFFKQ
LAMIFMSVLAGKMYESIGFQGAYLVLGIIRVSFTLISVFTLSGPGPFSLLRRREN
>LACY_KLEOX/6416
LAPRERHNFIYFMLFFFFYYFIMSAYFPFFPVWLAEVNHLTKTETGIVFSCISLFAIIFQ
PVFGLISDKLGLRKHLLWTITILLILFAPFFIFVFSPLLQMNIMAGALVGGVYLGIVFSS
RSGAVEAYIERVSRANRFEYGKVRVSGCVGWALCASITGILFSIDPNITFWIASGFALIL
GVLLWVSKPESSNSAEVIDALGANRQAFSMRTAAELFRMPRFWGFIIYVVGVASVYDVFD
QQ-FANFFKGFFSSPQRGTEVFGFVTTGGELLNALIMFCAPAIINRIGAKNALLIAGLIM
SVRILGSSFATSAVEVIILKMLHMFEIPFLLVGTFKYISSAFKGKLSATLFLIGFNLSKQ
LSSVVLSAWVGRMYDTVGFHQAYLILGCITLSFTVISLFTL--KGSKTLLPATAN
16 changes: 16 additions & 0 deletions hmm_hits.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
proteomes/Arthrobacter.fasta.hits
1
proteomes/Bacillus.fasta.hits
1
proteomes/Clostridium.fasta.hits
0
proteomes/Flavobacterium.fasta.hits
1
proteomes/Limnohabitans.fasta.hits
1
proteomes/Rhizobium.fasta.hits
0
proteomes/Roseobacter.fasta.hits
1
proteomes/Verrucomicrobia.fasta.hits
0