diff --git a/unix-code-hmm.txt b/unix-code-hmm.txt new file mode 100644 index 0000000..22eed7c --- /dev/null +++ b/unix-code-hmm.txt @@ -0,0 +1,30 @@ +#This is my shell script: + +#generate HMM profile for transporter gene + +../local/bin/hmmbuild transportergene.hmm ../transporterfiles.fasta.align + +#use hmm to search the 8 bacteria - record name and number of hits +#Usage from proteomes folder: bash unix-code-hmm.txt + +for files in *.fasta + +do + +../local/bin/hmmsearch --tblout genehits.txt transportergene.hmm $files +x=$(cat genehits.txt | grep -v "#" | wc -l) +echo $files,$x >> finalgenehits.txt + +done + +#These are the results for the eight bacteria found in the finalgenehits.txt file: + +#Arthrobacter.fasta,0 +#Bacillus.fasta,1 +#Clostridium.fasta,0 +#Flavobacterium.fasta,1 +#Limnohabitans.fasta,1 +#Rhizobium.fasta,0 +#Roseobacter.fasta,1 +#Verrucomicrobia.fasta,0 + diff --git a/unix-code-sequences.txt b/unix-code-sequences.txt new file mode 100644 index 0000000..f83776d --- /dev/null +++ b/unix-code-sequences.txt @@ -0,0 +1,64 @@ +#I condensed the four files for sporecoat and transporter, respectively, into single files in order to make the sequencing more efficient. See code below. + +cat sporecoat01.fasta sporecoat02.fasta sporecoat03.fasta sporecoat04.fasta > sporecoatfiles.fasta +cat transporter01.fasta transporter02.fasta transporter03.fasta transporter04.fasta > transporterfiles.fasta + +#I had to manually enter a few line breaks where necessary in order for the sequencing to run smoothly. + +#I then copied them into the remote computer file in order to use the muscle tool, using the code below. + +scp sporecoatfiles.fasta atataria@remote107.helios.nd.edu:/afs/nd.edu/user35 +scp transporterfiles.fasta atataria@remote107.helios.nd.edu:/afs/nd.edu/user35/atataria + +#Once the files where in the remote drive, I ran muscle to get the sequence alignments as shown below. + +[atataria@remote107 ~]$ muscle3.8.31_i86linux64 -in sporecoatfiles.fasta -out sporecoatfiles.fasta.align + +MUSCLE v3.8.31 by Robert C. Edgar + +http://www.drive5.com/muscle +This software is donated to the public domain. +Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97. + +sporecoatfiles 26 seqs, max length 152, avg length 138 +00:00:00 10 MB(1%) Iter 1 100.00% K-mer dist pass 1 +00:00:00 10 MB(1%) Iter 1 100.00% K-mer dist pass 2 +00:00:00 14 MB(1%) Iter 1 100.00% Align node +00:00:00 14 MB(1%) Iter 1 100.00% Root alignment +00:00:00 14 MB(1%) Iter 2 100.00% Refine tree +00:00:00 14 MB(1%) Iter 2 100.00% Root alignment +00:00:00 14 MB(1%) Iter 2 100.00% Root alignment +00:00:00 14 MB(1%) Iter 3 100.00% Refine biparts +00:00:00 14 MB(1%) Iter 4 100.00% Refine biparts +00:00:00 14 MB(1%) Iter 5 100.00% Refine biparts +00:00:00 14 MB(1%) Iter 6 100.00% Refine biparts +00:00:00 14 MB(1%) Iter 7 100.00% Refine biparts +00:00:00 14 MB(1%) Iter 8 100.00% Refine biparts +00:00:00 14 MB(1%) Iter 9 100.00% Refine biparts +00:00:01 14 MB(1%) Iter 10 100.00% Refine biparts +[atataria@remote107 ~]$ muscle3.8.31_i86linux64 -in transporterfiles.fasta -out transporterfiles.fasta.align + +MUSCLE v3.8.31 by Robert C. Edgar + +http://www.drive5.com/muscle +This software is donated to the public domain. +Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97. + +transporterfiles 4 seqs, max length 412, avg length 410 +00:00:00 10 MB(1%) Iter 1 100.00% K-mer dist pass 1 +00:00:00 10 MB(1%) Iter 1 100.00% K-mer dist pass 2 +00:00:00 13 MB(1%) Iter 1 100.00% Align node +00:00:00 13 MB(1%) Iter 1 100.00% Root alignment +00:00:00 13 MB(1%) Iter 2 100.00% Root alignment +00:00:00 13 MB(1%) Iter 3 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 4 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 5 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 6 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 7 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 8 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 9 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 10 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 11 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 12 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 13 100.00% Refine biparts +00:00:00 13 MB(1%) Iter 14 100.00% Refine biparts