-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
25 changed files
with
2,261 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
#!usr/bin/perl -w | ||
use strict; | ||
use Getopt::Long; | ||
|
||
my $red = "\033[0;31m"; | ||
my $end = "\033[0m"; | ||
|
||
my($in,$out,$len,$chr,$help); | ||
|
||
GetOptions | ||
( | ||
"i=s"=>\$in, | ||
"o=s"=>\$out, | ||
"l=i"=>\$len, | ||
"ch=s"=>\$chr, | ||
"help|?"=>\$help | ||
); | ||
|
||
my $usage=<<INFO; | ||
Notice: | ||
${red}The input .sam/.bam file must be sorted according to coordinate.$end | ||
Usage: | ||
perl $0 [options] | ||
Options: | ||
-i <string> <in.input sam/bam file> | ||
-o <string> <out.output median of each positions> | ||
-l <int> <the longest length of reads, default=150> | ||
-ch <string> <the number of chromosome> | ||
INFO | ||
|
||
die $usage if ($help || !$in || !$out ||!$chr); | ||
|
||
die "$red$in does not exists$end\n" if(!(-e "$in")); | ||
|
||
if($in =~ /\.bam$/) | ||
{ | ||
my $head = `samtools view -H $in | grep SO`; | ||
$head =~ s/^\s+|\s+$//g; | ||
die "$red$in is not sorted bam file$end\n" if($head !~ /coordinate/); | ||
my $bai=$in; | ||
$bai=~s/\.bam/\.bai/g; | ||
if( (!-e "${in}.bai") || (!-e "$bai") ) | ||
{ | ||
`samtools index -@ 20 $in`; | ||
} | ||
open IN,"samtools view $in $chr |" || die $!; | ||
} | ||
else | ||
{ | ||
my $so = &Sorted($in); | ||
die "$red$in is not sorted sam file$end\n" if($so == 0); | ||
if($in =~ /\.gz$/) | ||
{ | ||
open IN,"gzip -dc $in | awk '$3==$chr' | " || die $!; | ||
} | ||
else | ||
{ | ||
open IN,"grep -w $chr $in |" || die $!; | ||
} | ||
} | ||
|
||
sub Sorted | ||
{ | ||
my $in = @_; | ||
if($in =~ /\.gz/) | ||
{ | ||
open IN, "gzip -dc $in | grep -v '^\@' | " || die $!; | ||
} | ||
else | ||
{ | ||
open IN, "$in | grep -v '^\@' | " || die $!; | ||
} | ||
|
||
my $read = 0; | ||
my $pos = 0; | ||
my $tags = 0; | ||
my $tagu = 0; | ||
my $chrt; | ||
while(my $line = <IN>) | ||
{ | ||
chomp $line; | ||
next unless($line !~ /^@/); | ||
$read ++; | ||
last if($read > 2000); | ||
my @F = split /\t/, $line; | ||
if($read == 1) | ||
{ | ||
$pos = $F[3]; | ||
$chrt = $F[2]; | ||
} | ||
else | ||
{ | ||
last if($F[2] ne $chrt); | ||
if($F[3] >= $pos) | ||
{ | ||
$tags ++; | ||
$pos = $F[3]; | ||
} | ||
else | ||
{ | ||
$tagu ++; | ||
} | ||
} | ||
} | ||
|
||
my $sort = ($tags > $tagu) ? 1 : 0; | ||
return $sort; | ||
} | ||
|
||
if($out =~ /\.gz$/) | ||
{ | ||
open OUT,"| gzip > $out" || die $!; | ||
} | ||
else | ||
{ | ||
open OUT,"| gzip > $out.gz" || die $!; | ||
} | ||
|
||
$len ||= 150; | ||
|
||
#initialization of @Median | ||
my @Median = (); | ||
for my $i(0..($len-1)) | ||
{ | ||
$Median[$i] = []; | ||
} | ||
|
||
sub Find_Median | ||
{ | ||
my $remainder = $#_%2; | ||
my $quotient = int($#_/2); | ||
my $the_Median; | ||
if($remainder == 0) | ||
{ | ||
$the_Median = $_[$quotient]; | ||
} | ||
else | ||
{ | ||
$the_Median = int( ( $_[$quotient+1] + $_[$quotient] ) / 2 ); | ||
} | ||
return $the_Median; | ||
} | ||
|
||
sub Out_Queue | ||
{ | ||
my $median_temp = shift @_; | ||
my @out; | ||
for(my $j = 0;; $j++) | ||
{ | ||
last if(!$median_temp->[$j]); | ||
push(@out, $median_temp->[$j]); | ||
} | ||
return @out; | ||
} | ||
|
||
my $gap; | ||
my $aloci = 0; | ||
my @median = (); | ||
my $median_value = 0; | ||
while(<IN>) | ||
{ | ||
chomp; | ||
my @temp=split/\s+/, $_; | ||
next if($temp[2] ne $chr); | ||
$gap = $temp[3] - $aloci; | ||
if($aloci != 0 and $gap != 0) | ||
{ | ||
#progress of median finding | ||
for(my $i = 1; $i <= $gap; $i++) | ||
{ | ||
last if !$Median[0]->[0]; | ||
@median = &Out_Queue(@Median); | ||
shift @Median; | ||
@median = sort{ $a<=>$b } @median; | ||
$median_value = &Find_Median(@median); | ||
my $print_loci = $aloci + $i - 1; | ||
print OUT "$print_loci\t$median_value\n"; | ||
@median = (); | ||
$Median[$len-1] = []; | ||
} | ||
} | ||
my @quality = split '',$temp[10]; | ||
|
||
#adding the new qualities | ||
for my $k(0..$#quality) | ||
{ | ||
my $quality_score = ord($quality[$k])-33; | ||
push(@{$Median[$k]}, $quality_score); | ||
} | ||
$aloci = $temp[3]; | ||
} | ||
|
||
#print out the rest elements in @Median | ||
while($Median[0]->[0]){ | ||
@median = &Out_Queue(@Median); | ||
shift @Median; | ||
@median = sort {$a<=>$b} @median; | ||
$median_value = &Find_Median(@median); | ||
print OUT "$aloci\t$median_value\n"; | ||
$aloci++; | ||
} | ||
|
||
close IN; | ||
close OUT; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!usr/bin/perl -w | ||
use strict; | ||
use Getopt::Long; | ||
use File::Spec; | ||
use File::Basename; | ||
|
||
my $red = "\033[0;31m"; | ||
my $end = "\033[0m"; | ||
|
||
my ($in,$out,$fasta,$help); | ||
|
||
GetOptions( | ||
"i=s"=>\$in, | ||
"f=s"=>\$fasta, | ||
"o=s"=>\$out | ||
); | ||
|
||
my $usage=<<INFO; | ||
Usgae: | ||
perl $0 [options] | ||
Options: | ||
-i file, input PQ file, the directory of $red\$in will be used for save the output$end | ||
-f file, input fasta file | ||
-o file, the prefix of the output PQ with seq file, ${red}default [PQ_g4predict] [the output is \$out_plus && \$out_minus]$end | ||
INFO | ||
|
||
die $usage if ($help || !$in || !$fasta); | ||
|
||
my $dir = dirname(File::Spec->rel2abs( $in )); | ||
$dir=~s/^\s+|\s+$//g; | ||
|
||
$out ||= "PQ_g4predict"; | ||
|
||
open IN,"<$in" || die $!; | ||
open OUTP,">$dir/${out}_plus" || die $!; | ||
open OUTM,">$dir/${out}_minus" || die $!; | ||
open INF,"<$fasta" || die $!; | ||
|
||
my @ref=('N'); | ||
while(<INF>) | ||
{ | ||
next if $_ =~ />/; | ||
chomp; | ||
my @fasta=split("",$_); | ||
push(@ref,@fasta); | ||
} | ||
close INF; | ||
|
||
while(<IN>) | ||
{ | ||
chomp; | ||
my @temp = split/\s+/,$_; | ||
my $pqs = $temp[1] + 1; | ||
my $pqe = $temp[2]; | ||
my $line = join('',@ref[$pqs..$pqe]); | ||
my $length = length($line); | ||
if($temp[5] eq "+") | ||
{ | ||
print OUTP "$pqs\t$pqe\t$length\t$line\t$temp[4]\t$temp[5]\n"; | ||
} | ||
elsif($temp[5] eq "-") | ||
{ | ||
print OUTM "$pqs\t$pqe\t$length\t$line\t$temp[4]\t$temp[5]\n"; | ||
} | ||
} | ||
close IN; | ||
close OUTP; | ||
close OUTM; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/perl -w | ||
######################################################################### | ||
# FileName: SplitFa2Chr_G4Predict.pl | ||
# Version: 96a78e56-2c37-4de2-9a77-1ce128c2d58e | ||
# Author: Luna <[email protected]> | ||
# CreatedTime: Thu Sep 9 16:58:20 2021 | ||
######################################################################### | ||
use strict; | ||
use Getopt::Long; | ||
use File::Basename; | ||
my $red = "\033[0;31m"; | ||
my $end = "\033[0m"; | ||
my ($in, $dir, $py2, $G4Predict, $help); | ||
|
||
GetOptions | ||
( | ||
"i|in=s"=>\$in, | ||
"d|dir=s"=>\$dir, | ||
"p|py=s"=>\$py2, | ||
"g|g4=s"=>\$G4Predict, | ||
"help|?"=>\$help, | ||
); | ||
|
||
|
||
my $usage=<<INFO; | ||
Usage: | ||
${red}1. Split the whole genome reference fasta to each chr fasta$end | ||
${red}2. Using g4predict intra for predicting putative G Quadruplexes in each chromosome$end | ||
perl $0 | ||
-i <the whole genome reference fasta file> | ||
${red}-d <the directory for saving the chromosome fa file> $end | ||
-p <the absolute path of python2> (default: `which python2`) | ||
-g <the absolute path of g4predict> (default: `which g4predict`) | ||
INFO | ||
|
||
die "$usage\n" if ($help || !$in || !$dir); | ||
|
||
$py2 ||= `which python2`; | ||
$py2=~s/^\s+|\s+$//g; | ||
die "${red}There did not install python2 $py2$end\n" if(!$py2); | ||
|
||
$G4Predict ||= `which g4predict`; | ||
$G4Predict=~s/^\s+|\s+$//g; | ||
die "${red}There did not install g4predict $G4Predict$end\n" if(!$G4Predict); | ||
|
||
if($in =~ /\.gz$/) | ||
{ | ||
open IN, "gzip -dc $in |" || die $!; | ||
} | ||
else | ||
{ | ||
open IN,"$in" || die $!; | ||
} | ||
|
||
my @chrs = (); | ||
|
||
while (<IN>) | ||
{ | ||
chomp; | ||
if ($_ =~ />(.*)/) | ||
{ | ||
my $chr = $1; | ||
|
||
my $out = "$chr.fa"; | ||
if(!-e "$dir/$chr") | ||
{ | ||
`mkdir -p $dir/$chr` || die "${red}Error: can't create $dir/$chr$end\n"; | ||
} | ||
push(@chrs, $chr); | ||
open OUT, "> $dir/$chr/$out" || die $!; | ||
print OUT "$_\n"; | ||
} | ||
else | ||
{ | ||
print OUT "$_\n"; | ||
} | ||
} | ||
close IN; | ||
close OUT; | ||
|
||
for my $chr(@chrs) | ||
{ | ||
my $fa = "$dir/$chr/$chr.fa"; | ||
my $cmd = "$py2 $G4Predict intra -f $fa -b $dir/$chr/PQ_g4predict.bed -s -M"; | ||
|
||
print "NOTICE: run this command\n${red}$cmd$end\n"; | ||
system('$cmd'); | ||
} | ||
|
||
print "${red}$in was splitted into chromosome fa in $dir$end\n"; | ||
print "${red}PQs of each chromosome were predicted by $G4Predict, saved into $dir/chr*$end\n"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash | ||
######################################################################### | ||
# FileName: runG4Predict.sh | ||
# Version: 5aee49a0-a543-485d-aaa2-ac0f51b18e69 | ||
# Author: Luna <[email protected]> | ||
# CreatedTime: Wed Sep 8 22:53:27 2021 | ||
######################################################################### | ||
|
||
python2 /usr/local/bin/g4predict intra -f chr22 -b chr22.bed -s -M |
Oops, something went wrong.