-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprobdecon.pl
executable file
·103 lines (83 loc) · 2.31 KB
/
probdecon.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env perl
use strict;
use warnings;
BEGIN {
use FindBin '$Bin';
use Data::Dumper;
use Getopt::Long;
use Scalar::Util qw(looks_like_number);
use POSIX qw(floor);
require "$Bin/share/Metaheader.pm";
}
# Takes as input matrix, output a matrix of vectors
# Makes sense after takediagonalblock on single chromosomes
my $islog = 0;
GetOptions("log" => \$islog) # flag
or die("Error in command line arguments\n");
if ($#ARGV != 0) {
print STDERR "usage: ./probdecon.pl stepsize [--log] < rstmatrix"
. " > pdcmat\n";
exit;
}
my $steps = shift @ARGV;
die "Stepsize should be a number" if (!looks_like_number($steps));
die "Stepsize is expected to be greater than 1" if ($steps <= 1 && $islog);
# histograms
sub binscale {
my $i = shift;
floor($islog
? $steps ** ((2.0 * $i + 1.0) / 2)
: $steps * ((2.0 * $i + 1.0) / 2));
}
sub binsize {
my $i = shift;
($islog
? ($steps ** ($i + 1)) - ($steps ** $i)
: $steps );
}
# Read the header
my $header = <>;
chomp($header);
my $metah = Metaheader->new($header);
my @rowarray = @{ $metah->{rowinfo} };
sub getpos {
my $i=shift;
return ($rowarray[$i]->{pos}
// floor(($rowarray[$i]->{st} + $rowarray[$i]->{en})/2)
// die "No positional information found in header");
}
my $ipos = getpos(0);
my $enpos = getpos($#rowarray);
my $dist = $enpos - $ipos;
my $maxind = ($islog
? floor(log($dist) / log($steps))
: floor($dist / $steps));
print join("\t", "\"PDC\"", map { binscale($_); } (0 .. $maxind)), "\n";
my $i = 0;
while(<>) {
chomp;
my @records = split("\t");
my $head = shift @records;
my $frag = $metah->metarecord($head);
# last if ($i == $#rowarray);
$ipos = getpos($i);
$dist = $enpos - $ipos;
$maxind = ($islog
? floor(log($dist) / log($steps))
: floor($dist / $steps));
# Make single restriction fragment histogram
my @tmphisto = (0) x ($maxind+1);
for my $j ($i .. $#rowarray) {
my $jpos = getpos($j);
my $hits = $records[$j-$i];
my $ijdist = $jpos - $ipos;
my $bin = ($islog
? floor(log($ijdist) / log($steps))
: floor($ijdist / $steps));
$tmphisto[$bin]+=$hits;
}
my @reshisto = map { $tmphisto[$_] / binsize($_) } (0 .. $maxind);
print join("\t", "$head", @reshisto), "\n";
$i++;
}
0;