-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrelpair2sar
executable file
·130 lines (103 loc) · 2.85 KB
/
relpair2sar
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/perl
use warnings;
use strict;
use fralib;
use File::Basename;
use Getopt::Long;
use Pod::Usage;
use POSIX;
=head1 NAME
relpair2sar
=head1 SYNOPSIS
relpair2sar [options] relpair-out-file
-h help
-s output sar-file
relpair-out-file file
example: relpair2sar *.out
Extracts relative pairs from relpair output files.
It will be convenient to use fselect to outer join relpair and plink results:
fselect a.sample-pair-id, a.sample-id-1, a.sample-id-2, a.relationship, a.frequency, b.relationship, b.z0, b.z1, b.z2, b.kinship-coefficient, b.rss from pscalare.sar outer join paltum.sar where a.sample-pair-id=b.sample-pair-id
=head1 DESCRIPTION
=cut
#option variables
my $help;
my $colNo;
my %label2Column;
#data structures
my $sarFile;
my $headerProcessed;
my %SAMPLE_PAIR;
#initialize options
Getopt::Long::Configure ('bundling');
if(!GetOptions ('h'=>\$help, 's=s'=>\$sarFile) || scalar(@ARGV)<1)
{
if ($help)
{
pod2usage(-verbose => 2);
}
else
{
pod2usage(1);
}
}
my $relpairHeader = <<RELPAIR_HEADER;
*****************************************************
******* RELPAIR VERSION 2.0.1, JUNE 13, 2004 ********
*** WRITTEN BY WILLIAM L. DUREN, MICHAEL EPSTEIN, ***
********** MINGYAO LI AND MICHAEL BOEHNKE ***********
*****************************************************
RELPAIR_HEADER
$relpairHeader =~ s/([*\n.])/\\$1/g;
for my $relpairOutFile (@ARGV)
{
open(RELPAIR_OUT_FILE, $relpairOutFile) || die "Cannot open $relpairOutFile";
$headerProcessed = 0;
local $/;
my $relpairOutFileContent = <RELPAIR_OUT_FILE>;
close(RELPAIR_OUT_FILE);
if ($relpairOutFile!~/\.out$/)
{
warn "$relpairOutFile does not have an \"out\" extension";
next;
}
elsif ($relpairOutFileContent!~/$relpairHeader/m)
{
warn "$relpairOutFile does not contain the relevant header";
next;
}
if(!defined($sarFile))
{
my ($name, $path, $ext) = fileparse($relpairOutFile, '\..*');
$sarFile = "$name.sar";
}
while($relpairOutFileContent=~/\n (FAM[^\n]+)(?=\n)/g)
{
my @fields = split(/\s+/, $1, -1);
if (scalar(@fields)>=6)
{
my $sampleID1 = $fields[1];
my $sampleID2 = $fields[3];
my $relationship = $fields[5];
my $samplePair = join('/', sort(($sampleID1, $sampleID2)));
$SAMPLE_PAIR{$samplePair}{$relationship}++;
}
else
{
die "too few fields!!!!!";
}
}
}
if (defined($sarFile))
{
open(SAR, ">$sarFile") || die "Cannot open $sarFile";
print SAR "sample-pair-id\tsample-id-1\tsample-id-2\trelationship\tfrequency\n";
for my $samplePair (keys(%SAMPLE_PAIR))
{
my @samples = split('/', $samplePair);
for my $relationship (keys(%{$SAMPLE_PAIR{$samplePair}}))
{
print SAR "$samplePair\t$samples[0]\t$samples[1]\t$relationship\t$SAMPLE_PAIR{$samplePair}{$relationship}\n";
}
}
close(SAR);
}