-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfsplit
executable file
·105 lines (80 loc) · 1.69 KB
/
fsplit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/perl
use warnings;
use strict;
use fralib;
use POSIX qw(ceil floor);
use File::Basename;
use Getopt::Long;
use Pod::Usage;
=head1 NAME

fsplit

=head1 SYNOPSIS

 fsplit [options] <filename>

  -h  help
  -n  number of data rows in each split file, must be >1
      (header lines starting with '#' are not counted)
  -o  output file name prefix; defaults to the base name of <filename>

 usage: fsplit -n 500 pscalare.txt

 Splits a genotype/table file.

=head1 DESCRIPTION

Splits <filename> into consecutive chunks of at most -n data rows.
Lines beginning with '#' are treated as header lines and are copied to the
top of every split file. Split files are named <prefix>_<i>.sites.vcf,
where <i> starts at 1.

=cut

#option variables
my $help;
my $n = 0;
my $outFile;

#initialize options
Getopt::Long::Configure ('bundling');

# Show usage when option parsing fails, help is requested, the chunk size is
# missing or too small, or no input file was given.
# 'h|help' must be registered here: without it $help can never become true
# and the full-manual branch below is unreachable.
if(!GetOptions ('h|help'=>\$help, 'n=i'=>\$n, 'o=s'=>\$outFile)
   || $help || $n<2 || scalar(@ARGV)==0)
{
    if ($help)
    {
        # explicit help request: print the complete POD
        pod2usage(-verbose => 2);
    }
    else
    {
        # bad invocation: print the synopsis only
        pod2usage(1);
    }
}
# The input path is the first positional argument.
my $file = $ARGV[0];

# Without -o, derive the output prefix from the input's file name:
# the directory part is dropped and the name and suffix are glued back together.
unless (defined $outFile)
{
    my ($stem, undef, $suffix) = fileparse($file, '\..*');
    $outFile = $stem . $suffix;
}
# Split the input into numbered chunk files.
# Lines starting with '#' are collected as the header and are not counted as
# data. Each chunk file starts with the header collected so far, followed by
# up to $n data lines.
open(my $in, '<', $file) || die "Cannot open $file\n";

my $header = "";
my $header_line_count = 0;
my $out;            # handle of the chunk file currently being written
my $tempOutFile;    # name of that chunk file, kept for error messages

while (my $line = <$in>)
{
    if ($line =~ /^#/)
    {
        $header .= $line;
        ++$header_line_count;
        next;
    }

    # 1-based index of this line among the data (non-header) lines
    my $dataLineNo = $. - $header_line_count;

    if ($dataLineNo % $n == 1)
    {
        # finish the previous chunk; buffered write errors surface at close
        if (defined($out))
        {
            close($out) || die "Cannot close $tempOutFile\n";
        }

        # Number chunks from the data line index so the first chunk is always
        # _1, whatever the number of header lines. This matches the
        # $splitFileNo computation below.
        $tempOutFile = "$outFile" . "_" . ceil($dataLineNo/$n) . ".sites.vcf";

        #open new file
        open($out, '>', $tempOutFile) || die "Cannot open $tempOutFile\n";
        print {$out} $header;
    }
    print {$out} $line;
}

# number of chunk files written; read $. before closing $in, which resets it
my $splitFileNo = ceil(($.-$header_line_count)/$n);

# $out stays undefined when the input contained no data lines
if (defined($out))
{
    close($out) || die "Cannot close $tempOutFile\n";
}
close($in);
#checks that the file was split properly
# Each split file holds one copy of the header plus its share of the data, so
# the split sizes must add up to the input size plus ($splitFileNo-1) extra
# copies of the header.
my $totalSize = 0;
for my $i (1..$splitFileNo)
{
    my $splitFile = "$outFile" . "_$i" . ".sites.vcf";

    # -s gives undef for a missing file and a false value for an empty one;
    # count both as 0 bytes so the mismatch is reported by the warning below
    # rather than by an additional "uninitialized value" warning.
    my $splitSize = (-s $splitFile) || 0;
    $totalSize += $splitSize;
}

my $inputSize = (-s $file) || 0;
my $expectedSize = $totalSize-(($splitFileNo-1)*length($header));
if ($inputSize != $expectedSize)
{
    warn "Splitting of $file appears to be corrupted";
}