Skip to content

Commit 9e4ebd2

Browse files
committed
first commit
0 parents  commit 9e4ebd2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+37632
-0
lines changed

affya2mk

+209
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#!/usr/bin/perl
2+
3+
use warnings;
4+
use strict;
5+
use fralib;
6+
use File::Basename;
7+
use Getopt::Long;
8+
use Pod::Usage;
9+
10+
=head1 NAME
11+
12+
affya2mk
13+
14+
=head1 SYNOPSIS
15+
16+
affya2mk [options] <affy-annotation-file>
17+
18+
-h detailed help message
19+
-dbsnp specify the dbSNP version of the annotation
20+
PLEASE note that there exists PERLEGEN SNPs
21+
that are not found in dbSNP that are in SNP6
22+
affy-annotation-file affymetrix annotation file.
23+
24+
example: affya2mk pscalare.txt
25+
26+
Converts Affymetrix annotation (tab delimited) to mk format.
27+
Output file is named based on<affy-annotation-file> with the extension renamed
28+
to .mk Please note that this programme does not check for correctness of the
29+
field values.
30+
31+
=head1 DESCRIPTION
32+
33+
The extracted fields followed by its new name are:
34+
1)Probe Set ID snp-id
35+
2)dbSNP RS ID rs-id
36+
3)Chromosome chromosome
37+
4)Physical Position position
38+
5)ChrX pseudo-autosomal pseudo-autosomal
39+
ChrX pseudo-autosomal region 1 pseudo-autosomal
40+
ChrX pseudo-autosomal region 2 pseudo-autosomal
41+
6)DB SNP Version dbsnp (you will need this to describe the
42+
snp annotations)
43+
7)Allele A alleles
44+
8)Allele B alleles
45+
9)Flank flanks
46+
47+
An additional field - alleles-strand which describes the allele orientation
48+
with reference to the flanks is added.
49+
50+
=cut
51+
52+
my $help;
53+
my $headerProcessed;
54+
my $colNo;
55+
my $affyAnnotationFile;
56+
my $mkFile;
57+
my %label2Column;
58+
my $PARIsSeparate = 0;
59+
my $globalDBSNPVersion;
60+
my $annotationFileType = "";
61+
62+
#initialize options
63+
Getopt::Long::Configure ('bundling');
64+
65+
if(!GetOptions ('h'=>\$help, 'dbsnp=i'=>\$globalDBSNPVersion)
66+
|| $help
67+
|| (defined($globalDBSNPVersion) && $globalDBSNPVersion < 0)
68+
|| scalar(@ARGV)!=1)
69+
{
70+
if ($help)
71+
{
72+
pod2usage(-verbose => 2);
73+
}
74+
else
75+
{
76+
pod2usage(1);
77+
}
78+
}
79+
80+
$affyAnnotationFile = $ARGV[0];
81+
82+
#reads in the GISSNP illumina annotation
83+
open(AFFY_ANNOTATION, $affyAnnotationFile) || die "Cannot open $affyAnnotationFile\n";
84+
85+
if(!defined($mkFile))
86+
{
87+
my ($name, $dir, $ext) = fileparse($affyAnnotationFile, '\..*');
88+
$mkFile = "$name.mk";
89+
}
90+
91+
$headerProcessed = 0;
92+
open(MK, ">$mkFile") || die "Cannot open $mkFile\n";
93+
94+
while (<AFFY_ANNOTATION>)
95+
{
96+
s/\r?\n?$//;
97+
98+
if(!$headerProcessed)
99+
{
100+
next if (/^#/);
101+
102+
print MK "snp-id\trs-id\tchromosome\tstrand\tposition\tpseudo-autosomal\tdbsnp\talleles\talleles-strand\tflanks\n";
103+
104+
$colNo = s/\t/\t/g + 1;
105+
106+
my @fields = split('\t', $_, $colNo);
107+
108+
my @labels = ('Probe Set ID', 'dbSNP RS ID', 'Chromosome', 'Strand', 'DB SNP Version', 'Physical Position', 'ChrX pseudo-autosomal region', 'Flank', 'Allele A', 'Allele B');
109+
110+
SEARCH_LABEL: for my $label (@labels)
111+
{
112+
for my $col (0 .. $#fields)
113+
{
114+
if ($fields[$col] eq $label)
115+
{
116+
$label2Column{$label}=$col;
117+
next SEARCH_LABEL;
118+
}
119+
}
120+
121+
if ($label eq 'ChrX pseudo-autosomal region')
122+
{
123+
$PARIsSeparate = 1;
124+
push( @labels, 'ChrX pseudo-autosomal region 1');
125+
push( @labels, 'ChrX pseudo-autosomal region 2');
126+
}
127+
elsif ($label eq 'DB SNP Version' && defined($globalDBSNPVersion))
128+
{
129+
#use user defined global dbsnp version instead
130+
}
131+
else
132+
{
133+
if ($label eq 'DB SNP Version')
134+
{
135+
die "Cannot find '$label' in $mkFile, you can circumvent this with the --dbsnp option";
136+
}
137+
else
138+
{
139+
die "Cannot find '$label' in $mkFile";
140+
}
141+
}
142+
}
143+
144+
$headerProcessed = 1;
145+
}
146+
else
147+
{
148+
my @fields = split('\t', $_, -1);
149+
150+
my $snpId = trim($fields[$label2Column{'Probe Set ID'}]);
151+
my $rsId = trim($fields[$label2Column{'dbSNP RS ID'}]);
152+
my $chromosome = trim($fields[$label2Column{'Chromosome'}]);
153+
$chromosome = $chromosome eq 'MT' ? 'M' : $chromosome;
154+
my $strand = trim($fields[$label2Column{'Strand'}]);
155+
my $dbsnp = defined($globalDBSNPVersion) ? $globalDBSNPVersion : trim($fields[$label2Column{'DB SNP Version'}]);;
156+
my $position = trim($fields[$label2Column{'Physical Position'}]);
157+
my $pseudoAutosomal;
158+
if ($PARIsSeparate)
159+
{
160+
my $par1 = trim($fields[$label2Column{'ChrX pseudo-autosomal region 1'}]);
161+
my $par2 = trim($fields[$label2Column{'ChrX pseudo-autosomal region 2'}]);
162+
#print "$snpId\t$par1\t$par2\n";
163+
$pseudoAutosomal = ($par1 =~ /0|---/ && $par2 =~ /0|---/)? 0 : 1;
164+
}
165+
else
166+
{
167+
$pseudoAutosomal = trim($fields[$label2Column{'ChrX pseudo-autosomal region'}]);
168+
}
169+
my $alleleA = trim($fields[$label2Column{'Allele A'}]);
170+
my $alleleB = trim($fields[$label2Column{'Allele B'}]);
171+
my $alleles = "$alleleA/$alleleB";
172+
my $allelesStrand = 'ref';
173+
my $flanks = uc(trim($fields[$label2Column{'Flank'}]));
174+
175+
if ($rsId eq '---')
176+
{
177+
$rsId = 'n/a';
178+
$dbsnp = 'n/a';
179+
}
180+
181+
if ($chromosome eq '---')
182+
{
183+
$chromosome = 'n/a';
184+
$position = 'n/a';
185+
$strand = 'n/a';
186+
$pseudoAutosomal = 'n/a';
187+
}
188+
189+
#handles only biallelic SNPs
190+
if ($alleles!~/[ACGT]\/[ACGT]/i)
191+
{
192+
warn "$snpId: alleles not valid - $alleles";
193+
$alleles = 'n/a';
194+
}
195+
196+
if ($flanks!~/([ACGTMRWSYKVHDBNX]*)\[([ACGT]\/[ACGT])\]([ACGTMRWSYKVHDBNX]*)/i
197+
|| (length($1)==0 && length($3)==0))
198+
{
199+
warn "$snpId: flanks not valid - $flanks";
200+
$allelesStrand = 'n/a';
201+
$flanks = 'n/a';
202+
}
203+
204+
printf MK "$snpId\t$rsId\t$chromosome\t$strand\t$position\t$pseudoAutosomal\t$dbsnp\t$alleles\t$allelesStrand\t$flanks\n";
205+
}
206+
}
207+
208+
close(AFFY_ANNOTATION);
209+
close(MK);

affymetrix2tg

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/perl
2+
3+
use warnings;
4+
use strict;
5+
use fralib;
6+
use Getopt::Long;
7+
use File::Basename;
8+
use Pod::Usage;
9+
10+
=head1 NAME
11+
12+
affymetrix2tg
13+
14+
=head1 SYNOPSIS
15+
16+
affymetrix2tg [options] <affymetrix-call-file>
17+
18+
-h help
19+
-o output file name (optional)
20+
default: replaces extension of
21+
<affymetrix-call-file> with tg
22+
affymetrix-call-file Affymetrix calls file
23+
24+
example: affymetrix2tg -o pscalare.tg brlmm.calls.txt
25+
26+
Converts the affymetrix calls output file to a tg-file.
27+
28+
=head1 DESCRIPTION
29+
30+
=cut
31+
32+
my $help;
33+
my $affymetrixFile;
34+
my $tgFile;
35+
my $headerProcessed = 0;
36+
my $colNo;
37+
38+
#initialize options
39+
Getopt::Long::Configure ('bundling');
40+
41+
if(!GetOptions ('h'=>\$help, 'o=s'=>\$tgFile) || scalar(@ARGV)!=1)
42+
{
43+
if ($help)
44+
{
45+
pod2usage(-verbose => 2);
46+
}
47+
else
48+
{
49+
pod2usage(1);
50+
}
51+
}
52+
53+
$affymetrixFile = $ARGV[0];
54+
55+
if (!defined($tgFile))
56+
{
57+
my ($name, $path, $ext) = fileparse($affymetrixFile, '\..*');
58+
$tgFile = "$name.tg";
59+
}
60+
61+
open(IN, $affymetrixFile) || die "Cannot open $affymetrixFile\n";
62+
open(OUT, ">$tgFile") || die "Cannot open $tgFile\n";
63+
64+
while (<IN>)
65+
{
66+
s/\r?\n?$//;
67+
68+
if(!$headerProcessed)
69+
{
70+
if(/^probeset_id/)
71+
{
72+
$colNo = s/\t/\t/g + 1;
73+
my @fields = split('\t',$_,$colNo);
74+
75+
print OUT "snp-id";
76+
77+
#reads in sample ids
78+
for my $col (1 .. $#fields)
79+
{
80+
print OUT "\t$fields[$col]";
81+
}
82+
print OUT "\n";
83+
$headerProcessed = 1;
84+
}
85+
}
86+
else
87+
{
88+
my @fields = split(/\t/);
89+
90+
print OUT "$fields[0]";
91+
92+
for my $col (1 .. $#fields)
93+
{
94+
if($fields[$col]=~/^(0|1|2|-1)$/)
95+
{
96+
print OUT "\t$fields[$col]";
97+
}
98+
else
99+
{
100+
die "Unrecognized encoding in $affymetrixFile: $fields[$col]";
101+
}
102+
}
103+
104+
print OUT "\n";
105+
}
106+
}
107+
108+
close(OUT);
109+
close(IN);

0 commit comments

Comments
 (0)