Skip to content

Commit

Permalink
Fixed a bug with genome generation for annotations without splice ju…
Browse files Browse the repository at this point in the history
…nctions.
  • Loading branch information
alexdobin committed Nov 3, 2015
1 parent 4cd6305 commit 0e2d3d3
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Fixed a bug with genome generation for annotations without splice junctions.
For STARlong, increased compilation-time max read length to 500000 and max number of exons to 1000
Implemented on the fly insertion of the extra sequences into the genome indexes.
Implemented --alignEndsType Extend3pOfRead1 option for full extension of the 3' end of read 1.
Fixed a bug in the --alignEndsType Extend5pOfRead1 option.
Expand Down
Binary file modified bin/Linux_x86_64/STAR
Binary file not shown.
Binary file modified bin/Linux_x86_64/STARlong
Binary file not shown.
Binary file modified bin/Linux_x86_64_static/STAR
Binary file not shown.
Binary file modified bin/Linux_x86_64_static/STARlong
Binary file not shown.
55 changes: 55 additions & 0 deletions extras/scripts/sjFromSAMcollapseUandM_inclOverlaps.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Collapse splice junctions found in SAM records (read from the input) into
# one line per junction.
#
# A junction is every CIGAR "N" operation; it is keyed by
#   reference name <TAB> first skipped base <TAB> last skipped base  (1-based).
#
# Output columns (tab-separated, order of junctions is unspecified):
#   1-3  junction key (reference, start, end)
#   4    SJu  - records with MAPQ >= mapqU, junction counted once per read name
#   5    SJm  - records with MAPQ <  mapqU, junction counted once per read name
#   6    SJu1 - records with MAPQ >= mapqU, every occurrence counted
#   7    SJm1 - records with MAPQ <  mapqU, every occurrence counted
#
# NOTE(review): "U"/"M" presumably stand for unique / multi-mapping reads
# (MAPQ 255 vs. lower) - confirm against the aligner's MAPQ convention.
#
# The per-read de-duplication only compares a record with the previous one,
# so all records of a read (both mates, all alignments) must be adjacent.
BEGIN {
    OFS="\t";
    mapqU=255;   # MAPQ threshold separating the "u" counters from the "m" counters
}
{
    if (substr($1,1,1)!="@") {   # skip SAM header lines

        # New read name: forget the junctions seen for the previous read.
        if ($1!=readNameOld) delete readSJs;
        readNameOld=$1;

        # Parse the CIGAR: L[k] is the length of the k-th operation and
        # C[k+1] is its operator character (C[1] is the empty string that
        # precedes the first length).
        # "=" is included so that "=" operations are recognised as well;
        # /[0-9]+/ avoids depending on how empty regexp matches are handled.
        n=split($6,L,/[A-Z=]/)-1;
        split($6,C,/[0-9]+/);

        g=$4;   # reference coordinate of the next reference-consuming base
        for (k=1;k<=n;k++) {   # scan through CIGAR operations
            op=C[k+1];
            if (op=="N") {
                # Skipped region: bases g .. g+L[k]-1 form the junction.
                sj1=$3 "\t" g "\t" (g+L[k]-1);
                readSJs[sj1]++;

                if (readSJs[sj1]==1) {   # only count this junction if it has not been counted for the same read
                    SJ[sj1]=1;
                    if ($5>=mapqU) {
                        SJu[sj1]++;
                    } else {
                        SJm[sj1]++;
                    }
                }

                # Per-occurrence counters are incremented unconditionally.
                if ($5>=mapqU) {
                    SJu1[sj1]++;
                } else {
                    SJm1[sj1]++;
                }

                g+=L[k];

            } else if (op=="M" || op=="D" || op=="=" || op=="X") {
                # Operations that consume reference bases.
                g+=L[k];
            }
            # I, S, H and P do not consume reference bases, so g is unchanged.
        }
    }
}
END {

    # "+0" turns a missing counter into 0 instead of an empty field.
    for (ii in SJ) {
        print ii, SJu[ii]+0, SJm[ii]+0, SJu1[ii]+0, SJm1[ii]+0;
    }

}
2 changes: 1 addition & 1 deletion source/IncludeDefine.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ typedef uint8_t uint8;
#define MAX_N_MATES 2
#define DEF_readNameLengthMax 50000
#if defined COMPILE_FOR_LONG_READS
#define DEF_readSeqLengthMax 50000
#define DEF_readSeqLengthMax 500000
#else
#define DEF_readSeqLengthMax 500
#endif
Expand Down
1 change: 1 addition & 0 deletions source/genomeGenerate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ void genomeGenerate(Parameters *P) {

//write an extra 0 at the end of the array, filling the last bytes that otherwise are not accessible, but will be written to disk
//this is - to avoid valgrind complaints. Note that SA2 is allocated with plenty of space to spare.
P->nSAbyte=mainGenome.SA.lengthByte;
SA2.writePacked(P->nSA,0);
};

Expand Down

0 comments on commit 0e2d3d3

Please sign in to comment.