Skip to content

Commit 95dfc72

Browse files
committed
initial commit
0 parents  commit 95dfc72

File tree

128 files changed

+146934
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+146934
-0
lines changed

Makefile

+521
Large diffs are not rendered by default.

Makefile~

+521
Large diffs are not rendered by default.

NJtree.c

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#include <stdinc.h>
2+
#include <extvab.h>
3+
#include <extfunc.h>
4+
5+
#define MAX_DIST 20000
6+
7+
void NeighborJoin(int num_seq, double *EvoDist, int *tree, double *dist);
8+
void printNJtree(char *treeseq, int *tree, double *dist, int num_seq, char **segname);
9+
10+
void NeighborJoin(int num_seq, double *EvoDist, int *tree, double *dist)
11+
{
12+
int i, j, k, l, m, n, k1;
13+
int mini, minj;
14+
int num_step;
15+
char *label;
16+
double mins, d, *ut;
17+
18+
if(num_seq <= 1) {
19+
tree[0] = tree[1] = 0;
20+
dist[0] = dist[1] = EvoDist[0];
21+
return;
22+
}
23+
label = (char *) ckalloc(2 * num_seq * sizeof(char));
24+
ut = (double *) ckalloc(2 * num_seq * sizeof(double));
25+
num_step = 0;
26+
n = num_seq;
27+
while(num_step < n - 1) {
28+
mini = minj = -1;
29+
for(i = 0; i < num_seq; i ++) {
30+
if(label[i] == 1) continue;
31+
if(mini >= 0) minj = i;
32+
else mini = i;
33+
}
34+
if(n - num_step == 2) {
35+
k = numc(mini, minj);
36+
tree[2 * num_step] = mini;
37+
tree[2 * num_step + 1] = minj;
38+
dist[2 * num_step] = EvoDist[k];
39+
dist[2 * num_step + 1] = EvoDist[k];
40+
num_step ++;
41+
continue;
42+
}
43+
for(i = 0; i < num_seq; i ++) {
44+
ut[i] = 0;
45+
if(label[i] == 1) continue;
46+
for(j = 0; j < num_seq; j ++) {
47+
if(label[j] == 1) continue;
48+
if(i == j) continue;
49+
k = numc(i, j);
50+
ut[i] += EvoDist[k];
51+
}
52+
ut[i] /= (n - num_step - 2);
53+
}
54+
mins = MAX_DIST;
55+
for(i = 0; i < num_seq; i ++) {
56+
if(label[i] == 1) continue;
57+
for(j = i + 1; j < num_seq; j ++) {
58+
if(label[j] == 1) continue;
59+
k = numc(i, j);
60+
d = EvoDist[k] - ut[i] - ut[j];
61+
if(d < mins) {
62+
mins = d;
63+
mini = i;
64+
minj = j;
65+
}
66+
}
67+
}
68+
k = numc(mini, minj);
69+
tree[2 * num_step] = mini;
70+
tree[2 * num_step + 1] = minj;
71+
dist[2 * num_step] = 0.5 * EvoDist[k] + 0.5 * (ut[mini] - ut[minj]);
72+
dist[2 * num_step + 1] = 0.5 * EvoDist[k] + 0.5 * (ut[minj] - ut[mini]);
73+
label[mini] = label[minj] = 1;
74+
num_step ++;
75+
for(i = 0; i < num_seq; i ++) {
76+
if(label[i] == 1) continue;
77+
l = numc(mini, i);
78+
j = numc(minj, i);
79+
k = numc(mini, minj);
80+
k1 = numc(i, num_seq);
81+
EvoDist[k1] = (EvoDist[l] + EvoDist[j] - EvoDist[k]) / 2;
82+
}
83+
num_seq ++;
84+
}
85+
free((void *) ut);
86+
free((void *) label);
87+
}
88+
89+
void printNJtree(char *treeseq, int *tree, double *dist, int num_seq, char **segname)
90+
{
91+
int i, j, k, l;
92+
char **tmpstr, *tmpstr1;
93+
int len;
94+
95+
if(num_seq <= 1) {
96+
printf("Can't build the tree: sequence less than 2.\n");
97+
exit(0);
98+
}
99+
100+
tmpstr = (char **) ckalloc(num_seq * sizeof(char *));
101+
tmpstr1 = (char *) ckalloc(num_seq * 1000 * sizeof(char));
102+
len = 0;
103+
for(i = 0; i < num_seq - 1; i ++) {
104+
tmpstr[i] = (char *) ckalloc((2 * len + 1000) * sizeof(char));
105+
if(tree[2 * i] < num_seq) {
106+
sprintf(tmpstr[i], "(%s:%.4f, ", segname[tree[2 * i]], dist[2 * i]);
107+
} else {
108+
sprintf(tmpstr[i], "(%s:%.4f, ", tmpstr[tree[2 * i] - num_seq], dist[2 * i]);
109+
}
110+
if(tree[2 * i + 1] < num_seq) {
111+
sprintf(tmpstr1, "%s:%.4f)", segname[tree[2 * i + 1]], dist[2 * i + 1]);
112+
} else {
113+
sprintf(tmpstr1, "%s:%.4f)", tmpstr[tree[2 * i + 1] - num_seq], dist[2 * i + 1]);
114+
}
115+
strcat(tmpstr[i], tmpstr1);
116+
l = strlen(tmpstr[i]);
117+
if(l > len) {
118+
len = l;
119+
}
120+
}
121+
strcpy(treeseq, tmpstr[num_seq - 2]);
122+
for(i = 0; i < num_seq - 1; i ++) {
123+
free((void *) tmpstr[i]);
124+
}
125+
free((void **) tmpstr);
126+
free((void *) tmpstr1);
127+
}

add_int_edge.c

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#include <stdinc.h>
2+
#include <extvab.h>
3+
#include <extfunc.h>
4+
5+
int n1 = 0, n2 = 0;
6+
7+
int add_int_edge(EDGE **impedges, BINDEX *index, int num, NODES **vertex, int num_vertex, int pos, int num_seq, int len_seq, NODES **start_node);
8+
void buildedge(NODES *node1, NODES *node2, int index, int sp, int ep, int len_seq, int num_seq);
9+
EDGE *single_edge(NODES *node1, NODES *node2, int index, int sp, int ep);
10+
11+
int add_int_edge(EDGE **impedges, BINDEX *index, int num, NODES **vertex, int num_vertex, int pos, int num_seq, int len_seq, NODES **start_node)
12+
{
13+
int i, j, k, l, n, m;
14+
NODES *node, *bal_node;
15+
EDGE *edge1, *edge, *edge2, *bal_edge;
16+
READINTERVAL *readinterval, *readinterval1;
17+
18+
if(num == 0) return(num_vertex);
19+
20+
if(index[0].begin > 0) {
21+
edge2 = impedges[index[0].index];
22+
node = (NODES *) ckalloc(1 * sizeof(NODES));
23+
node -> lastedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
24+
node -> nextedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
25+
bal_node = (NODES *) ckalloc(1 * sizeof(NODES));
26+
bal_node -> lastedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
27+
bal_node -> nextedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
28+
node -> bal_node = bal_node;
29+
bal_node -> bal_node = node;
30+
vertex[num_vertex ++] = node;
31+
vertex[num_vertex ++] = bal_node;
32+
start_node[pos] = node;
33+
buildedge(node, edge2 -> begin, pos, 0, index[0].begin, len_seq, num_seq);
34+
} else {
35+
start_node[pos] = impedges[index[0].index] -> begin;
36+
}
37+
for(i = 0; i < num - 1; i ++) {
38+
readinterval = &(impedges[index[i].index] -> readinterval[index[i].index_mul]);
39+
readinterval1 = &(impedges[index[i + 1].index] -> readinterval[index[i + 1].index_mul]);
40+
if(readinterval -> begin + readinterval -> length - 1 < readinterval1 -> begin) {
41+
edge1 = impedges[index[i].index];
42+
edge2 = impedges[index[i + 1].index];
43+
/*
44+
printf("edge1 %d %d edge2 %d %d\n", edge1 -> end -> num_lastedge, edge2 -> end -> num_nextedge,
45+
edge2 -> begin -> num_lastedge, edge2 -> begin -> num_nextedge);
46+
*/
47+
buildedge(edge1 -> end, edge2 -> begin, pos, readinterval -> begin + readinterval -> length - 1,
48+
readinterval1 -> begin, len_seq, num_seq);
49+
}
50+
}
51+
readinterval = &(impedges[index[num - 1].index] -> readinterval[index[num - 1].index_mul]);
52+
if(readinterval -> begin + readinterval -> length < len_seq) {
53+
edge1 = impedges[index[num - 1].index];
54+
node = (NODES *) ckalloc(1 * sizeof(NODES));
55+
node -> lastedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
56+
node -> nextedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
57+
bal_node = (NODES *) ckalloc(1 * sizeof(NODES));
58+
bal_node -> lastedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
59+
bal_node -> nextedge = (EDGE **) ckalloc(1 * sizeof(EDGE *));
60+
node -> bal_node = bal_node;
61+
bal_node -> bal_node = node;
62+
vertex[num_vertex ++] = node;
63+
vertex[num_vertex ++] = bal_node;
64+
buildedge(edge1 -> end, node, pos, readinterval -> begin + readinterval -> length - 1, len_seq - 1, len_seq, num_seq);
65+
}
66+
return(num_vertex);
67+
}
68+
69+
void buildedge(NODES *node1, NODES *node2, int index, int sp, int ep, int len_seq, int num_seq)
70+
{
71+
NODES *bal_node1, *bal_node2;
72+
EDGE *edge, *bal_edge;
73+
74+
edge = single_edge(node1, node2, index, sp, ep);
75+
node2 -> lastedge[node2 -> num_lastedge ++] = edge;
76+
node1 -> nextedge[node1 -> num_nextedge ++] = edge;
77+
bal_node1 = node2 -> bal_node;
78+
bal_node2 = node1 -> bal_node;
79+
bal_edge = single_edge(bal_node1, bal_node2, index + num_seq, len_seq - ep - 1, len_seq - sp - 1);
80+
bal_node1 -> nextedge[bal_node1 -> num_nextedge ++] = bal_edge;
81+
bal_node2 -> lastedge[bal_node2 -> num_lastedge ++] = bal_edge;
82+
edge -> bal_edge = bal_edge;
83+
bal_edge -> bal_edge = edge;
84+
}
85+
86+
EDGE *single_edge(NODES *node1, NODES *node2, int index, int sp, int ep)
87+
{
88+
EDGE *edge;
89+
READINTERVAL *readinterval;
90+
91+
edge = (EDGE *) ckalloc(1 * sizeof(EDGE));
92+
edge -> length = ep - sp + 1;
93+
edge -> multip = 1;
94+
readinterval = (READINTERVAL *) ckalloc(1 * sizeof(READINTERVAL));
95+
readinterval -> eq_read = index;
96+
readinterval -> offset = 0;
97+
readinterval -> begin = sp;
98+
readinterval -> length = edge -> length;
99+
edge -> readinterval = readinterval;
100+
edge -> begin = node1;
101+
edge -> end = node2;
102+
return(edge);
103+
}

align2seq.c

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#include <stdinc.h>
2+
#include <extvab.h>
3+
#include <extfunc.h>
4+
5+
int align2seq(char *seq1, int len_seq1, char *seq2, int len_seq2, char **alnseq);
6+
7+
int align2seq(char *seq1, int len_seq1, char *seq2, int len_seq2, char **alnseq)
8+
{
9+
int i, j, k, l;
10+
int *sapp;
11+
int windows;
12+
int gscore, ALIGN0();
13+
14+
sapp = (int *) ckalloc((len_seq1 + len_seq2 + 1) * sizeof(int));
15+
alnseq[0] = (char *) ckalloc((len_seq1 + len_seq2 + 1) * sizeof(char));
16+
alnseq[1] = (char *) ckalloc((len_seq1 + len_seq2 + 1) * sizeof(char));
17+
windows = abs(len_seq1 - len_seq2) + 1000;
18+
gscore = ALIGN0(&seq1[-1], &seq2[-1], len_seq1, len_seq2, -windows, windows, W, 16, 4, sapp, len_seq1 + len_seq2 + 1,
19+
len_seq1 + len_seq2 + 1);
20+
i = j = k = 0;
21+
l = 0;
22+
while(i < len_seq1 && j < len_seq2) {
23+
if(sapp[k] == 0) {
24+
alnseq[0][l] = seq1[i];
25+
alnseq[1][l] = seq2[j];
26+
i ++;
27+
j ++;
28+
l ++;
29+
} else if(sapp[k] > 0) {
30+
j += sapp[k];
31+
} else {
32+
i -= sapp[k];
33+
}
34+
k ++;
35+
}
36+
free((void *) sapp);
37+
return(l);
38+
}

bl2aln.c

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#include <stdinc.h>
2+
#include <param.h>
3+
#include <extfunc.h>
4+
5+
char inpfile[100], outfile[100], qualfile[100], overlapfile[100];
6+
int qualinp;
7+
int min_leg;
8+
double min_id;
9+
10+
void initenv(int argc, char **argv);
11+
12+
main(int argc, char **argv)
13+
{
14+
int i, j, k, l, m, n;
15+
char **src_seq, **src_name;
16+
char temp[100];
17+
ALIGN **align, *aln, *aln0;
18+
FILE *fp;
19+
20+
readpar();
21+
random1(&idum);
22+
initenv(argc, argv);
23+
24+
/* read in pairwise alignments by BLAST */
25+
26+
align = (ALIGN **) ckalloc(2 * sizeof(ALIGN *));
27+
fp = ckopen(inpfile, "r");
28+
n = readblast(align, fp, min_leg, min_id);
29+
fclose(fp);
30+
printf("# alignments input: %d.\n", n);
31+
32+
/* Write alignments */
33+
34+
fp = ckopen(outfile, "w");
35+
for(m = 0; m < 2; m ++) {
36+
n = size_align(align[m]);
37+
fwrite(&n, sizeof(int), 1, fp);
38+
aln = align[m];
39+
while(aln) {
40+
fwrite(&(aln -> reads[1]), sizeof(int), 1, fp);
41+
fwrite(&(aln -> mis_match), sizeof(int), 1, fp);
42+
fwrite(&(aln -> length), sizeof(int), 1, fp);
43+
fwrite(aln -> pos[0], sizeof(int), aln -> length, fp);
44+
fwrite(aln -> pos[1], sizeof(int), aln -> length, fp);
45+
aln0 = aln -> next;
46+
free((void *) aln -> pos[0]);
47+
free((void *) aln -> pos[1]);
48+
free((void *) aln);
49+
aln = aln0;
50+
}
51+
}
52+
fclose(fp);
53+
printf("Done...\n");
54+
55+
free((void **) align);
56+
}
57+
58+
void initenv(int argc, char **argv)
59+
{
60+
int copt;
61+
int inpseq, outseq;
62+
extern char *optarg;
63+
64+
min_leg = 500;
65+
min_id = 0.99;
66+
inpseq = qualinp = outseq = 0;
67+
68+
while ((copt=getopt(argc,argv,"i:o:l:d:")) != EOF) {
69+
switch(copt) {
70+
case 'i':
71+
inpseq = 1;
72+
sscanf(optarg,"%s", inpfile);
73+
continue;
74+
case 'o':
75+
outseq = 1;
76+
sscanf(optarg,"%s", outfile);
77+
continue;
78+
case 'l':
79+
sscanf(optarg,"%d", &min_leg);
80+
continue;
81+
case 'd':
82+
sscanf(optarg,"%lf", &min_id);
83+
continue;
84+
default:
85+
printf("bl2aln -i InpFile -o outfile [-l min_leg -d min_id]\n");
86+
printf("-i InpFile: The input file name of reads\n");
87+
printf("-o OutFile: output alignment file\n");
88+
printf("-l min_leg: minimum length of repeats.\n");
89+
printf("-d min_id: minimum identity of repeats.\n");
90+
exit(-1);
91+
}
92+
optind--;
93+
}
94+
95+
if(inpseq == 0 || outseq == 0) {
96+
printf("bl2aln -i InpFile -o outfile [-l min_leg -d min_id]\n");
97+
printf("-i InpFile: The input file name of reads\n");
98+
printf("-o OutFile: output alignment file\n");
99+
printf("-l min_leg: minimum length of repeats.\n");
100+
printf("-d min_id: minimum identity of repeats.\n");
101+
exit(-1);
102+
}
103+
}

0 commit comments

Comments
 (0)