-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathncbi_taxonomy_read_order_family_genus.R
93 lines (77 loc) · 3.23 KB
/
ncbi_taxonomy_read_order_family_genus.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Build `out_family`: for every order listed in `order`, collect the rows of
# `all` that are direct children of that order (all$parent matches the order's
# id) and have rank "genus", "family" or "species". Each collected row is
# tagged with the phylum/class/order ids and names taken from `order`, and all
# pieces are stacked into one data frame. `out_family` stays NULL when nothing
# matches.
#
# Reads:  `all`   (columns used here: id, name, parent, type, rank)
#         `order` (columns used here: order_id, order_name, class_id,
#                  class_name, phylum_id, phylum_name)
# Writes: `out_family`; also coerces all$name and order$order_name to character.
out_family = NULL
all$name = as.character(all$name)
order$order_name = as.character(order$order_name)

# Loop-invariant mask: rows of `all` that hold a scientific name.
is_scientific = all$type == "scientific name"

# Per-order pieces are gathered in a list and bound once after the loop,
# instead of growing `out_family` with rbind() on every iteration.
pieces = list()

# seq_along() yields no iterations when `order` is empty (1:length(x) would
# iterate over c(1, 0)).
for (b in seq_along(order$order_name)) {#each order
  print(b)

  # Rows of `all` whose scientific name equals this order's name.
  inds_order = which(all$name == order$order_name[b] & is_scientific)

  # %in% rather than ==: inds_order may hold zero or several rows, and ==
  # would recycle the ids element-wise and select the wrong rows.
  # NOTE(review): if order$order_id holds the same ids as all$id, matching on
  # order$order_id[b] would avoid picking up same-named taxa - confirm.
  is_child = is_scientific & (all$parent %in% all$id[inds_order])

  # Genera directly under the order (no family in between).
  # NOTE(review): genus rows are stored in species_id/species_name; kept as in
  # the original so the output columns do not change.
  genus_tmp = all[which(is_child & all$rank == "genus"), ]
  if (nrow(genus_tmp) > 0) {#there is at least one genus record
    names(genus_tmp)[names(genus_tmp) == "id"] = "species_id"
    names(genus_tmp)[names(genus_tmp) == "name"] = "species_name"
    genus_tmp$class_id = order$class_id[b]
    genus_tmp$class_name = order$class_name[b]
    genus_tmp$order_id = order$order_id[b]
    genus_tmp$order_name = order$order_name[b]
    genus_tmp$phylum_id = order$phylum_id[b]
    genus_tmp$phylum_name = order$phylum_name[b]
    genus_tmp$family_id = ""
    genus_tmp$family_name = ""
    pieces[[length(pieces) + 1]] = genus_tmp
  }#end if at least one genus record

  # Families directly under the order; species columns are left blank.
  family_tmp = all[which(is_child & all$rank == "family"), ]
  if (nrow(family_tmp) > 0) {#there is at least one family record
    names(family_tmp)[names(family_tmp) == "id"] = "family_id"
    names(family_tmp)[names(family_tmp) == "name"] = "family_name"
    family_tmp$class_id = order$class_id[b]
    family_tmp$class_name = order$class_name[b]
    family_tmp$phylum_id = order$phylum_id[b]
    family_tmp$phylum_name = order$phylum_name[b]
    family_tmp$order_id = order$order_id[b]
    family_tmp$order_name = order$order_name[b]
    family_tmp$species_id = ""
    family_tmp$species_name = ""
    pieces[[length(pieces) + 1]] = family_tmp
  }#end if at least one family record

  # Species directly under the order; family columns are left blank.
  species_tmp = all[which(is_child & all$rank == "species"), ]
  if (nrow(species_tmp) > 0) {#there is at least one species record
    names(species_tmp)[names(species_tmp) == "id"] = "species_id"
    names(species_tmp)[names(species_tmp) == "name"] = "species_name"
    species_tmp$class_id = order$class_id[b]
    species_tmp$class_name = order$class_name[b]
    species_tmp$phylum_id = order$phylum_id[b]
    species_tmp$phylum_name = order$phylum_name[b]
    species_tmp$order_id = order$order_id[b]
    species_tmp$order_name = order$order_name[b]
    species_tmp$family_id = ""
    species_tmp$family_name = ""
    pieces[[length(pieces) + 1]] = species_tmp
  }#end if at least one species record
}#end order loop

# rbind() matches data-frame columns by name, so pieces whose columns were
# added in a different order still line up. With no pieces this yields NULL.
out_family = do.call(rbind, pieces)