generating_training_data_1.01.py
__author__ = 'Oswaldo Ludwig'
__version__ = '1.01'
#This code automatically derives (from the COCO annotations) the VIEW-style training annotation: training_sentences.csv and training_target.
#In case of publication, cite:
##@ARTICLE{2016arXiv160308474L,
## author = {Oswaldo Ludwig and Xiao Liu and Parisa Kordjamshidi and Marie-Francine Moens
## },
## title = "{Deep Embedding for Spatial Role Labeling}",
## journal = {ArXiv e-prints},
##archivePrefix = "arXiv",
## eprint = {1603.08474},
## primaryClass = "cs.CL",
## keywords = {Computer Science - Computation and Language, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Learning, Computer Science - Neural and Evolutionary Computing},
## year = 2016,
## month = mar,
## adsurl = {http://adsabs.harvard.edu/abs/2016arXiv160308474L},
##}
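#Layout of each 201-dimensional target vector produced below (a summary of the
#assemble_output_vector logic defined further down):
#  position 0          -> below(A,B) spatial relation
#  position 1          -> besides(A,B) spatial relation
#  position 2 + cat_A  -> COCO category id of the first (most centered) object
#  position 100 + cat_B-> COCO category id of the second object
#  position 200        -> no second object of a different category was found
#                         (positions 0 and 1 are zeroed in that case)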
from coco import COCO
import csv
import sys
import numpy as np
import pylab
import mask
import itertools
import pickle
import time
dataDir='..'
dataType='val2014'
annFile='instances_%s.json'%(dataType)
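#Note: dataDir is defined above but not used when building annFile, so
#instances_val2014.json (and later captions_val2014.json) are expected in the
#current working directory; adjust annFile if the annotations live elsewhere.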
def assemble_output_vector(anotacao):
    #assembles the 201-dimensional one-hot style target vector for one image,
    #given its list of COCO instance annotations (anotacao)
    outvec = [0] * 201
    coord = np.zeros((2, 2))
    sa = len(anotacao)
    if sa > 1:
        dist = [0] * sa
        centerx = [0] * sa
        centery = [0] * sa
        area = [0] * sa
        indexes = [0] * 2
        #getting the coordinates of the centers of the objects' bounding boxes
        #(COCO bbox format is [x, y, width, height]):
        for j in range(sa):
            anota = anotacao[j]
            bb = np.array(anota['bbox'])
            centerx[j] = bb[0] + bb[2] / 2
            centery[j] = bb[1] + bb[3] / 2
            area[j] = bb[3] * bb[2]
        #getting the centroid of the set of objects:
        centroidx = np.mean(centerx)
        centroidy = np.mean(centery)
        #scoring objects by their horizontal distance to the centroid, with a
        #bonus for larger areas, then sorting from most to least centered:
        for j in range(sa):
            dist[j] = (centerx[j] - centroidx)**2 - 0.1 * area[j]
        sorted_indexes = sorted(range(len(dist)), key=lambda k: dist[k])
        indexes[0] = sorted_indexes[0]
        #getting the category of the most centered object:
        anota = anotacao[sorted_indexes[0]]
        category_obj_1 = anota['category_id']
        #searching for the next most centered object belonging to another category:
        for j in range(sa):
            anota = anotacao[sorted_indexes[j]]
            category_obj = anota['category_id']
            if category_obj != category_obj_1:
                indexes[1] = sorted_indexes[j]
                break
        #coord row 0 holds x centers and row 1 holds y centers;
        #column 0 is the first selected object, column 1 the second:
        coord[1][0] = centery[indexes[0]]
        coord[1][1] = centery[indexes[1]]
        coord[0][0] = centerx[indexes[0]]
        coord[0][1] = centerx[indexes[1]]
        #if the second object has a smaller y center than the first, swap them,
        #so that slot 0 holds the object that is higher up in the image:
        if coord[1][1] < coord[1][0]:
            aux = coord[1][0]
            coord[1][0] = coord[1][1]
            coord[1][1] = aux
            aux = coord[0][0]
            coord[0][0] = coord[0][1]
            coord[0][1] = aux
            aux = indexes[0]
            indexes[0] = indexes[1]
            indexes[1] = aux
        #calculating the gradient between the positions of the objects
        #(deltax can be zero; the numpy float division then yields inf, which the
        #test below treats as a vertical arrangement):
        deltax = coord[0][1] - coord[0][0]
        deltay = coord[1][1] - coord[1][0]
        grad = deltay / deltax
        #annotating the relative position between objects: a slope steeper than
        #45 degrees is taken as a vertical arrangement, otherwise a horizontal one
        if (grad > 1) or (grad < -1):
            outvec[0] = 1 #below(A,B)
            print 'below(A,B)'
        else:
            outvec[1] = 1 #besides(A,B)
            print 'besides(A,B)'
        #annotating the category ids of both visual objects (in one-hot style):
        anota = anotacao[indexes[0]]
        an0 = anota['category_id']
        outvec[2 + an0] = 1
        print an0
        if indexes[1] > 0:
            anota = anotacao[indexes[1]]
            an1 = anota['category_id']
            outvec[100 + an1] = 1
            print an1
        else:
            outvec[200] = 1 #the position 200 is for the absence of the second object
            outvec[0] = 0
            outvec[1] = 0
    else:
        #zero or one annotated object: only the (possible) first category is set
        if sa > 0:
            anota = anotacao[0]
            outvec[2 + anota['category_id']] = 1
    return outvec
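# Example usage (a sketch, assuming the COCO api initialized below and a
# hypothetical valid image id `some_img_id` taken from imgIds):
#anns = coco.loadAnns(ids=coco.getAnnIds(imgIds=some_img_id, iscrowd=None))
#vec = assemble_output_vector(anns)
#print [i for i in range(201) if vec[i] == 1]  #active positions of the target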
# initializing COCO api for instance annotations
coco=COCO(annFile)
# get the image Ids:
imgIds = coco.getImgIds()
#creating the target output for all the captions:
s=len(imgIds)
out = np.zeros(shape=(5*s,201))
count=0
for imgId in imgIds:
    #getting the Ids of the annotations corresponding to the image Id:
    annIds = coco.getAnnIds(imgIds=imgId, iscrowd=None)
    #getting the annotations:
    anotacao = coco.loadAnns(ids=annIds)
    #creating the same target output for the 5 captions of this image:
    for k in range(5):
        out[count, :] = assemble_output_vector(anotacao)
        count += 1
# Saving the objects:
with open('training_target', 'w') as f:
    pickle.dump(out, f)
# If you want to get back the annotation:
#with open('training_target') as f:
#    out = pickle.load(f)
# initializing COCO api for caption annotations
annFile = 'captions_%s.json'%(dataType)
caps = COCO(annFile)
with open('training_sentences.csv', 'wb') as f:
    wri = csv.writer(f, skipinitialspace=True)
    for imgId in imgIds:
        annId = caps.getAnnIds(imgIds=imgId)
        #reading the caption strings directly from the loaded annotation dicts
        #(showAnns only prints captions, so its result cannot be indexed):
        captions = [ann['caption'] for ann in caps.loadAnns(annId)]
        #keeping 5 captions per image, to match the 5 target rows written above:
        for n in range(5):
            if len(captions[n]) < 3:
                captions[n] = 'none'
            cap = captions[n].lower()
            wri.writerow([cap])
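# To read the sentences back (a sketch mirroring the writer above):
#with open('training_sentences.csv', 'rb') as f:
#    sentences = [row[0] for row in csv.reader(f)]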