Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 29 additions & 7 deletions python-agent/cnn_dqn_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import six.moves.cPickle as pickle
import copy
import os
import os.path
from PIL import Image
import numpy as np
from chainer import cuda

Expand All @@ -17,25 +19,45 @@ class CnnDqnAgent(object):

actions = [0, 1, 2]

cnn_feature_extractor = 'alexnet_feature_extractor.pickle'
model = 'bvlc_alexnet.caffemodel'
model_type = 'alexnet'
image_feature_dim = 256 * 6 * 6
mean_file = 'ilsvrc_2012_mean.npy'
mean_name, ext = os.path.splitext(mean_file)

'''
#AlexNet
in_size = 227
model_file = 'bvlc_alexnet.caffemodel'
feature_name = 'pool5'
'''

#GoogLeNet
in_size = 224
model_file = 'bvlc_googlenet.caffemodel'
feature_name = 'pool5/7x7_s1' #aka loss3/fc

model_name, ext = os.path.splitext(model_file)

cnn_feature_extractor = model_name + '.' + mean_name + '.' + feature_name + '.extractor.pickle'
cnn_feature_extractor = cnn_feature_extractor.replace('/', '_')

def agent_init(self, **options):
self.use_gpu = options['use_gpu']
self.depth_image_dim = options['depth_image_dim']
self.q_net_input_dim = self.image_feature_dim + self.depth_image_dim

if os.path.exists(self.cnn_feature_extractor):
print("loading... " + self.cnn_feature_extractor),
self.feature_extractor = pickle.load(open(self.cnn_feature_extractor))
print("done")
else:
self.feature_extractor = CnnFeatureExtractor(self.use_gpu, self.model, self.model_type, self.image_feature_dim)
self.feature_extractor = CnnFeatureExtractor(self.use_gpu,
self.model_file, self.in_size, self.mean_file, self.feature_name)
pickle.dump(self.feature_extractor, open(self.cnn_feature_extractor, 'w'))
print("pickle.dump finished")

#One-time FF to get the feature length
image = Image.new("RGB", (256, 256)) # dummy image
self.image_feature_dim = self.feature_extractor.feature(image).size
self.depth_image_dim = options['depth_image_dim']
self.q_net_input_dim = self.image_feature_dim + self.depth_image_dim

self.time = 0
self.epsilon = 1.0 # Initial exploratoin rate
self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim)
Expand Down
57 changes: 26 additions & 31 deletions python-agent/cnn_feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,39 @@


class CnnFeatureExtractor:
def __init__(self, gpu, model, model_type, out_dim):
def __init__(self, gpu, model_file, in_size, mean_file, feature_name):
self.gpu = gpu
self.model = 'bvlc_alexnet.caffemodel'
self.model_type = 'alexnet'
self.model_file = model_file
self.mean_file = mean_file
self.feature_name = feature_name
self.in_size = in_size
self.batchsize = 1
self.out_dim = out_dim

if self.gpu >= 0:
cuda.check_cuda_available()

print('Loading Caffe model file %s...' % self.model, file = sys.stderr)
self.func = caffe.CaffeFunction(self.model)
print('Loading Caffe model file %s...' % self.model_file, file = sys.stderr)
self.func = caffe.CaffeFunction(self.model_file)
print('Loaded', file=sys.stderr)
if self.gpu >= 0:
cuda.get_device(self.gpu).use()
self.func.to_gpu()

if self.model_type == 'alexnet':
self.in_size = 227
mean_image = np.load('ilsvrc_2012_mean.npy')
del self.func.layers[15:23]
self.outname = 'pool5'
#del self.func.layers[13:23]
#self.outname = 'conv5'
mean_image = np.load(self.mean_file)
self.mean_image = self.crop(mean_image)


cropwidth = 256 - self.in_size
start = cropwidth // 2
stop = start + self.in_size
self.mean_image = mean_image[:, start:stop, start:stop].copy()

def forward(self, x, t):
y, = self.func(inputs={'data': x}, outputs=[self.outname], train=False)
y, = self.func(inputs={'data': x}, outputs=[self.feature_name], train=False)
return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

def predict(self, x):
y, = self.func(inputs={'data': x}, outputs=[self.outname], train=False)
y, = self.func(inputs={'data': x}, outputs=[self.feature_name], train=False)
return F.softmax(y)

def feature(self, camera_image):
x_batch = np.ndarray((self.batchsize, 3, self.in_size, self.in_size), dtype=np.float32)
image = np.asarray(camera_image).transpose(2, 0, 1)[::-1].astype(np.float32)
image = self.crop(image)
image -= self.mean_image

x_batch[0] = image
Expand All @@ -60,20 +51,24 @@ def feature(self, camera_image):

if self.gpu >= 0:
x_data=cuda.to_gpu(x_data)

x = chainer.Variable(x_data, volatile=True)
feature = self.predict(x)
feature = feature.data

if self.gpu >= 0:
feature = cuda.to_cpu(feature.data)
feature = feature.reshape(self.out_dim)
else:
feature = feature.data.reshape(self.out_dim)
feature = cuda.to_cpu(feature)
feature = self.vec(feature)

return feature * 255.0





def crop(self, image):
    """Return a centered ``in_size`` x ``in_size`` crop of ``image``.

    The first axis of ``image`` is kept whole; axes 1 and 2 are each cut
    down to ``self.in_size`` entries around their own center. The result
    is a copy, so modifying it does not affect ``image``.

    Raises:
        ValueError: if axis 1 or axis 2 is shorter than ``self.in_size``.
    """
    size = self.in_size
    height, width = image.shape[1], image.shape[2]
    if height < size or width < size:
        # A negative offset would silently slice from the far end and
        # yield a wrongly shaped array; fail loudly instead.
        raise ValueError(
            'cannot crop a %dx%d image to %dx%d' % (height, width, size, size))
    # Offsets are computed per axis so non-square inputs stay centered.
    top = (height - size) // 2
    left = (width - size) // 2
    return image[:, top:top + size, left:left + size].copy()

#vectorization, or mat(:) in MATLAB
def vec(self, mat):
    """Return ``mat`` reshaped to one dimension holding all its elements."""
    flat_shape = (mat.size,)
    return mat.reshape(flat_shape)