diff --git a/python-agent/cnn_dqn_agent.py b/python-agent/cnn_dqn_agent.py index 4046ed4..5817e59 100644 --- a/python-agent/cnn_dqn_agent.py +++ b/python-agent/cnn_dqn_agent.py @@ -3,6 +3,8 @@ import six.moves.cPickle as pickle import copy import os +import os.path +from PIL import Image import numpy as np from chainer import cuda @@ -17,25 +19,45 @@ class CnnDqnAgent(object): actions = [0, 1, 2] - cnn_feature_extractor = 'alexnet_feature_extractor.pickle' - model = 'bvlc_alexnet.caffemodel' - model_type = 'alexnet' - image_feature_dim = 256 * 6 * 6 + mean_file = 'ilsvrc_2012_mean.npy' + mean_name, ext = os.path.splitext(mean_file) + + ''' + #AlexNet + in_size = 227 + model_file = 'bvlc_alexnet.caffemodel' + feature_name = 'pool5' + ''' + + #GoogLeNet + in_size = 224 + model_file = 'bvlc_googlenet.caffemodel' + feature_name = 'pool5/7x7_s1' #aka loss3/fc + + model_name, ext = os.path.splitext(model_file) + + cnn_feature_extractor = model_name + '.' + mean_name + '.' + feature_name + '.extractor.pickle' + cnn_feature_extractor = cnn_feature_extractor.replace('/', '_') def agent_init(self, **options): self.use_gpu = options['use_gpu'] - self.depth_image_dim = options['depth_image_dim'] - self.q_net_input_dim = self.image_feature_dim + self.depth_image_dim if os.path.exists(self.cnn_feature_extractor): print("loading... 
" + self.cnn_feature_extractor), self.feature_extractor = pickle.load(open(self.cnn_feature_extractor)) print("done") else: - self.feature_extractor = CnnFeatureExtractor(self.use_gpu, self.model, self.model_type, self.image_feature_dim) + self.feature_extractor = CnnFeatureExtractor(self.use_gpu, + self.model_file, self.in_size, self.mean_file, self.feature_name) pickle.dump(self.feature_extractor, open(self.cnn_feature_extractor, 'w')) print("pickle.dump finished") + #One-time FF to get the feature length + image = Image.new("RGB", (256, 256)) # dummy image + self.image_feature_dim = self.feature_extractor.feature(image).size + self.depth_image_dim = options['depth_image_dim'] + self.q_net_input_dim = self.image_feature_dim + self.depth_image_dim + self.time = 0 self.epsilon = 1.0 # Initial exploratoin rate self.q_net = QNet(self.use_gpu, self.actions, self.q_net_input_dim) diff --git a/python-agent/cnn_feature_extractor.py b/python-agent/cnn_feature_extractor.py index 1616dab..303b909 100644 --- a/python-agent/cnn_feature_extractor.py +++ b/python-agent/cnn_feature_extractor.py @@ -10,48 +10,39 @@ class CnnFeatureExtractor: - def __init__(self, gpu, model, model_type, out_dim): + def __init__(self, gpu, model_file, in_size, mean_file, feature_name): self.gpu = gpu - self.model = 'bvlc_alexnet.caffemodel' - self.model_type = 'alexnet' + self.model_file = model_file + self.mean_file = mean_file + self.feature_name = feature_name + self.in_size = in_size self.batchsize = 1 - self.out_dim = out_dim if self.gpu >= 0: cuda.check_cuda_available() - print('Loading Caffe model file %s...' % self.model, file = sys.stderr) - self.func = caffe.CaffeFunction(self.model) + print('Loading Caffe model file %s...' 
% self.model_file, file = sys.stderr) + self.func = caffe.CaffeFunction(self.model_file) print('Loaded', file=sys.stderr) if self.gpu >= 0: cuda.get_device(self.gpu).use() self.func.to_gpu() - if self.model_type == 'alexnet': - self.in_size = 227 - mean_image = np.load('ilsvrc_2012_mean.npy') - del self.func.layers[15:23] - self.outname = 'pool5' - #del self.func.layers[13:23] - #self.outname = 'conv5' + mean_image = np.load(self.mean_file) + self.mean_image = self.crop(mean_image) - - cropwidth = 256 - self.in_size - start = cropwidth // 2 - stop = start + self.in_size - self.mean_image = mean_image[:, start:stop, start:stop].copy() - def forward(self, x, t): - y, = self.func(inputs={'data': x}, outputs=[self.outname], train=False) + y, = self.func(inputs={'data': x}, outputs=[self.feature_name], train=False) return F.softmax_cross_entropy(y, t), F.accuracy(y, t) - + def predict(self, x): - y, = self.func(inputs={'data': x}, outputs=[self.outname], train=False) + y, = self.func(inputs={'data': x}, outputs=[self.feature_name], train=False) return F.softmax(y) def feature(self, camera_image): x_batch = np.ndarray((self.batchsize, 3, self.in_size, self.in_size), dtype=np.float32) image = np.asarray(camera_image).transpose(2, 0, 1)[::-1].astype(np.float32) + image = self.crop(image) image -= self.mean_image x_batch[0] = image @@ -60,20 +51,24 @@ def feature(self, camera_image): if self.gpu >= 0: x_data=cuda.to_gpu(x_data) - + x = chainer.Variable(x_data, volatile=True) feature = self.predict(x) + feature = feature.data if self.gpu >= 0: - feature = cuda.to_cpu(feature.data) - feature = feature.reshape(self.out_dim) - else: - feature = feature.data.reshape(self.out_dim) + feature = cuda.to_cpu(feature) + feature = self.vec(feature) return feature * 255.0 - - - - + def crop(self, image): + #assume image is square + cropwidth = image.shape[1] - self.in_size + start = cropwidth // 2 + stop = start + self.in_size + return image[:, start:stop, start:stop].copy() + 
#vectorization, or mat(:) in MATLAB + def vec(self, mat): + return mat.reshape(mat.size)