
Commit e30db7a

Added project files and results
1 parent aa5dbbb commit e30db7a

16 files changed: +1761 -2 lines

.gitignore

+22
@@ -0,0 +1,22 @@
.DS_Store
.idea*
*.pdf
*.jpg
*.png
*.pyc
*.py.bak
sample.py
vggtest.py
*.pem
amazon_ssh.sh
awstransfer.sh
localtrain.py
vislstm.png
sample_aws.sh
eval_trec.py
theanotest.py
data_loader_old.py
Utils/word_embeddings_old.py
gen_backup.py
data_loader_test.py
downloadModels.sh

Data/.gitignore

+3
@@ -0,0 +1,3 @@
*
*/
!.gitignore

README.md

+117-2
@@ -1,2 +1,117 @@
-# Text-to-Image-Synthesis
-Text to Image Synthesis using GANs and Skipthought Vectors

# Text To Image Synthesis Using Thought Vectors

[![Join the chat at https://gitter.im/text-to-image/Lobby](https://badges.gitter.im/text-to-image/Lobby.svg)](https://gitter.im/text-to-image/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

This is an experimental TensorFlow implementation of synthesizing images from captions using [Skip Thought Vectors][1]. The images are synthesized with the GAN-CLS algorithm from the paper [Generative Adversarial Text-to-Image Synthesis][2]. The implementation is built on top of the excellent [DCGAN in Tensorflow][3]. The model architecture is shown below; the blue bars represent the Skip Thought Vectors for the captions.

![Model architecture](http://i.imgur.com/dNl2HkZ.jpg)

Image Source: [Generative Adversarial Text-to-Image Synthesis][2] Paper
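
GAN-CLS trains the discriminator not just to tell real images from generated ones, but also to reject real images paired with mismatching captions. The snippet below is only a minimal sketch of that objective, not the ```model.py``` in this commit: ```d_real```, ```d_wrong``` and ```d_fake``` are assumed to be the discriminator's sigmoid outputs for (real image, right text), (real image, wrong text) and (fake image, right text) pairs.

```
import tensorflow as tf

def gan_cls_losses(d_real, d_wrong, d_fake, eps=1e-12):
    # Discriminator: push real/right pairs towards 1, wrong-text and fake pairs towards 0
    d_loss = -tf.reduce_mean(tf.log(d_real + eps)) \
             - 0.5 * tf.reduce_mean(tf.log(1. - d_wrong + eps)) \
             - 0.5 * tf.reduce_mean(tf.log(1. - d_fake + eps))
    # Generator: make the (fake image, right text) pair look real to the discriminator
    g_loss = -tf.reduce_mean(tf.log(d_fake + eps))
    return d_loss, g_loss
```
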
## Requirements
- Python 2.7.6
- [Tensorflow][4]
- [h5py][5]
- [Theano][6] : for skip thought vectors
- [scikit-learn][7] : for skip thought vectors
- [NLTK][8] : for skip thought vectors

## Datasets
- All the download steps below for the datasets and models can be performed automatically by running `python download_datasets.py`. Several gigabytes of files will be downloaded and extracted.
- The model is currently trained on the [flowers dataset][9]. Download the images from [this link][9] and save them in ```Data/flowers/jpg```. Also download the captions from [this link][10]. Extract the archive, copy the ```text_c10``` folder and paste it in ```Data/flowers```.
- Download the pretrained models and vocabulary for skip thought vectors as per the instructions given [here][13]. Save the downloaded files in ```Data/skipthoughts```.
- Create the empty directories ```Data/samples```, ```Data/val_samples``` and ```Data/Models```. They will be used for sampling the generated images and saving the trained models; a small helper for this step is sketched below.
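
If you prefer to create these working directories from Python rather than by hand, a small helper like the one below is enough (paths exactly as listed above; this is not part of the repository's scripts):

```
import os

# Directories expected by the training and sampling scripts (see the list above)
for d in ['Data/samples', 'Data/val_samples', 'Data/Models']:
    if not os.path.exists(d):
        os.makedirs(d)
```
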
## Usage
- <b>Data Processing</b> : Extract the skip thought vectors for the flowers data set using:
```
python data_loader.py --data_set="flowers"
```
- <b>Training</b>
  * Basic usage `python train.py --data_set="flowers"`
  * Options
    - `z_dim`: Noise dimension. Default is 100.
    - `t_dim`: Text feature dimension. Default is 256.
    - `batch_size`: Batch size. Default is 64.
    - `image_size`: Image dimension. Default is 64.
    - `gf_dim`: Number of convolutional filters in the first layer of the generator. Default is 64.
    - `df_dim`: Number of convolutional filters in the first layer of the discriminator. Default is 64.
    - `gfc_dim`: Dimension of the generator units for the fully connected layer. Default is 1024.
    - `caption_vector_length`: Length of the caption vector. Default is 1024.
    - `data_dir`: Data directory. Default is `Data/`.
    - `learning_rate`: Learning rate. Default is 0.0002.
    - `beta1`: Momentum term for the Adam optimizer. Default is 0.5.
    - `epochs`: Maximum number of epochs. Default is 600.
    - `resume_model`: Resume training from a pretrained model path.
    - `data_set`: Data set to train on. Default is flowers.

- <b>Generating Images from Captions</b>
  * Write the captions in a text file and save it as ```Data/sample_captions.txt```. Generate the skip thought vectors for these captions using:
```
python generate_thought_vectors.py --caption_file="Data/sample_captions.txt"
```
  * Generate the images for the thought vectors using:
```
python generate_images.py --model_path=<path to the trained model> --n_images=8
```
```n_images``` specifies the number of images to be generated per caption. The generated images will be saved in ```Data/val_samples/```. Run ```python generate_images.py --help``` for more options. A sketch of the caption-encoding call that these scripts rely on is shown below.
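
For reference, the caption-encoding step that ```generate_thought_vectors.py``` and ```data_loader.py``` build on looks roughly like this with the [skip-thoughts][12] library; the output file name below is illustrative only, so check the scripts for the paths they actually write.

```
import h5py
import skipthoughts

captions = [line.strip() for line in open('Data/sample_captions.txt') if line.strip()]
model = skipthoughts.load_model()               # load the pretrained skip-thought models
vectors = skipthoughts.encode(model, captions)  # one skip-thought vector per caption
with h5py.File('Data/sample_caption_vectors.hdf5', 'w') as f:   # illustrative file name
    f.create_dataset('vectors', data=vectors)
```
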
## Sample Images Generated
Following are the images generated by the model from the captions.

| Caption | Generated Images |
| ------------- | -----:|
| the flower shown has yellow anther red pistil and bright red petals | ![](http://i.imgur.com/SknZ3Sg.jpg) |
| this flower has petals that are yellow, white and purple and has dark lines | ![](http://i.imgur.com/8zsv9Nc.jpg) |
| the petals on this flower are white with a yellow center | ![](http://i.imgur.com/vvzv1cE.jpg) |
| this flower has a lot of small round pink petals. | ![](http://i.imgur.com/w0zK1DC.jpg) |
| this flower is orange in color, and has petals that are ruffled and rounded. | ![](http://i.imgur.com/VfBbRP1.jpg) |
| the flower has yellow petals and the center of it is brown | ![](http://i.imgur.com/IAuOGZY.jpg) |

## Implementation Details
- Only the uni-skip vectors from the skip thought vectors are used. I have not tried training the model with combine-skip vectors.
- The model was trained for around 200 epochs on a GPU. This took roughly 2-3 days.
- The generated images are 64 x 64 pixels.
- While processing the batches before training, the images are flipped horizontally with a probability of 0.5.
- The train-val split is 0.75.

## Pre-trained Models
- Download the pretrained model from [here][14] and save it in ```Data/Models```. Use this path when generating the images.

## TODO
- Train the model on the MS-COCO data set, and generate more generic images.
- Try different embedding options for captions (other than skip thought vectors). Also try to train the caption embedding RNN along with the GAN-CLS model.

## References
- [Generative Adversarial Text-to-Image Synthesis][2] Paper
- [Generative Adversarial Text-to-Image Synthesis][11] Code
- [Skip Thought Vectors][1] Paper
- [Skip Thought Vectors][12] Code
- [DCGAN in Tensorflow][3]
- [DCGAN in Tensorlayer][15]

## Alternate Implementations
- [Text to Image in Torch by Scott Reed][11]
- [Text to Image in Tensorlayer by Dong Hao][16]

## License
MIT


[1]:http://arxiv.org/abs/1506.06726
[2]:http://arxiv.org/abs/1605.05396
[3]:https://github.com/carpedm20/DCGAN-tensorflow
[4]:https://github.com/tensorflow/tensorflow
[5]:http://www.h5py.org/
[6]:https://github.com/Theano/Theano
[7]:http://scikit-learn.org/stable/index.html
[8]:http://www.nltk.org/
[9]:http://www.robots.ox.ac.uk/~vgg/data/flowers/102/
[10]:https://drive.google.com/file/d/0B0ywwgffWnLLcms2WWJQRFNSWXM/view
[11]:https://github.com/reedscot/icml2016
[12]:https://github.com/ryankiros/skip-thoughts
[13]:https://github.com/ryankiros/skip-thoughts#getting-started
[14]:https://bitbucket.org/paarth_neekhara/texttomimagemodel/raw/74a4bbaeee26fe31e148a54c4f495694680e2c31/latest_model_flowers_temp.ckpt
[15]:https://github.com/zsdonghao/dcgan
[16]:https://github.com/zsdonghao/text-to-image

Utils/__init__.py

Whitespace-only changes.

Utils/image_processing.py

+33
@@ -0,0 +1,33 @@
import numpy as np
import random
import skimage
import skimage.io
import skimage.transform
import imageio


def load_image_array(image_file, image_size):
    img = skimage.io.imread(image_file)
    # GRAYSCALE: replicate the single channel so the image has 3 channels
    if len(img.shape) == 2:
        img_new = np.ndarray((img.shape[0], img.shape[1], 3), dtype='uint8')
        img_new[:, :, 0] = img
        img_new[:, :, 1] = img
        img_new[:, :, 2] = img
        img = img_new

    img_resized = skimage.transform.resize(img, (image_size, image_size))

    # FLIP HORIZONTAL WITH A PROBABILITY 0.5
    if random.random() > 0.5:
        img_resized = np.fliplr(img_resized)

    return img_resized.astype('float32')


if __name__ == '__main__':
    # TEST>>>
    arr = load_image_array('sample.jpg', 64)
    print(arr.mean())
    # rev = np.fliplr(arr)
    imageio.imwrite('rev.jpg', arr)
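
A quick, illustrative example of assembling an image batch with load_image_array (the file names below are placeholders; the real batching happens in the training code):

import numpy as np
from Utils.image_processing import load_image_array

# Placeholder file names; in practice these come from Data/flowers/jpg
files = ['Data/flowers/jpg/image_00001.jpg', 'Data/flowers/jpg/image_00002.jpg']
batch = np.stack([load_image_array(f, 64) for f in files])  # shape (2, 64, 64, 3)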

Utils/ops.py

+133
@@ -0,0 +1,133 @@
# REUSED CODE FROM https://github.com/carpedm20/DCGAN-tensorflow/blob/master/ops.py
import math
import numpy as np
import tensorflow as tf

from tensorflow.python.framework import ops


class batch_norm(object):

    # Initializes a batch_norm layer when the class is instantiated.
    # Code modification of http://stackoverflow.com/a/33950177
    def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"):

        with tf.variable_scope(name):

            self.epsilon = epsilon
            self.momentum = momentum
            self.ema = tf.train.ExponentialMovingAverage(decay=self.momentum)
            self.name = name

    def __call__(self, x, train=True):
        shape = x.get_shape().as_list()

        if train:
            with tf.variable_scope(self.name) as scope:
                self.beta = tf.get_variable("beta", [shape[-1]],
                                            initializer=tf.constant_initializer(0.))
                self.gamma = tf.get_variable("gamma", [shape[-1]],
                                             initializer=tf.random_normal_initializer(1., 0.02))

                try:
                    batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
                except:
                    batch_mean, batch_var = tf.nn.moments(x, [0, 1], name='moments')

                with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
                    ema_apply_op = self.ema.apply([batch_mean, batch_var])
                    self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var)

                with tf.control_dependencies([ema_apply_op]):
                    mean, var = tf.identity(batch_mean), tf.identity(batch_var)
        else:
            mean, var = self.ema_mean, self.ema_var

        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True)

        return normed


def binary_cross_entropy(preds, targets, name=None):
    """Computes binary cross entropy given `preds`.

    For brevity, let `x = preds`, `z = targets`. The logistic loss is

        loss(x, z) = - sum_i (z[i] * log(x[i]) + (1 - z[i]) * log(1 - x[i]))

    Args:
        preds: A `Tensor` of type `float32` or `float64`.
        targets: A `Tensor` of the same type and shape as `preds`.
    """
    eps = 1e-12
    with ops.op_scope([preds, targets], name, "bce_loss") as name:
        preds = ops.convert_to_tensor(preds, name="preds")
        targets = ops.convert_to_tensor(targets, name="targets")
        return tf.reduce_mean(-(targets * tf.log(preds + eps) + (1.0 - targets) * tf.log(1.0 - preds + eps)))


def conv_cond_concat(x, y):
    """Concatenate conditioning vector on feature map axis."""
    x_shapes = x.get_shape()
    y_shapes = y.get_shape()
    return tf.concat(3, [x, y * tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])])


def conv2d(input_, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="conv2d"):
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))

        conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')

        biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())

        return conv


def deconv2d(input_, output_shape, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="deconv2d", with_w=False):

    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
                            initializer=tf.random_normal_initializer(stddev=stddev))

        try:
            deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape, strides=[1, d_h, d_w, 1])

        # Support for versions of TensorFlow before 0.7.0
        except AttributeError:
            deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape, strides=[1, d_h, d_w, 1])

        biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())

        if with_w:
            return deconv, w, biases
        else:
            return deconv


# Leaky ReLU activation
def lrelu(x, leak=0.2, name="lrelu"):
    return tf.maximum(x, leak * x)


def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):

    # input_ is the text embedding being passed from model.py
    shape = input_.get_shape().as_list()

    # variable_scope lets variables be created once and then shared.
    # See https://www.tensorflow.org/api_docs/python/tf/compat/v1/variable_scope
    with tf.variable_scope(scope or "Linear"):

        # get_variable returns an existing variable with these parameters or creates a new one.
        # Input arguments are: name, shape, dtype and initializer.

        # Weight matrix
        matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))

        # Bias vector
        bias = tf.get_variable("bias", [output_size], initializer=tf.constant_initializer(bias_start))

        # Return input_ * matrix + bias (optionally together with the variables)
        if with_w:
            return tf.matmul(input_, matrix) + bias, matrix, bias
        else:
            return tf.matmul(input_, matrix) + bias
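
To show how these helpers fit together, below is a minimal sketch of a single DCGAN-style generator stage built from linear, deconv2d and batch_norm. It is not the model.py from this commit; the shapes and variable names are illustrative only.

import tensorflow as tf
from Utils.ops import linear, deconv2d, batch_norm

def toy_generator_stage(z, batch_size=64):
    # Project the (noise + text) code and reshape it into a 4x4 feature map
    h0 = linear(z, 64 * 8 * 4 * 4, 'g_h0_lin')
    h0 = tf.reshape(h0, [-1, 4, 4, 64 * 8])
    h0 = tf.nn.relu(batch_norm(name='g_bn0')(h0))

    # Upsample 4x4 -> 8x8 with a transposed convolution
    h1 = deconv2d(h0, [batch_size, 8, 8, 64 * 4], name='g_h1')
    return tf.nn.relu(batch_norm(name='g_bn1')(h1))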
