diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f635ca8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,169 @@
+
+# Created by https://www.gitignore.io/api/python,pycharm
+
+### PyCharm ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff:
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/dictionaries
+
+# Sensitive or high-churn files:
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.xml
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+
+# Gradle:
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# CMake
+cmake-build-debug/
+
+# Mongo Explorer plugin:
+.idea/**/mongoSettings.xml
+
+## File-based project format:
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+/out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+### PyCharm Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+
+# Sonarlint plugin
+.idea/sonarlint
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# End of https://www.gitignore.io/api/python,pycharm
+
+# Custom
+.idea/
diff --git a/attention_decoder.py b/attention_decoder.py
index eccddf4..9404458 100644
--- a/attention_decoder.py
+++ b/attention_decoder.py
@@ -16,6 +16,7 @@
 
 """This file defines the decoder"""
 
+from builtins import str
 import tensorflow as tf
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import array_ops
diff --git a/batcher.py b/batcher.py
index d86b5eb..d96e84a 100644
--- a/batcher.py
+++ b/batcher.py
@@ -15,8 +15,13 @@
 # ==============================================================================
 
 """This file contains code to process data into batches"""
 
+from __future__ import absolute_import
-import Queue
+from future import standard_library
+standard_library.install_aliases()
+from builtins import range
+from builtins import object
+import queue
 from random import shuffle
 from threading import Thread
 import time
@@ -199,7 +204,7 @@ def init_decoder_seq(self, example_list, hps):
     for i, ex in enumerate(example_list):
       self.dec_batch[i, :] = ex.dec_input[:]
       self.target_batch[i, :] = ex.target[:]
-      for j in xrange(ex.dec_len):
+      for j in range(ex.dec_len):
         self.padding_mask[i][j] = 1
 
   def store_orig_strings(self, example_list):
@@ -229,8 +234,8 @@ def __init__(self, data_path, vocab, hps, single_pass):
     self._single_pass = single_pass
 
     # Initialize a queue of Batches waiting to be used, and a queue of Examples waiting to be batched
-    self._batch_queue = Queue.Queue(self.BATCH_QUEUE_MAX)
-    self._example_queue = Queue.Queue(self.BATCH_QUEUE_MAX * self._hps.batch_size)
+    self._batch_queue = queue.Queue(self.BATCH_QUEUE_MAX)
+    self._example_queue = queue.Queue(self.BATCH_QUEUE_MAX * self._hps.batch_size)
 
     # Different settings depending on whether we're in single_pass mode or not
     if single_pass:
@@ -245,12 +250,12 @@ def __init__(self, data_path, vocab, hps, single_pass):
 
     # Start the threads that load the queues
     self._example_q_threads = []
-    for _ in xrange(self._num_example_q_threads):
+    for _ in range(self._num_example_q_threads):
      self._example_q_threads.append(Thread(target=self.fill_example_queue))
      self._example_q_threads[-1].daemon = True
      self._example_q_threads[-1].start()
     self._batch_q_threads = []
-    for _ in xrange(self._num_batch_q_threads):
+    for _ in range(self._num_batch_q_threads):
      self._batch_q_threads.append(Thread(target=self.fill_batch_queue))
      self._batch_q_threads[-1].daemon = True
      self._batch_q_threads[-1].start()
@@ -287,7 +292,7 @@ def fill_example_queue(self):
 
     while True:
       try:
-        (article, abstract) = input_gen.next() # read the next example from file. article and abstract are both strings.
+        (article, abstract) = next(input_gen) # read the next example from file. article and abstract are both strings.
       except StopIteration: # if there are no more examples:
         tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
         if self._single_pass:
@@ -311,13 +316,13 @@ def fill_batch_queue(self):
       if self._hps.mode != 'decode':
         # Get bucketing_cache_size-many batches of Examples into a list, then sort
         inputs = []
-        for _ in xrange(self._hps.batch_size * self._bucketing_cache_size):
+        for _ in range(self._hps.batch_size * self._bucketing_cache_size):
          inputs.append(self._example_queue.get())
         inputs = sorted(inputs, key=lambda inp: inp.enc_len) # sort by length of encoder sequence
 
         # Group the sorted Examples into batches, optionally shuffle the batches, and place in the batch queue.
         batches = []
-        for i in xrange(0, len(inputs), self._hps.batch_size):
+        for i in range(0, len(inputs), self._hps.batch_size):
          batches.append(inputs[i:i + self._hps.batch_size])
         if not self._single_pass:
           shuffle(batches)
@@ -326,7 +331,7 @@
 
       else: # beam search decode mode
         ex = self._example_queue.get()
-        b = [ex for _ in xrange(self._hps.batch_size)]
+        b = [ex for _ in range(self._hps.batch_size)]
         self._batch_queue.put(Batch(b, self._hps, self._vocab))
 
 
@@ -356,10 +361,10 @@ def text_generator(self, example_generator):
     Args:
       example_generator: a generator of tf.Examples from file. See data.example_generator"""
     while True:
-      e = example_generator.next() # e is a tf.Example
+      e = next(example_generator) # e is a tf.Example
       try:
-        article_text = e.features.feature['article'].bytes_list.value[0] # the article text was saved under the key 'article' in the data files
-        abstract_text = e.features.feature['abstract'].bytes_list.value[0] # the abstract text was saved under the key 'abstract' in the data files
+        article_text = e.features.feature['article'].bytes_list.value[0].decode("utf-8") # the article text was saved under the key 'article' in the data files
+        abstract_text = e.features.feature['abstract'].bytes_list.value[0].decode("utf-8") # the abstract text was saved under the key 'abstract' in the data files
       except ValueError:
         tf.logging.error('Failed to get article or abstract from example: %s', text_format.MessageToString(e))
         continue
diff --git a/beam_search.py b/beam_search.py
index ff3e328..273045a 100644
--- a/beam_search.py
+++ b/beam_search.py
@@ -15,7 +15,12 @@
 # ==============================================================================
 
 """This file contains code to run beam search decoding"""
 
+from __future__ import division
+from __future__ import absolute_import
+from builtins import range
+from builtins import object
+from past.utils import old_div
 import tensorflow as tf
 import numpy as np
 import data
@@ -75,7 +80,7 @@ def log_prob(self):
   @property
   def avg_log_prob(self):
     # normalize log probability by number of tokens (otherwise longer sequences always have lower probability)
-    return self.log_prob / len(self.tokens)
+    return old_div(self.log_prob, len(self.tokens))
 
 
 def run_beam_search(sess, model, vocab, batch):
@@ -102,13 +107,13 @@ def run_beam_search(sess, model, vocab, batch):
                      attn_dists=[],
                      p_gens=[],
                      coverage=np.zeros([batch.enc_batch.shape[1]]) # zero vector of length attention_length
-                     ) for _ in xrange(FLAGS.beam_size)]
+                     ) for _ in range(FLAGS.beam_size)]
   results = [] # this will contain finished hypotheses (those that have emitted the [STOP] token)
 
   steps = 0
   while steps < FLAGS.max_dec_steps and len(results) < FLAGS.beam_size:
     latest_tokens = [h.latest_token for h in hyps] # latest token produced by each hypothesis
-    latest_tokens = [t if t in xrange(vocab.size()) else vocab.word2id(data.UNKNOWN_TOKEN) for t in latest_tokens] # change any in-article temporary OOV ids to [UNK] id, so that we can lookup word embeddings
+    latest_tokens = [t if t in range(vocab.size()) else vocab.word2id(data.UNKNOWN_TOKEN) for t in latest_tokens] # change any in-article temporary OOV ids to [UNK] id, so that we can lookup word embeddings
     states = [h.state for h in hyps] # list of current decoder states of the hypotheses
     prev_coverage = [h.coverage for h in hyps] # list of coverage vectors (or None)
 
@@ -123,9 +128,9 @@
     # Extend each hypothesis and collect them all in all_hyps
     all_hyps = []
     num_orig_hyps = 1 if steps == 0 else len(hyps) # On the first step, we only had one original hypothesis (the initial hypothesis). On subsequent steps, all original hypotheses are distinct.
-    for i in xrange(num_orig_hyps):
+    for i in range(num_orig_hyps):
       h, new_state, attn_dist, p_gen, new_coverage_i = hyps[i], new_states[i], attn_dists[i], p_gens[i], new_coverage[i] # take the ith hypothesis and new decoder state info
-      for j in xrange(FLAGS.beam_size * 2): # for each of the top 2*beam_size hyps:
+      for j in range(FLAGS.beam_size * 2): # for each of the top 2*beam_size hyps:
         # Extend the ith hypothesis with the jth option
         new_hyp = h.extend(token=topk_ids[i, j],
                            log_prob=topk_log_probs[i, j],
diff --git a/data.py b/data.py
index 93180fe..f3fb522 100644
--- a/data.py
+++ b/data.py
@@ -15,7 +15,9 @@
 # ==============================================================================
 
 """This file contains code to read the train/eval/test data from file and process it, and read the vocab data from file and process it"""
 
+from __future__ import print_function
+from builtins import range
 import glob
 import random
 import struct
@@ -58,7 +60,7 @@ def __init__(self, vocab_file, max_size):
      for line in vocab_f:
        pieces = line.split()
        if len(pieces) != 2:
-          print 'Warning: incorrectly formatted line in vocabulary file: %s\n' % line
+          print('Warning: incorrectly formatted line in vocabulary file: %s\n' % line)
          continue
        w = pieces[0]
        if w in [SENTENCE_START, SENTENCE_END, UNKNOWN_TOKEN, PAD_TOKEN, START_DECODING, STOP_DECODING]:
@@ -69,10 +71,10 @@ def __init__(self, vocab_file, max_size):
        self._id_to_word[self._count] = w
        self._count += 1
        if max_size != 0 and self._count >= max_size:
-          print "max_size of vocab was specified as %i; we now have %i words. Stopping reading." % (max_size, self._count)
+          print("max_size of vocab was specified as %i; we now have %i words. Stopping reading." % (max_size, self._count))
          break
 
-    print "Finished constructing vocabulary of %i total words. Last word added: %s" % (self._count, self._id_to_word[self._count-1])
+    print("Finished constructing vocabulary of %i total words. Last word added: %s" % (self._count, self._id_to_word[self._count-1]))
 
   def word2id(self, word):
     """Returns the id (integer) of a word (string). Returns [UNK] id if word is OOV."""
@@ -97,11 +99,11 @@ def write_metadata(self, fpath):
     Args:
       fpath: place to write the metadata file
     """
-    print "Writing word embedding metadata file to %s..." % (fpath)
+    print("Writing word embedding metadata file to %s..." % (fpath))
     with open(fpath, "w") as f:
       fieldnames = ['word']
       writer = csv.DictWriter(f, delimiter="\t", fieldnames=fieldnames)
-      for i in xrange(self.size()):
+      for i in range(self.size()):
         writer.writerow({"word": self._id_to_word[i]})
 
 
@@ -137,7 +139,7 @@ def example_generator(data_path, single_pass):
         example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0]
         yield example_pb2.Example.FromString(example_str)
     if single_pass:
-      print "example_generator completed reading all datafiles. No more data."
+      print("example_generator completed reading all datafiles. No more data.")
       break
 
 
diff --git a/decode.py b/decode.py
index 90b5aec..aae66fe 100644
--- a/decode.py
+++ b/decode.py
@@ -15,7 +15,10 @@
 # ==============================================================================
 
 """This file contains code to run beam search decoding, including running ROUGE evaluation and producing JSON datafiles for the in-browser attention visualizer, which can be found here https://github.com/abisee/attn_vis"""
 
+from __future__ import print_function
+from __future__ import absolute_import
+from builtins import object
 import os
 import time
 import tensorflow as tf
@@ -25,7 +28,6 @@
 import pyrouge
 import util
 import logging
-import numpy as np
 
 FLAGS = tf.app.flags.FLAGS
 
@@ -191,11 +193,11 @@ def write_for_attnvis(self, article, abstract, decoded_words, attn_dists, p_gens
 
 def print_results(article, abstract, decoded_output):
   """Prints the article, the reference summmary and the decoded summary to screen"""
-  print ""
+  print("")
   tf.logging.info('ARTICLE: %s', article)
   tf.logging.info('REFERENCE SUMMARY: %s', abstract)
   tf.logging.info('GENERATED SUMMARY: %s', decoded_output)
-  print ""
+  print("")
 
 
 def make_html_safe(s):
diff --git a/model.py b/model.py
index 660848d..5c487b8 100644
--- a/model.py
+++ b/model.py
@@ -15,7 +15,13 @@
 # ==============================================================================
 
 """This file contains code to build and run the tensorflow graph for the sequence-to-sequence model"""
 
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from past.utils import old_div
+from builtins import object
 import os
 import time
 import numpy as np
@@ -208,8 +214,10 @@ def _add_seq2seq(self):
       with tf.variable_scope('embedding'):
         embedding = tf.get_variable('embedding', [vsize, hps.emb_dim], dtype=tf.float32, initializer=self.trunc_norm_init)
         if hps.mode=="train": self._add_emb_vis(embedding) # add to tensorboard
-        emb_enc_inputs = tf.nn.embedding_lookup(embedding, self._enc_batch) # tensor with shape (batch_size, max_enc_steps, emb_size)
-        emb_dec_inputs = [tf.nn.embedding_lookup(embedding, x) for x in tf.unstack(self._dec_batch, axis=1)] # list length max_dec_steps containing shape (batch_size, emb_size)
+        emb_enc_inputs = tf.nn.embedding_lookup(embedding, self._enc_batch) # tensor with shape
+        # (batch_size, max_enc_steps, emb_size)
+        emb_dec_inputs = [tf.nn.embedding_lookup(embedding, x) for x in tf.unstack(self._dec_batch, axis=1)] # list
+        # length max_dec_steps containing shape (batch_size, emb_size)
 
       # Add the encoder.
       enc_outputs, fw_st, bw_st = self._add_encoder(emb_enc_inputs, self._enc_lens)
@@ -299,7 +307,7 @@ def _add_train_op(self):
     # Apply adagrad optimizer
     optimizer = tf.train.AdagradOptimizer(self._hps.lr, initial_accumulator_value=self._hps.adagrad_init_acc)
     with tf.device("/gpu:0"):
-      self._train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=self.global_step, name='train_step')
+      self._train_op = optimizer.apply_gradients(list(zip(grads, tvars)), global_step=self.global_step, name='train_step')
 
 
   def build_graph(self):
@@ -416,7 +424,7 @@ def decode_onestep(self, sess, batch, latest_tokens, enc_states, dec_init_states
     results = sess.run(to_return, feed_dict=feed) # run the decoder step
 
     # Convert results['states'] (a single LSTMStateTuple) into a list of LSTMStateTuple -- one for each hypothesis
-    new_states = [tf.contrib.rnn.LSTMStateTuple(results['states'].c[i, :], results['states'].h[i, :]) for i in xrange(beam_size)]
+    new_states = [tf.contrib.rnn.LSTMStateTuple(results['states'].c[i, :], results['states'].h[i, :]) for i in range(beam_size)]
 
     # Convert singleton list containing a tensor to a list of k arrays
     assert len(results['attn_dists'])==1
@@ -427,14 +435,14 @@
       assert len(results['p_gens'])==1
       p_gens = results['p_gens'][0].tolist()
     else:
-      p_gens = [None for _ in xrange(beam_size)]
+      p_gens = [None for _ in range(beam_size)]
 
     # Convert the coverage tensor to a list length k containing the coverage vector for each hypothesis
     if FLAGS.coverage:
       new_coverage = results['coverage'].tolist()
       assert len(new_coverage) == beam_size
     else:
-      new_coverage = [None for _ in xrange(beam_size)]
+      new_coverage = [None for _ in range(beam_size)]
 
     return results['ids'], results['probs'], new_states, attn_dists, p_gens, new_coverage
 
@@ -452,7 +460,7 @@ def _mask_and_avg(values, padding_mask):
 
   dec_lens = tf.reduce_sum(padding_mask, axis=1) # shape batch_size. float32
   values_per_step = [v * padding_mask[:,dec_step] for dec_step,v in enumerate(values)]
-  values_per_ex = sum(values_per_step)/dec_lens # shape (batch_size); normalized value for each batch member
+  values_per_ex = old_div(sum(values_per_step), dec_lens) # shape (batch_size); normalized value for each batch member
   return tf.reduce_mean(values_per_ex) # overall average
 
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fee2382
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+tensorflow
+pyrouge
+future
diff --git a/run_summarization.py b/run_summarization.py
index 8639af9..44e85b6 100644
--- a/run_summarization.py
+++ b/run_summarization.py
@@ -15,6 +15,8 @@
 # ==============================================================================
 
 """This is the top-level file to train, evaluate or test your summarization model"""
 
+from __future__ import print_function
+from __future__ import absolute_import
 import sys
 import time
@@ -99,21 +101,21 @@ def convert_to_coverage_model():
 
   # initialize an entire coverage model from scratch
   sess = tf.Session(config=util.get_config())
-  print "initializing everything..."
+  print("initializing everything...")
   sess.run(tf.global_variables_initializer())
 
   # load all non-coverage weights from checkpoint
   saver = tf.train.Saver([v for v in tf.global_variables() if "coverage" not in v.name and "Adagrad" not in v.name])
-  print "restoring non-coverage variables..."
+  print("restoring non-coverage variables...")
   curr_ckpt = util.load_ckpt(saver, sess)
-  print "restored."
+ print("restored.") # save this model and quit new_fname = curr_ckpt + '_cov_init' - print "saving model to %s..." % (new_fname) + print("saving model to %s..." % (new_fname)) new_saver = tf.train.Saver() # this one will save all variables that now exist new_saver.save(sess, new_fname) - print "saved." + print("saved.") exit() @@ -252,12 +254,14 @@ def main(unused_argv): raise Exception("The single_pass flag should only be True in decode mode") # Make a namedtuple hps, containing the values of the hyperparameters that the model needs - hparam_list = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage', 'cov_loss_wt', 'pointer_gen'] + hparam_list = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std', 'max_grad_norm', + 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage', 'cov_loss_wt', + 'pointer_gen'] hps_dict = {} - for key,val in FLAGS.__flags.iteritems(): # for each flag + for key,val in FLAGS.__flags.items(): # for each flag if key in hparam_list: # if it's in the list hps_dict[key] = val # add it to the dict - hps = namedtuple("HParams", hps_dict.keys())(**hps_dict) + hps = namedtuple("HParams", list(hps_dict.keys()))(**hps_dict) # Create a batcher object that will create minibatches of data batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass) @@ -265,7 +269,7 @@ def main(unused_argv): tf.set_random_seed(111) # a seed value for randomness if hps.mode == 'train': - print "creating model..." + print("creating model...") model = SummarizationModel(hps, vocab) setup_training(model, batcher) elif hps.mode == 'eval':