From 8ec515b89f792332f81f9ade4d5b479e55913322 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Tue, 5 Jun 2018 13:45:47 -0400
Subject: [PATCH 01/14] add seq2seq translator example

---
 .DS_Store                                 | Bin 0 -> 8196 bytes
 examples/.DS_Store                        | Bin 0 -> 10244 bytes
 examples/sequence-to-sequence/.DS_Store   | Bin 0 -> 6148 bytes
 .../seq2seq_translator/.DS_Store          | Bin 0 -> 6148 bytes
 .../seq2seq_translator/README.md          | 268 +++++++++++++
 .../seq2seq_translator/seq2seq_dynet.py   | 331 ++++++++++++++++
 .../seq2seq_translator/seq2seq_pytorch.py | 354 ++++++++++++++++++
 7 files changed, 953 insertions(+)
 create mode 100644 .DS_Store
 create mode 100644 examples/.DS_Store
 create mode 100644 examples/sequence-to-sequence/.DS_Store
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.DS_Store
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/README.md
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..d7dc14478065716eb8d8443f856f8a008e2b97a3
Binary files /dev/null and b/.DS_Store differ
diff --git a/examples/.DS_Store b/examples/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..2ef87546db611ef70fc85732a1589dc930659546
Binary files /dev/null and b/examples/.DS_Store differ
diff --git a/examples/sequence-to-sequence/.DS_Store b/examples/sequence-to-sequence/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..0b4a6dc06944ab2783d7da06388e4224869f0466
Binary files /dev/null and b/examples/sequence-to-sequence/.DS_Store differ
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.DS_Store b/examples/sequence-to-sequence/seq2seq_translator/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..071667f4470f57571ab1453781926f235c61c504
Binary files /dev/null and b/examples/sequence-to-sequence/seq2seq_translator/.DS_Store differ
diff --git a/examples/sequence-to-sequence/seq2seq_translator/README.md b/examples/sequence-to-sequence/seq2seq_translator/README.md
new file mode 100644
index 000000000..d678dba3e
--- /dev/null
+++ b/examples/sequence-to-sequence/seq2seq_translator/README.md
@@ -0,0 +1,268 @@
+# Seq2seq Translator Benchmarks
+
+Here is the comparison between Dynet and PyTorch on the [seq2seq translator example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
+
+We have already preprocessed and prepared the eng-fra language dataset in the `data` folder according to the steps used in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
+
+## Usage (Dynet)
+
+The architecture of the Dynet model `seq2seq_dynet.py` is the same as that in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html). We implement the attention mechanism in the model as well.
+
+The architecture of the Dynet model is shown as follows.
+ +```python +class EncoderRNN(object): + + def __init__(self, in_vocab, hidden_dim, model): + self.in_vocab = in_vocab + self.hidden_dim = hidden_dim + self.embedding_enc = model.add_lookup_parameters((self.in_vocab, self.hidden_dim)) + self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + + def __call__(self, input, hidden): + input_embed = dy.lookup(self.embedding_enc, input) + state_enc = self.rnn_enc.initial_state(vecs=hidden) + state_enc = state_enc.add_input(input_embed) + return state_enc.output(), state_enc.h() + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] + +DROPOUT_RATE = 0.1 + +class AttnDecoderRNN(object): + + def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH): + self.hidden_dim = hidden_dim + self.out_vocab = out_vocab + self.max_length = max_length + self.embedding_dec = model.add_lookup_parameters((self.out_vocab, self.hidden_dim)) + self.w_attn = model.add_parameters((self.max_length, self.hidden_dim * 2)) + self.b_attn = model.add_parameters((self.max_length,)) + self.w_attn_combine = model.add_parameters((self.hidden_dim, self.hidden_dim * 2)) + self.b_attn_combine = model.add_parameters((self.hidden_dim,)) + self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim)) + self.b_dec = model.add_parameters((self.out_vocab,)) + + def __call__(self, input, hidden, encoder_outptus, dropout=False): + input_embed = dy.lookup(self.embedding_dec, input) + if dropout: + input_embed = dy.dropout(input_embed, DROPOUT_RATE) + input_cat = dy.concatenate([input_embed, hidden[0]]) + w_attn = dy.parameter(self.w_attn) + b_attn = dy.parameter(self.b_attn) + attn_weights = dy.softmax(w_attn * input_cat + b_attn) + attn_applied = encoder_outptus * attn_weights + output = dy.concatenate([input_embed, attn_applied]) + w_attn_combine = dy.parameter(self.w_attn_combine) + b_attn_combine = dy.parameter(self.b_attn_combine) + output = w_attn_combine * output + b_attn_combine + output = dy.rectify(output) + state_dec = self.rnn_dec.initial_state(vecs=hidden) + state_dec = state_dec.add_input(output) + w_dec = dy.parameter(self.w_dec) + b_dec = dy.parameter(self.b_dec) + output = state_dec.output() + output = dy.softmax(w_dec * output + b_dec) + return output, hidden, attn_weights + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] +``` + +Install the GPU version of Dynet according to the instructions on the [official website](http://dynet.readthedocs.io/en/latest/python.html#installing-a-cutting-edge-and-or-gpu-version). + +Then, run the training: + +
+python seq2seq_dynet.py --dynet_gpus 1
+
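After training, `evaluationRandomly(encoder, decoder)` prints ten sample translations. To translate a single sentence with the trained Dynet model, a minimal sketch along the following lines should work; it assumes the objects created at the bottom of `seq2seq_dynet.py` (`encoder`, `decoder`, `input_lang`) are still in scope and that every word of the input appears in the training vocabulary:

```python
# Hedged sketch, not part of seq2seq_dynet.py: greedily decode one
# normalized, space-tokenized French sentence with the trained model.
sentence = normalizeString("elle est infirmiere .")
output_words = evaluate(encoder, decoder, sentence)  # list of predicted tokens
print(" ".join(output_words))
```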
+

## Usage (PyTorch)

The code in `seq2seq_pytorch.py` follows the same lines as the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).

The architecture of the PyTorch model is shown as follows.

```python
class EncoderRNN(nn.Module):

    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

class AttnDecoderRNN(nn.Module):

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
```

Install the CUDA version of PyTorch according to the instructions on the [official website](http://pytorch.org/).

Then, run the training:

+
+python seq2seq_pytorch.py
+
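The trained PyTorch model can be queried the same way once training completes. A minimal sketch, assuming `encoder1` and `attn_decoder1` from the bottom of `seq2seq_pytorch.py` are in scope and the input words are in the training vocabulary:

```python
# Hedged sketch, not part of seq2seq_pytorch.py: `evaluate` returns the
# decoded tokens together with the attention weights for each output step.
sentence = normalizeString("elle est infirmiere .")
output_words, attentions = evaluate(encoder1, attn_decoder1, sentence)
print(" ".join(output_words))
```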
+

## Performance

We ran our code on a desktop with an NVIDIA TITAN X GPU. In the table below, D stands for Dynet and P stands for PyTorch.

| Time (D) | Iteration (D) | Loss (D) | Time (P) | Iteration (P) | Loss (P) |
| --- | --- | --- | --- | --- | --- |
| 0m 26s | 5000 5% | 3.3565 | 1m 30s | 5000 5% | 2.8794 |
| 0m 53s | 10000 10% | 2.7376 | 2m 55s | 10000 10% | 2.3103 |
| 1m 21s | 15000 15% | 2.4912 | 4m 5s | 15000 15% | 1.9939 |
| 1m 48s | 20000 20% | 2.2536 | 5m 16s | 20000 20% | 1.7537 |
| 2m 16s | 25000 25% | 2.0537 | 6m 27s | 25000 25% | 1.5796 |
| 2m 44s | 30000 30% | 1.8832 | 7m 39s | 30000 30% | 1.3795 |
| 3m 12s | 35000 35% | 1.7232 | 9m 13s | 35000 35% | 1.2712 |
| 3m 40s | 40000 40% | 1.5833 | 10m 31s | 40000 40% | 1.1374 |
| 4m 8s | 45000 45% | 1.4360 | 11m 41s | 45000 45% | 1.0215 |
| 4m 36s | 50000 50% | 1.2916 | 12m 53s | 50000 50% | 0.9307 |
| 5m 4s | 55000 55% | 1.2023 | 14m 5s | 55000 55% | 0.8312 |
| 5m 33s | 60000 60% | 1.1186 | 15m 17s | 60000 60% | 0.7879 |
| 6m 1s | 65000 65% | 1.0435 | 16m 48s | 65000 65% | 0.7188 |
| 6m 30s | 70000 70% | 0.9348 | 18m 6s | 70000 70% | 0.6532 |
| 6m 58s | 75000 75% | 0.8634 | 19m 18s | 75000 75% | 0.6273 |
| 7m 26s | 80000 80% | 0.8323 | 20m 34s | 80000 80% | 0.6021 |
| 7m 55s | 85000 85% | 0.7610 | 21m 44s | 85000 85% | 0.5210 |
| 8m 23s | 90000 90% | 0.7377 | 22m 55s | 90000 90% | 0.5054 |
| 8m 52s | 95000 95% | 0.6666 | 24m 9s | 95000 95% | 0.4417 |
| 9m 21s | 100000 100% | 0.6237 | 25m 24s | 100000 100% | 0.4297 |

We then show some evaluation results as follows.

Format:

+
+> input 
+= target 
+< output
+
+ +### Dynet + +``` +> elle est infirmiere . += she is a nurse . +< she is a nurse . + +> tu n es pas contrariee si ? += you re not upset are you ? +< you re not upset are you re not upset are + +> j en ai termine avec mon travail . += i am through with my work . +< i am through with my work . + +> je ne l invente pas . += i m not making that up . +< i m not making up . + +> elles ont peur de moi . += they re afraid of me . +< they re afraid of me . + +> on va jouer au tennis . += we re going to play tennis . +< we are going tennis . + +> j ai une assuetude . += i m addicted . +< i m addicted . + +> elles sont en train de vous chercher . += they re looking for you . +< they re looking for you . + +> elle semble riche . += she seems rich . +< she seems rich . + +> vous etes bizarre . += you re weird . +< you re weird . +``` + +### PyTorch + +``` +> il est deja marie . += he s already married . +< he s already married . + +> on le dit decede . += he is said to have died . +< he are said to have died . + +> il est trop saoul . += he s too drunk . +< he s too drunk . + +> je suis assez heureux . += i m happy enough . +< i m happy happy . + +> je n y suis pas interessee . += i m not interested in that . +< i m not interested in that . + +> il a huit ans . += he s eight years old . +< he is thirty . + +> je ne suis pas differente . += i m no different . +< i m no different . + +> je suis heureux que vous l ayez aime . += i m happy you liked it . +< i m happy you liked it . + +> ils peuvent chanter . += they re able to sing . +< they re able to sing . + +> vous etes tellement belle dans cette robe ! += you re so beautiful in that dress . +< you re so beautiful in that dress . +``` diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py new file mode 100644 index 000000000..326744958 --- /dev/null +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py @@ -0,0 +1,331 @@ +# Requirements + +from __future__ import unicode_literals, print_function, division +from io import open +import unicodedata +import re +import random +import dynet as dy + +# Data Preparation + +SOS_token = 0 +EOS_token = 1 + +class Lang(object): + + def __init__(self, name): + self.name = name + self.word2index = {} + self.word2count = {} + self.index2word = {0: "SOS", 1: "EOS"} + self.n_words = 2 + + def addSentence(self, sentence): + for word in sentence.split(" "): + self.addWord(word) + + def addWord(self, word): + if word not in self.word2index: + self.word2index[word] = self.n_words + self.word2count[word] = 1 + self.index2word[self.n_words] = word + self.n_words += 1 + else: + self.word2count[word] += 1 + +def unicodeToAscii(s): + + return ''.join( + c for c in unicodedata.normalize('NFD', s) + if unicodedata.category(c) != 'Mn' + ) + +def normalizeString(s): + + s = unicodeToAscii(s.lower().strip()) + s = re.sub(r"([.!?])", r" \1", s) + s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) + return s + +def readLangs(lang1, lang2, reverse=False): + + print("Reading lines...") + lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().strip().split('\n') + pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines] + if reverse: + pairs = [list(reversed(p)) for p in pairs] + input_lang = Lang(lang2) + output_lang = Lang(lang1) + else: + input_lang = Lang(lang1) + output_lang = Lang(lang2) + return input_lang, output_lang, pairs + +MAX_LENGTH = 10 +eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", 
"she s", "you are", "you re ", + "we are", "we re ", "they are", "they re ") + +def filterPair(p): + + return len(p[0].split(' ')) < MAX_LENGTH and \ + len(p[1].split(' ')) < MAX_LENGTH and \ + p[1].startswith(eng_prefixes) + +def filterPairs(pairs): + + return [pair for pair in pairs if filterPair(pair)] + +def prepareData(lang1, lang2, reverse=False): + + input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse) + print("Read %s sentence pairs" % len(pairs)) + pairs = filterPairs(pairs) + print("Trimmed to %s sentence pairs" % len(pairs)) + print("Counting words...") + for pair in pairs: + input_lang.addSentence(pair[0]) + output_lang.addSentence(pair[1]) + print("Counted words:") + print(input_lang.name, input_lang.n_words) + print(output_lang.name, output_lang.n_words) + return input_lang, output_lang, pairs + +input_lang, output_lang, pairs = prepareData('eng', 'fra', True) +print(random.choice(pairs)) + +# Model + +class EncoderRNN(object): + + def __init__(self, in_vocab, hidden_dim, model): + self.in_vocab = in_vocab + self.hidden_dim = hidden_dim + self.embedding_enc = model.add_lookup_parameters((self.in_vocab, self.hidden_dim)) + self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + + def __call__(self, input, hidden): + input_embed = dy.lookup(self.embedding_enc, input) + state_enc = self.rnn_enc.initial_state(vecs=hidden) + state_enc = state_enc.add_input(input_embed) + return state_enc.output(), state_enc.h() + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] + +DROPOUT_RATE = 0.1 + +class AttnDecoderRNN(object): + + def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH): + self.hidden_dim = hidden_dim + self.out_vocab = out_vocab + self.max_length = max_length + self.embedding_dec = model.add_lookup_parameters((self.out_vocab, self.hidden_dim)) + self.w_attn = model.add_parameters((self.max_length, self.hidden_dim * 2)) + self.b_attn = model.add_parameters((self.max_length,)) + self.w_attn_combine = model.add_parameters((self.hidden_dim, self.hidden_dim * 2)) + self.b_attn_combine = model.add_parameters((self.hidden_dim,)) + self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim)) + self.b_dec = model.add_parameters((self.out_vocab,)) + + def __call__(self, input, hidden, encoder_outptus, dropout=False): + input_embed = dy.lookup(self.embedding_dec, input) + if dropout: + input_embed = dy.dropout(input_embed, DROPOUT_RATE) + input_cat = dy.concatenate([input_embed, hidden[0]]) + w_attn = dy.parameter(self.w_attn) + b_attn = dy.parameter(self.b_attn) + attn_weights = dy.softmax(w_attn * input_cat + b_attn) + attn_applied = encoder_outptus * attn_weights + output = dy.concatenate([input_embed, attn_applied]) + w_attn_combine = dy.parameter(self.w_attn_combine) + b_attn_combine = dy.parameter(self.b_attn_combine) + output = w_attn_combine * output + b_attn_combine + output = dy.rectify(output) + state_dec = self.rnn_dec.initial_state(vecs=hidden) + state_dec = state_dec.add_input(output) + w_dec = dy.parameter(self.w_dec) + b_dec = dy.parameter(self.b_dec) + output = state_dec.output() + output = dy.softmax(w_dec * output + b_dec) + + return output, hidden, attn_weights + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] + + +def indexesFromSentence(lang, sentence): + + return [lang.word2index[word] for word in sentence.split(" ")] + [EOS_token] + +def indexesFromPair(pair): + + input_indexes = 
indexesFromSentence(input_lang, pair[0]) + target_indexes = indexesFromSentence(output_lang, pair[1]) + return (input_indexes, target_indexes) + +# Training the Model + +teacher_forcing_ratio = 0.5 + +def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH): + + dy.renew_cg() + + encoder_hidden = encoder.initHidden() + + input_length = len(inputs) + target_length = len(targets) + + encoder_outputs = [dy.zeros(hidden_dim) for _ in range(max_length)] + + losses = [] + + for i in range(input_length): + encoder_output, encoder_hidden = encoder(inputs[i], encoder_hidden) + encoder_outputs[i] = encoder_output + + encoder_outputs = dy.concatenate(encoder_outputs, 1) + + decoder_input = SOS_token + decoder_hidden = encoder_hidden + + if random.random() < teacher_forcing_ratio: + use_teacher_forcing = True + else: + use_teacher_forcing = False + + if use_teacher_forcing: + for i in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True) + losses.append(-dy.log(dy.pick(decoder_output, targets[i]))) + decoder_input = targets[i] + else: + for i in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True) + losses.append(-dy.log(dy.pick(decoder_output, targets[i]))) + probs = decoder_output.vec_value() + decoder_input = probs.index(max(probs)) + if decoder_input == EOS_token: + break + + loss = dy.esum(losses)/len(losses) + loss.backward() + trainer.update() + + return loss.value() + +# Helper Function to Print Time + +import time +import math + +def asMinutes(s): + m = math.floor(s/60) + s -= m*60 + return "%dm %ds" % (m, s) + +def timeSince(since, percent): + now = time.time() + s = now - since + es = s / (percent) + rs = es - s + return "%s (- %s)" % (asMinutes(s), asMinutes(rs)) + +# Whole Training Process + +def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=100): + + start = time.time() + plot_losses = [] + print_loss_total = 0 + plot_loss_total = 0 + + training_pairs = [indexesFromPair(random.choice(pairs)) for _ in range(n_iters)] + + for iter in range(1, n_iters+1): + + training_pair = training_pairs[iter-1] + inputs = training_pair[0] + targets = training_pair[1] + + loss = train(inputs, targets, encoder, decoder, trainer) + + print_loss_total += loss + plot_loss_total += loss + + if iter % print_every == 0: + print_loss_avg = print_loss_total/print_every + print_loss_total = 0 + print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters), iter, iter/n_iters*100, print_loss_avg)) + + if iter % plot_every == 0: + plot_loss_avg = plot_loss_total/plot_every + plot_losses.append(plot_loss_avg) + plot_loss_total = 0 + +# Evaluation + +def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH): + + dy.renew_cg() + + encoder_hidden = encoder.initHidden() + + inputs = indexesFromSentence(input_lang, sentence) + input_length = len(inputs) + + encoder_outputs = [dy.zeros(hidden_dim) for _ in range(max_length)] + + for i in range(input_length): + encoder_output, encoder_hidden = encoder(inputs[i], encoder_hidden) + encoder_outputs[i] = encoder_output + + encoder_outputs = dy.concatenate(encoder_outputs, 1) + + decoder_input = SOS_token + decoder_hidden = encoder_hidden + + decoder_words = [] + decoder_attentions = [dy.zeros(max_length) for _ in range(max_length)] + + for i in range(max_length): + decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, 
decoder_hidden, encoder_outputs, dropout=False) + decoder_attentions[i] = decoder_attention + probs = decoder_output.vec_value() + pred = probs.index(max(probs)) + if pred == EOS_token: + decoder_words.append("") + break + else: + decoder_words.append(output_lang.index2word[pred]) + decoder_input = pred + + return decoder_words + + +def evaluationRandomly(encoder, decoder, n=10): + + for i in range(n): + pair = random.choice(pairs) + print(">", pair[0]) + print("=", pair[1]) + output_words = evaluate(encoder, decoder, pair[0]) + output_sentence = " ".join(output_words) + print("<", output_sentence) + print("") + +# Start Training and Evaluating + +model = dy.ParameterCollection() +hidden_dim = 256 +encoder = EncoderRNN(input_lang.n_words, hidden_dim, model) +decoder = AttnDecoderRNN(hidden_dim, output_lang.n_words, model) +trainer = dy.SimpleSGDTrainer(model, learning_rate=0.2) + +trainIters(encoder, decoder, trainer, 100000, print_every=5000) + +evaluationRandomly(encoder, decoder) \ No newline at end of file diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py new file mode 100644 index 000000000..a88ea5fdc --- /dev/null +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py @@ -0,0 +1,354 @@ +# Requirements + +from __future__ import unicode_literals, print_function, division +from io import open +import unicodedata +import re +import random +import torch +import torch.nn as nn +from torch import optim +import torch.nn.functional as F +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Data Preparation + +SOS_token = 0 +EOS_token = 1 + +class Lang: + + def __init__(self, name): + self.name = name + self.word2index = {} + self.word2count = {} + self.index2word = {0: "SOS", 1: "EOS"} + self.n_words = 2 # Count SOS and EOS + + def addSentence(self, sentence): + for word in sentence.split(' '): + self.addWord(word) + + def addWord(self, word): + if word not in self.word2index: + self.word2index[word] = self.n_words + self.word2count[word] = 1 + self.index2word[self.n_words] = word + self.n_words += 1 + else: + self.word2count[word] += 1 + +def unicodeToAscii(s): + + return ''.join( + c for c in unicodedata.normalize('NFD', s) + if unicodedata.category(c) != 'Mn' + ) + +def normalizeString(s): + + s = unicodeToAscii(s.lower().strip()) + s = re.sub(r"([.!?])", r" \1", s) + s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) + return s + +def readLangs(lang1, lang2, reverse=False): + + print("Reading lines...") + + lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\ + read().strip().split('\n') + + pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines] + + if reverse: + pairs = [list(reversed(p)) for p in pairs] + input_lang = Lang(lang2) + output_lang = Lang(lang1) + else: + input_lang = Lang(lang1) + output_lang = Lang(lang2) + + return input_lang, output_lang, pairs + +MAX_LENGTH = 10 + +eng_prefixes = ( + "i am ", "i m ", + "he is", "he s ", + "she is", "she s", + "you are", "you re ", + "we are", "we re ", + "they are", "they re " +) + +def filterPair(p): + + return len(p[0].split(' ')) < MAX_LENGTH and \ + len(p[1].split(' ')) < MAX_LENGTH and \ + p[1].startswith(eng_prefixes) + +def filterPairs(pairs): + + return [pair for pair in pairs if filterPair(pair)] + +def prepareData(lang1, lang2, reverse=False): + + input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse) + print("Read %s sentence pairs" % len(pairs)) + pairs = 
filterPairs(pairs) + print("Trimmed to %s sentence pairs" % len(pairs)) + print("Counting words...") + for pair in pairs: + input_lang.addSentence(pair[0]) + output_lang.addSentence(pair[1]) + print("Counted words:") + print(input_lang.name, input_lang.n_words) + print(output_lang.name, output_lang.n_words) + return input_lang, output_lang, pairs + +input_lang, output_lang, pairs = prepareData('eng', 'fra', True) +print(random.choice(pairs)) + +# Model + +class EncoderRNN(nn.Module): + + def __init__(self, input_size, hidden_size): + super(EncoderRNN, self).__init__() + self.hidden_size = hidden_size + + self.embedding = nn.Embedding(input_size, hidden_size) + self.gru = nn.GRU(hidden_size, hidden_size) + + def forward(self, input, hidden): + embedded = self.embedding(input).view(1, 1, -1) + output = embedded + output, hidden = self.gru(output, hidden) + return output, hidden + + def initHidden(self): + return torch.zeros(1, 1, self.hidden_size, device=device) + +class AttnDecoderRNN(nn.Module): + + def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH): + super(AttnDecoderRNN, self).__init__() + self.hidden_size = hidden_size + self.output_size = output_size + self.dropout_p = dropout_p + self.max_length = max_length + + self.embedding = nn.Embedding(self.output_size, self.hidden_size) + self.attn = nn.Linear(self.hidden_size * 2, self.max_length) + self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size) + self.dropout = nn.Dropout(self.dropout_p) + self.gru = nn.GRU(self.hidden_size, self.hidden_size) + self.out = nn.Linear(self.hidden_size, self.output_size) + + def forward(self, input, hidden, encoder_outputs): + embedded = self.embedding(input).view(1, 1, -1) + embedded = self.dropout(embedded) + + attn_weights = F.softmax( + self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1) + attn_applied = torch.bmm(attn_weights.unsqueeze(0), + encoder_outputs.unsqueeze(0)) + + output = torch.cat((embedded[0], attn_applied[0]), 1) + output = self.attn_combine(output).unsqueeze(0) + + output = F.relu(output) + output, hidden = self.gru(output, hidden) + + output = F.log_softmax(self.out(output[0]), dim=1) + return output, hidden, attn_weights + + def initHidden(self): + return torch.zeros(1, 1, self.hidden_size, device=device) + +def indexesFromSentence(lang, sentence): + + return [lang.word2index[word] for word in sentence.split(' ')] + + +def tensorFromSentence(lang, sentence): + + indexes = indexesFromSentence(lang, sentence) + indexes.append(EOS_token) + return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1) + + +def tensorsFromPair(pair): + + input_tensor = tensorFromSentence(input_lang, pair[0]) + target_tensor = tensorFromSentence(output_lang, pair[1]) + return (input_tensor, target_tensor) + +# Training the Model + +teacher_forcing_ratio = 0.5 + + +def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH): + encoder_hidden = encoder.initHidden() + + encoder_optimizer.zero_grad() + decoder_optimizer.zero_grad() + + input_length = input_tensor.size(0) + target_length = target_tensor.size(0) + + encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device) + + loss = 0 + + for ei in range(input_length): + encoder_output, encoder_hidden = encoder( + input_tensor[ei], encoder_hidden) + encoder_outputs[ei] = encoder_output[0, 0] + + decoder_input = torch.tensor([[SOS_token]], device=device) + + decoder_hidden = encoder_hidden + + 
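    # Teacher forcing: with probability teacher_forcing_ratio, feed the
    # ground-truth target token (instead of the model's own prediction)
    # back into the decoder at each step.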
use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False + + if use_teacher_forcing: + + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder( + decoder_input, decoder_hidden, encoder_outputs) + loss += criterion(decoder_output, target_tensor[di]) + decoder_input = target_tensor[di] + + else: + + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder( + decoder_input, decoder_hidden, encoder_outputs) + topv, topi = decoder_output.topk(1) + decoder_input = topi.squeeze().detach() + + loss += criterion(decoder_output, target_tensor[di]) + if decoder_input.item() == EOS_token: + break + + loss.backward() + + encoder_optimizer.step() + decoder_optimizer.step() + + return loss.item() / target_length + +# Helper Function to Print Time + +import time +import math + + +def asMinutes(s): + m = math.floor(s / 60) + s -= m * 60 + return '%dm %ds' % (m, s) + + +def timeSince(since, percent): + now = time.time() + s = now - since + es = s / (percent) + rs = es - s + return '%s (- %s)' % (asMinutes(s), asMinutes(rs)) + +# Whole Training Process + +def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01): + + start = time.time() + plot_losses = [] + print_loss_total = 0 + plot_loss_total = 0 + + encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) + decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate) + training_pairs = [tensorsFromPair(random.choice(pairs)) + for i in range(n_iters)] + criterion = nn.NLLLoss() + + for iter in range(1, n_iters + 1): + training_pair = training_pairs[iter - 1] + input_tensor = training_pair[0] + target_tensor = training_pair[1] + + loss = train(input_tensor, target_tensor, encoder, + decoder, encoder_optimizer, decoder_optimizer, criterion) + print_loss_total += loss + plot_loss_total += loss + + if iter % print_every == 0: + print_loss_avg = print_loss_total / print_every + print_loss_total = 0 + print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters), + iter, iter / n_iters * 100, print_loss_avg)) + + if iter % plot_every == 0: + plot_loss_avg = plot_loss_total / plot_every + plot_losses.append(plot_loss_avg) + plot_loss_total = 0 + + +def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH): + with torch.no_grad(): + input_tensor = tensorFromSentence(input_lang, sentence) + input_length = input_tensor.size()[0] + encoder_hidden = encoder.initHidden() + + encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device) + + for ei in range(input_length): + encoder_output, encoder_hidden = encoder(input_tensor[ei], + encoder_hidden) + encoder_outputs[ei] += encoder_output[0, 0] + + decoder_input = torch.tensor([[SOS_token]], device=device) # SOS + + decoder_hidden = encoder_hidden + + decoded_words = [] + decoder_attentions = torch.zeros(max_length, max_length) + + for di in range(max_length): + decoder_output, decoder_hidden, decoder_attention = decoder( + decoder_input, decoder_hidden, encoder_outputs) + decoder_attentions[di] = decoder_attention.data + topv, topi = decoder_output.data.topk(1) + if topi.item() == EOS_token: + decoded_words.append('') + break + else: + decoded_words.append(output_lang.index2word[topi.item()]) + + decoder_input = topi.squeeze().detach() + + return decoded_words, decoder_attentions[:di + 1] + +def evaluateRandomly(encoder, decoder, n=10): + + for i in range(n): + pair = random.choice(pairs) + print('>', pair[0]) + print('=', pair[1]) + 
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

# Training and Evaluating

hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 100000, print_every=5000)

evaluateRandomly(encoder1, attn_decoder1)
\ No newline at end of file
From 6c4000b2e75ec189ba3377e657c1de0ec741d564 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Tue, 5 Jun 2018 13:51:08 -0400
Subject: [PATCH 02/14] update README.md

---
 examples/sequence-to-sequence/.DS_Store | Bin 6148 -> 6148 bytes
 .../seq2seq_translator/README.md        |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/sequence-to-sequence/.DS_Store b/examples/sequence-to-sequence/.DS_Store
index 0b4a6dc06944ab2783d7da06388e4224869f0466..a57abb186a5cb432320ada6fb3798218671d406c 100644
Binary files a/examples/sequence-to-sequence/.DS_Store and b/examples/sequence-to-sequence/.DS_Store differ
diff --git a/examples/sequence-to-sequence/seq2seq_translator/README.md b/examples/sequence-to-sequence/seq2seq_translator/README.md
index d678dba3e..78e048308 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/README.md
+++ b/examples/sequence-to-sequence/seq2seq_translator/README.md
@@ -2,7 +2,7 @@
 
 Here is the comparison between Dynet and PyTorch on the [seq2seq translator example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
 
-We have already preprocessed and prepared the eng-fra language dataset in the `data` folder according to the steps used in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
+The data we used is a set of many thousands of English to French translation pairs. Download the data from [here](https://download.pytorch.org/tutorial/data.zip) and extract it to the current directory.
 
 ## Usage (Dynet)

From 63f63c5debc5b8d802e993e5f7e37379799dbb4e Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Wed, 6 Jun 2018 17:44:39 -0400
Subject: [PATCH 03/14] bug fixed in the model

---
 .../.idea/dictionaries/pengzhi_gao.xml     |   3 +
 .../seq2seq_translator/.idea/misc.xml      |   4 +
 .../seq2seq_translator/.idea/modules.xml   |   8 +
 .../.idea/seq2seq_translator.iml           |  12 ++
 .../seq2seq_translator/.idea/workspace.xml | 166 ++++++++++++++++++
 .../seq2seq_translator/README.md           | 102 +++++------
 .../seq2seq_translator/seq2seq_dynet.py    |   2 +-
 7 files changed, 245 insertions(+), 52 deletions(-)
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/dictionaries/pengzhi_gao.xml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/misc.xml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/modules.xml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/seq2seq_translator.iml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/workspace.xml

diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/dictionaries/pengzhi_gao.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/dictionaries/pengzhi_gao.xml
new file mode 100644
index 000000000..f224b9fc9
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/misc.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/misc.xml
new file mode 100644
index 000000000..e626b847e
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/modules.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/modules.xml
new file mode 100644
index 000000000..435aaac69
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/seq2seq_translator.iml b/examples/sequence-to-sequence/seq2seq_translator/.idea/seq2seq_translator.iml
new file mode 100644
index 000000000..e98082abe
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/workspace.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/workspace.xml
new file mode 100644
index 000000000..830438b5d

From f77ae009b1a3df952e149d01fc9ca8870f4b5f63 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:48:03 -0400
Subject: [PATCH 05/14] address the comments

---
 .DS_Store                          | Bin 8196 -> 8196 bytes
 .idea/dictionaries/pengzhi_gao.xml |   3 +
 .idea/dynet.iml                    |  12 +
 .idea/misc.xml                     |   4 +
 .idea/modules.xml                  |   8 +
 .idea/vcs.xml                      |   6 +
 .idea/workspace.xml                | 206 ++++++++++++++
 examples/.DS_Store                 | Bin 10244 -> 10244 bytes
 .../seq2seq_translator/README.md   | 125 +----------
 9 files changed, 243 insertions(+), 121 deletions(-)
 create mode 100644 .idea/dictionaries/pengzhi_gao.xml
 create mode 100644 .idea/dynet.iml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100644 .idea/workspace.xml

diff --git a/.DS_Store b/.DS_Store
index d7dc14478065716eb8d8443f856f8a008e2b97a3..816a6d9405976599f900e585009d30cf30e903ff 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.idea/dictionaries/pengzhi_gao.xml b/.idea/dictionaries/pengzhi_gao.xml
new file mode 100644
index 000000000..f224b9fc9
diff --git a/.idea/dynet.iml b/.idea/dynet.iml
new file mode 100644
index 000000000..e98082abe
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 000000000..e626b847e
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..36fb4b31c
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 000000000..94a25f7f4
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 000000000..0e60d61c9

From ad04e2d024d16601e1f00a854ea45f583a6cfb54 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:02 -0400
Subject: [PATCH 07/14] Delete .DS_Store

---
 .DS_Store | Bin 8196 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 816a6d9405976599f900e585009d30cf30e903ff..0000000000000000000000000000000000000000
Binary files a/.DS_Store and /dev/null differ
From 5c792fce59d7a1da6feaa81a6dcdc0a08b5c7679 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:16 -0400
Subject: [PATCH 08/14] Delete .DS_Store

---
 examples/.DS_Store | Bin 10244 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 examples/.DS_Store

diff --git a/examples/.DS_Store b/examples/.DS_Store
deleted file mode 100644
index a554152dee93a8c306b9720bdf9b3bc6ca878a57..0000000000000000000000000000000000000000
Binary files a/examples/.DS_Store and /dev/null differ
From e490502b1def78936fb22d522836195a0278ef09 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:26 -0400
Subject: [PATCH 09/14] Delete .DS_Store

---
 examples/sequence-to-sequence/.DS_Store | Bin 6148 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 examples/sequence-to-sequence/.DS_Store

diff --git a/examples/sequence-to-sequence/.DS_Store b/examples/sequence-to-sequence/.DS_Store
deleted file mode 100644
index a57abb186a5cb432320ada6fb3798218671d406c..0000000000000000000000000000000000000000
Binary files a/examples/sequence-to-sequence/.DS_Store and /dev/null differ

From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:35 -0400
Subject: [PATCH 10/14] Delete .DS_Store

---
 .../seq2seq_translator/.DS_Store | Bin 6148 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 examples/sequence-to-sequence/seq2seq_translator/.DS_Store

diff --git a/examples/sequence-to-sequence/seq2seq_translator/.DS_Store b/examples/sequence-to-sequence/seq2seq_translator/.DS_Store
deleted file mode 100644
index 071667f4470f57571ab1453781926f235c61c504..0000000000000000000000000000000000000000
Binary files a/examples/sequence-to-sequence/seq2seq_translator/.DS_Store and /dev/null differ

From 242d2416e0cb458c783d4f9d1da24794543a321a Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:58:34 -0400
Subject: [PATCH 11/14] Update README.md

---
 examples/sequence-to-sequence/seq2seq_translator/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/sequence-to-sequence/seq2seq_translator/README.md b/examples/sequence-to-sequence/seq2seq_translator/README.md
index d43ee8d6e..f641cfbd8 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/README.md
+++ b/examples/sequence-to-sequence/seq2seq_translator/README.md
@@ -4,7 +4,7 @@
 
 The data we used is a set of many thousands of English to French translation pairs. Download the data from [here](https://download.pytorch.org/tutorial/data.zip) and extract it to the current directory.
 
-## Usage (Dynet)
+## Usage (DyNet)
 
 The architecture of the Dynet model `seq2seq_dynet.py` is the same as that in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html). We implement the attention mechanism in the model as well.
From aa6284a90e531f64cc99ed82f4c6210c5997de0e Mon Sep 17 00:00:00 2001 From: Pengzhi Gao Date: Tue, 4 Sep 2018 10:54:26 -0400 Subject: [PATCH 12/14] fix lint error --- .../seq2seq_translator/seq2seq_dynet.py | 73 ++++++++++++++----- .../seq2seq_translator/seq2seq_pytorch.py | 47 +++++++++--- 2 files changed, 89 insertions(+), 31 deletions(-) diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py index f05de7f8a..a63d74386 100644 --- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py @@ -6,12 +6,15 @@ import re import random import dynet as dy +import time +import math # Data Preparation SOS_token = 0 EOS_token = 1 + class Lang(object): def __init__(self, name): @@ -34,6 +37,7 @@ def addWord(self, word): else: self.word2count[word] += 1 + def unicodeToAscii(s): return ''.join( @@ -41,6 +45,7 @@ def unicodeToAscii(s): if unicodedata.category(c) != 'Mn' ) + def normalizeString(s): s = unicodeToAscii(s.lower().strip()) @@ -48,10 +53,12 @@ def normalizeString(s): s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) return s + def readLangs(lang1, lang2, reverse=False): print("Reading lines...") - lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().strip().split('\n') + lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().\ + strip().split('\n') pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines] if reverse: pairs = [list(reversed(p)) for p in pairs] @@ -62,9 +69,12 @@ def readLangs(lang1, lang2, reverse=False): output_lang = Lang(lang2) return input_lang, output_lang, pairs + MAX_LENGTH = 10 -eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s", "you are", "you re ", - "we are", "we re ", "they are", "they re ") +eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s", + "you are", "you re ", "we are", "we re ", "they are", + "they re ") + def filterPair(p): @@ -72,10 +82,12 @@ def filterPair(p): len(p[1].split(' ')) < MAX_LENGTH and \ p[1].startswith(eng_prefixes) + def filterPairs(pairs): return [pair for pair in pairs if filterPair(pair)] + def prepareData(lang1, lang2, reverse=False): input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse) @@ -91,18 +103,22 @@ def prepareData(lang1, lang2, reverse=False): print(output_lang.name, output_lang.n_words) return input_lang, output_lang, pairs + input_lang, output_lang, pairs = prepareData('eng', 'fra', True) print(random.choice(pairs)) # Model + class EncoderRNN(object): def __init__(self, in_vocab, hidden_dim, model): self.in_vocab = in_vocab self.hidden_dim = hidden_dim - self.embedding_enc = model.add_lookup_parameters((self.in_vocab, self.hidden_dim)) - self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + self.embedding_enc = model.add_lookup_parameters((self.in_vocab, + self.hidden_dim)) + self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, + model) def __call__(self, input, hidden): input_embed = dy.lookup(self.embedding_enc, input) @@ -113,20 +129,26 @@ def __call__(self, input, hidden): def initHidden(self): return [dy.zeros(self.hidden_dim)] + DROPOUT_RATE = 0.1 + class AttnDecoderRNN(object): def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH): self.hidden_dim = hidden_dim self.out_vocab = out_vocab self.max_length = max_length - self.embedding_dec = model.add_lookup_parameters((self.out_vocab, self.hidden_dim)) - 
self.w_attn = model.add_parameters((self.max_length, self.hidden_dim * 2))
+        self.embedding_dec = model.add_lookup_parameters((self.out_vocab,
+                                                          self.hidden_dim))
+        self.w_attn = model.add_parameters((self.max_length,
+                                            self.hidden_dim * 2))
         self.b_attn = model.add_parameters((self.max_length,))
-        self.w_attn_combine = model.add_parameters((self.hidden_dim, self.hidden_dim * 2))
+        self.w_attn_combine = model.add_parameters((self.hidden_dim,
+                                                    self.hidden_dim * 2))
         self.b_attn_combine = model.add_parameters((self.hidden_dim,))
-        self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model)
+        self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim,
+                                     model)
         self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim))
         self.b_dec = model.add_parameters((self.out_vocab,))
@@ -159,7 +181,9 @@ def initHidden(self):


 def indexesFromSentence(lang, sentence):
-    return [lang.word2index[word] for word in sentence.split(" ")] + [EOS_token]
+    return [lang.word2index[word] for word in sentence.split(" ")] + \
+        [EOS_token]
+

 def indexesFromPair(pair):
@@ -169,8 +193,10 @@ def indexesFromPair(pair):

 # Training the Model

+
 teacher_forcing_ratio = 0.5

+
 def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

     dy.renew_cg()
@@ -200,12 +226,14 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

     if use_teacher_forcing:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True)
+            decoder_output, decoder_hidden, decoder_attention = decoder(
+                decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             decoder_input = targets[i]
     else:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True)
+            decoder_output, decoder_hidden, decoder_attention = decoder(
+                decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             probs = decoder_output.vec_value()
             decoder_input = probs.index(max(probs))
@@ -220,14 +248,13 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

 # Helper Function to Print Time

-import time
-import math

 def asMinutes(s):
     m = math.floor(s/60)
     s -= m*60
     return "%dm %ds" % (m, s)

+
 def timeSince(since, percent):
     now = time.time()
     s = now - since
@@ -237,14 +264,17 @@ def timeSince(since, percent):

 # Whole Training Process

-def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=100):
+
+def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
+               plot_every=100):

     start = time.time()
     plot_losses = []
     print_loss_total = 0
     plot_loss_total = 0

-    training_pairs = [indexesFromPair(random.choice(pairs)) for _ in range(n_iters)]
+    training_pairs = [indexesFromPair(random.choice(pairs))
+                      for _ in range(n_iters)]

     for iter in range(1, n_iters+1):

@@ -260,7 +290,9 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=
         if iter % print_every == 0:
             print_loss_avg = print_loss_total/print_every
             print_loss_total = 0
-            print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters), iter, iter/n_iters*100, print_loss_avg))
+            print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters),
+                                         iter, iter/n_iters*100,
+                                         print_loss_avg))

         if iter % plot_every == 0:
             plot_loss_avg = plot_loss_total/plot_every
@@ -269,6 +301,7 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=

 # Evaluation

+
 def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

     dy.renew_cg()
@@ -293,7 +326,8 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
     decoder_attentions = [dy.zeros(max_length) for _ in range(max_length)]

     for i in range(max_length):
-        decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=False)
+        decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_input, decoder_hidden, encoder_outputs, dropout=False)
         decoder_attentions[i] = decoder_attention
         probs = decoder_output.vec_value()
         pred = probs.index(max(probs))
@@ -320,6 +354,7 @@ def evaluationRandomly(encoder, decoder, n=10):

 # Start Training and Evaluating

+
 model = dy.ParameterCollection()
 hidden_dim = 256
 encoder = EncoderRNN(input_lang.n_words, hidden_dim, model)
@@ -328,4 +363,4 @@ def evaluationRandomly(encoder, decoder, n=10):

 trainIters(encoder, decoder, trainer, 100000, print_every=5000)

-evaluationRandomly(encoder, decoder)
\ No newline at end of file
+evaluationRandomly(encoder, decoder)
diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
index a88ea5fdc..8c74513c6 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
@@ -5,6 +5,8 @@
 import unicodedata
 import re
 import random
+import time
+import math
 import torch
 import torch.nn as nn
 from torch import optim
@@ -16,6 +18,7 @@
 SOS_token = 0
 EOS_token = 1

+
 class Lang:

     def __init__(self, name):
@@ -38,6 +41,7 @@ def addWord(self, word):
         else:
             self.word2count[word] += 1

+
 def unicodeToAscii(s):

     return ''.join(
@@ -45,6 +49,7 @@ def unicodeToAscii(s):
         if unicodedata.category(c) != 'Mn'
     )

+
 def normalizeString(s):

     s = unicodeToAscii(s.lower().strip())
@@ -52,6 +57,7 @@ def normalizeString(s):
     s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
     return s

+
 def readLangs(lang1, lang2, reverse=False):

     print("Reading lines...")
@@ -71,6 +77,7 @@ def readLangs(lang1, lang2, reverse=False):

     return input_lang, output_lang, pairs

+
 MAX_LENGTH = 10

 eng_prefixes = (
@@ -82,16 +89,19 @@ def readLangs(lang1, lang2, reverse=False):
     "they are", "they re "
 )

+
 def filterPair(p):

     return len(p[0].split(' ')) < MAX_LENGTH and \
         len(p[1].split(' ')) < MAX_LENGTH and \
         p[1].startswith(eng_prefixes)

+
 def filterPairs(pairs):

     return [pair for pair in pairs if filterPair(pair)]

+
 def prepareData(lang1, lang2, reverse=False):

     input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
@@ -107,11 +117,13 @@ def prepareData(lang1, lang2, reverse=False):
     print(output_lang.name, output_lang.n_words)
     return input_lang, output_lang, pairs

+
 input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
 print(random.choice(pairs))

 # Model

+
 class EncoderRNN(nn.Module):

     def __init__(self, input_size, hidden_size):
@@ -130,9 +142,11 @@ def forward(self, input, hidden):
     def initHidden(self):
         return torch.zeros(1, 1, self.hidden_size, device=device)

+
 class AttnDecoderRNN(nn.Module):

-    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
+    def __init__(self, hidden_size, output_size, dropout_p=0.1,
+                 max_length=MAX_LENGTH):
         super(AttnDecoderRNN, self).__init__()
         self.hidden_size = hidden_size
         self.output_size = output_size
@@ -167,6 +181,7 @@ def forward(self, input, hidden, encoder_outputs):
     def initHidden(self):
         return torch.zeros(1, 1, self.hidden_size, device=device)

+
 def indexesFromSentence(lang, sentence):
     return [lang.word2index[word] for word in sentence.split(' ')]

@@ -187,10 +202,12 @@ def tensorsFromPair(pair):

 # Training the Model

+
 teacher_forcing_ratio = 0.5

-def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
+
+def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
+          decoder_optimizer, criterion, max_length=MAX_LENGTH):

     encoder_hidden = encoder.initHidden()

     encoder_optimizer.zero_grad()
@@ -199,7 +216,8 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, deco
     input_length = input_tensor.size(0)
     target_length = target_tensor.size(0)

-    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
+    encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
+                                  device=device)

     loss = 0
@@ -212,7 +230,8 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, deco

     decoder_hidden = encoder_hidden

-    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
+    use_teacher_forcing = True if random.random() < teacher_forcing_ratio \
+        else False

     if use_teacher_forcing:

@@ -243,9 +262,6 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, deco

 # Helper Function to Print Time

-import time
-import math
-

 def asMinutes(s):
     m = math.floor(s / 60)
@@ -262,7 +278,9 @@ def timeSince(since, percent):

 # Whole Training Process

-def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
+
+def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100,
+               learning_rate=0.01):

     start = time.time()
     plot_losses = []
@@ -289,7 +307,8 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, lear
             print_loss_avg = print_loss_total / print_every
             print_loss_total = 0
             print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
-                                         iter, iter / n_iters * 100, print_loss_avg))
+                                         iter, iter / n_iters * 100,
+                                         print_loss_avg))

         if iter % plot_every == 0:
             plot_loss_avg = plot_loss_total / plot_every
@@ -303,7 +322,8 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
     input_length = input_tensor.size()[0]
     encoder_hidden = encoder.initHidden()

-    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
+    encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
+                                  device=device)

     for ei in range(input_length):
         encoder_output, encoder_hidden = encoder(input_tensor[ei],
@@ -332,6 +352,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

     return decoded_words, decoder_attentions[:di + 1]

+
 def evaluateRandomly(encoder, decoder, n=10):

     for i in range(n):
@@ -345,10 +366,12 @@ def evaluateRandomly(encoder, decoder, n=10):

 # Training and Evaluating

+
 hidden_size = 256
 encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
-attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
+attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
+                               dropout_p=0.1).to(device)

 trainIters(encoder1, attn_decoder1, 100000, print_every=5000)

-evaluateRandomly(encoder1, attn_decoder1)
\ No newline at end of file
+evaluateRandomly(encoder1, attn_decoder1)

From d9619cf8353141aad138d36ec98401f1d1541336 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 7 Sep 2018 13:37:13 -0400
Subject: [PATCH 13/14] fix lint error

---
 .../seq2seq_translator/seq2seq_dynet.py       | 30 +++++++++--------
 .../seq2seq_translator/seq2seq_pytorch.py     | 38 +++++++++----------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
index a63d74386..2be37690c 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
@@ -1,7 +1,7 @@
 # Requirements

 from __future__ import unicode_literals, print_function, division
-from io import open
+import io
 import unicodedata
 import re
 import random
@@ -57,8 +57,8 @@ def normalizeString(s):
 def readLangs(lang1, lang2, reverse=False):

     print("Reading lines...")
-    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().\
-        strip().split('\n')
+    lines = io.open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
+        read().strip().split('\n')
     pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
     if reverse:
         pairs = [list(reversed(p)) for p in pairs]
@@ -120,8 +120,8 @@ def __init__(self, in_vocab, hidden_dim, model):
         self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim,
                                      model)

-    def __call__(self, input, hidden):
-        input_embed = dy.lookup(self.embedding_enc, input)
+    def __call__(self, inputs, hidden):
+        input_embed = dy.lookup(self.embedding_enc, inputs)
         state_enc = self.rnn_enc.initial_state(vecs=hidden)
         state_enc = state_enc.add_input(input_embed)
         return state_enc.output(), state_enc.h()
@@ -152,8 +152,8 @@ def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH):
         self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim))
         self.b_dec = model.add_parameters((self.out_vocab,))

-    def __call__(self, input, hidden, encoder_outptus, dropout=False):
-        input_embed = dy.lookup(self.embedding_dec, input)
+    def __call__(self, inputs, hidden, encoder_outptus, dropout=False):
+        input_embed = dy.lookup(self.embedding_dec, inputs)
         if dropout:
             input_embed = dy.dropout(input_embed, DROPOUT_RATE)
         input_cat = dy.concatenate([input_embed, hidden[0]])
@@ -226,7 +226,7 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

     if use_teacher_forcing:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             decoder_input = targets[i]
@@ -276,9 +276,9 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
     training_pairs = [indexesFromPair(random.choice(pairs))
                       for _ in range(n_iters)]

-    for iter in range(1, n_iters+1):
+    for iteration in range(1, n_iters+1):

-        training_pair = training_pairs[iter-1]
+        training_pair = training_pairs[iteration-1]
         inputs = training_pair[0]
         targets = training_pair[1]

@@ -287,14 +287,14 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
         print_loss_total += loss
         plot_loss_total += loss

-        if iter % print_every == 0:
+        if iteration % print_every == 0:
             print_loss_avg = print_loss_total/print_every
             print_loss_total = 0
-            print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters),
-                                         iter, iter/n_iters*100,
+            print("%s (%d %d%%) %.4f" % (timeSince(start, iteration/n_iters),
+                                         iteration, iteration/n_iters*100,
                                          print_loss_avg))

-        if iter % plot_every == 0:
+        if iteration % plot_every == 0:
             plot_loss_avg = plot_loss_total/plot_every
             plot_losses.append(plot_loss_avg)
             plot_loss_total = 0
@@ -343,7 +343,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

 def evaluationRandomly(encoder, decoder, n=10):

-    for i in range(n):
+    for _ in range(n):
         pair = random.choice(pairs)
         print(">", pair[0])
         print("=", pair[1])
diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
index 8c74513c6..75bcff626 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
@@ -1,7 +1,7 @@
 # Requirements

 from __future__ import unicode_literals, print_function, division
-from io import open
+import io
 import unicodedata
 import re
 import random
@@ -62,7 +62,7 @@ def readLangs(lang1, lang2, reverse=False):

     print("Reading lines...")

-    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
+    lines = io.open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
         read().strip().split('\n')

     pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
@@ -133,8 +133,8 @@ def __init__(self, input_size, hidden_size):
         self.embedding = nn.Embedding(input_size, hidden_size)
         self.gru = nn.GRU(hidden_size, hidden_size)

-    def forward(self, input, hidden):
-        embedded = self.embedding(input).view(1, 1, -1)
+    def forward(self, inputs, hidden):
+        embedded = self.embedding(inputs).view(1, 1, -1)
         output = embedded
         output, hidden = self.gru(output, hidden)
         return output, hidden
@@ -160,8 +160,8 @@ def __init__(self, hidden_size, output_size, dropout_p=0.1,
         self.gru = nn.GRU(self.hidden_size, self.hidden_size)
         self.out = nn.Linear(self.hidden_size, self.output_size)

-    def forward(self, input, hidden, encoder_outputs):
-        embedded = self.embedding(input).view(1, 1, -1)
+    def forward(self, inputs, hidden, encoder_outputs):
+        embedded = self.embedding(inputs).view(1, 1, -1)
         embedded = self.dropout(embedded)

         attn_weights = F.softmax(
@@ -236,7 +236,7 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,

     if use_teacher_forcing:
         for di in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs)
             loss += criterion(decoder_output, target_tensor[di])
             decoder_input = target_tensor[di]
@@ -244,9 +244,9 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,

     else:
         for di in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs)
-            topv, topi = decoder_output.topk(1)
+            _, topi = decoder_output.topk(1)
             decoder_input = topi.squeeze().detach()

             loss += criterion(decoder_output, target_tensor[di])
@@ -290,11 +290,11 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100,

     encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
     decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
     training_pairs = [tensorsFromPair(random.choice(pairs))
-                      for i in range(n_iters)]
+                      for _ in range(n_iters)]
     criterion = nn.NLLLoss()

-    for iter in range(1, n_iters + 1):
-        training_pair = training_pairs[iter - 1]
+    for iteration in range(1, n_iters + 1):
+        training_pair = training_pairs[iteration - 1]
         input_tensor = training_pair[0]
         target_tensor = training_pair[1]
@@ -303,14 +303,14 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100,
         print_loss_total += loss
         plot_loss_total += loss

-        if iter % print_every == 0:
+        if iteration % print_every == 0:
             print_loss_avg = print_loss_total / print_every
             print_loss_total = 0
-            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
-                                         iter, iter / n_iters * 100,
+            print('%s (%d %d%%) %.4f' % (timeSince(start, iteration / n_iters),
+                                         iteration, iteration / n_iters * 100,
                                          print_loss_avg))

-        if iter % plot_every == 0:
+        if iteration % plot_every == 0:
             plot_loss_avg = plot_loss_total / plot_every
             plot_losses.append(plot_loss_avg)
             plot_loss_total = 0
@@ -341,7 +341,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
             decoder_output, decoder_hidden, decoder_attention = decoder(
                 decoder_input, decoder_hidden, encoder_outputs)
             decoder_attentions[di] = decoder_attention.data
-            topv, topi = decoder_output.data.topk(1)
+            _, topi = decoder_output.data.topk(1)
             if topi.item() == EOS_token:
                 decoded_words.append('<EOS>')
                 break
@@ -355,11 +355,11 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

 def evaluateRandomly(encoder, decoder, n=10):

-    for i in range(n):
+    for _ in range(n):
         pair = random.choice(pairs)
         print('>', pair[0])
         print('=', pair[1])
-        output_words, attentions = evaluate(encoder, decoder, pair[0])
+        output_words, _ = evaluate(encoder, decoder, pair[0])
         output_sentence = ' '.join(output_words)
         print('<', output_sentence)
         print('')

From 177aa617bfa01b00a577928dfb88d7bc4ba1e20d Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 7 Sep 2018 15:38:05 -0400
Subject: [PATCH 14/14] fix lint error

---
 .../seq2seq_translator/seq2seq_dynet.py       | 11 ++++++-----
 .../seq2seq_translator/seq2seq_pytorch.py     |  9 +++++----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
index 2be37690c..fb62589ec 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
@@ -8,6 +8,7 @@
 import dynet as dy
 import time
 import math
+r = random.SystemRandom()

 # Data Preparation

@@ -105,7 +106,7 @@ def prepareData(lang1, lang2, reverse=False):


 input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
-print(random.choice(pairs))
+print(r.choice(pairs))

 # Model

@@ -219,7 +220,7 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):
     decoder_input = SOS_token
     decoder_hidden = encoder_hidden

-    if random.random() < teacher_forcing_ratio:
+    if r.random() < teacher_forcing_ratio:
         use_teacher_forcing = True
     else:
         use_teacher_forcing = False
@@ -232,7 +233,7 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):
             decoder_input = targets[i]
     else:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             probs = decoder_output.vec_value()
@@ -273,7 +274,7 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
     print_loss_total = 0
     plot_loss_total = 0

-    training_pairs = [indexesFromPair(random.choice(pairs))
+    training_pairs = [indexesFromPair(r.choice(pairs))
                       for _ in range(n_iters)]

     for iteration in range(1, n_iters+1):
@@ -344,7 +345,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
 def evaluationRandomly(encoder, decoder, n=10):

     for _ in range(n):
-        pair = random.choice(pairs)
+        pair = r.choice(pairs)
print(">", pair[0]) print("=", pair[1]) output_words = evaluate(encoder, decoder, pair[0]) diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py index 75bcff626..95aab4669 100644 --- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py @@ -12,6 +12,7 @@ from torch import optim import torch.nn.functional as F device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +r = random.SystemRandom() # Data Preparation @@ -119,7 +120,7 @@ def prepareData(lang1, lang2, reverse=False): input_lang, output_lang, pairs = prepareData('eng', 'fra', True) -print(random.choice(pairs)) +print(r.choice(pairs)) # Model @@ -230,7 +231,7 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_hidden = encoder_hidden - use_teacher_forcing = True if random.random() < teacher_forcing_ratio \ + use_teacher_forcing = True if r.random() < teacher_forcing_ratio \ else False if use_teacher_forcing: @@ -289,7 +290,7 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate) - training_pairs = [tensorsFromPair(random.choice(pairs)) + training_pairs = [tensorsFromPair(r.choice(pairs)) for _ in range(n_iters)] criterion = nn.NLLLoss() @@ -356,7 +357,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH): def evaluateRandomly(encoder, decoder, n=10): for _ in range(n): - pair = random.choice(pairs) + pair = r.choice(pairs) print('>', pair[0]) print('=', pair[1]) output_words, _ = evaluate(encoder, decoder, pair[0])