From 8ec515b89f792332f81f9ade4d5b479e55913322 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Tue, 5 Jun 2018 13:45:47 -0400
Subject: [PATCH 01/14] add seq2seq translator example

---
 .DS_Store                                 | Bin 0 -> 8196 bytes
 examples/.DS_Store                        | Bin 0 -> 10244 bytes
 examples/sequence-to-sequence/.DS_Store   | Bin 0 -> 6148 bytes
 .../seq2seq_translator/.DS_Store          | Bin 0 -> 6148 bytes
 .../seq2seq_translator/README.md          | 268 +++++++++++++
 .../seq2seq_translator/seq2seq_dynet.py   | 331 ++++++++++++++++
 .../seq2seq_translator/seq2seq_pytorch.py | 354 ++++++++++++++++++
 7 files changed, 953 insertions(+)
 create mode 100644 .DS_Store
 create mode 100644 examples/.DS_Store
 create mode 100644 examples/sequence-to-sequence/.DS_Store
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.DS_Store
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/README.md
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..d7dc14478065716eb8d8443f856f8a008e2b97a3
Binary files /dev/null and b/.DS_Store differ
diff --git a/examples/.DS_Store b/examples/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..2ef87546db611ef70fc85732a1589dc930659546
Binary files /dev/null and b/examples/.DS_Store differ
diff --git a/examples/sequence-to-sequence/.DS_Store b/examples/sequence-to-sequence/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..0b4a6dc06944ab2783d7da06388e4224869f0466
Binary files /dev/null and b/examples/sequence-to-sequence/.DS_Store differ
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.DS_Store b/examples/sequence-to-sequence/seq2seq_translator/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..071667f4470f57571ab1453781926f235c61c504
Binary files /dev/null and b/examples/sequence-to-sequence/seq2seq_translator/.DS_Store differ
diff --git a/examples/sequence-to-sequence/seq2seq_translator/README.md b/examples/sequence-to-sequence/seq2seq_translator/README.md
new file mode 100644
index 000000000..d678dba3e
--- /dev/null
+++ b/examples/sequence-to-sequence/seq2seq_translator/README.md
@@ -0,0 +1,268 @@
+# Seq2seq Translator Benchmarks
+
+Here is the comparison between Dynet and PyTorch on the [seq2seq translator example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
+
+We have already preprocessed and prepared the eng-fra language dataset in the `data` folder according to the steps used in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
+
+## Usage (Dynet)
+
+The architecture of the Dynet model `seq2seq_dynet.py` is the same as that in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html). We implement the attention mechanism in the model as well.
+
+The architecture of the Dynet model is shown as follows.
+ +```python +class EncoderRNN(object): + + def __init__(self, in_vocab, hidden_dim, model): + self.in_vocab = in_vocab + self.hidden_dim = hidden_dim + self.embedding_enc = model.add_lookup_parameters((self.in_vocab, self.hidden_dim)) + self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + + def __call__(self, input, hidden): + input_embed = dy.lookup(self.embedding_enc, input) + state_enc = self.rnn_enc.initial_state(vecs=hidden) + state_enc = state_enc.add_input(input_embed) + return state_enc.output(), state_enc.h() + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] + +DROPOUT_RATE = 0.1 + +class AttnDecoderRNN(object): + + def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH): + self.hidden_dim = hidden_dim + self.out_vocab = out_vocab + self.max_length = max_length + self.embedding_dec = model.add_lookup_parameters((self.out_vocab, self.hidden_dim)) + self.w_attn = model.add_parameters((self.max_length, self.hidden_dim * 2)) + self.b_attn = model.add_parameters((self.max_length,)) + self.w_attn_combine = model.add_parameters((self.hidden_dim, self.hidden_dim * 2)) + self.b_attn_combine = model.add_parameters((self.hidden_dim,)) + self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim)) + self.b_dec = model.add_parameters((self.out_vocab,)) + + def __call__(self, input, hidden, encoder_outptus, dropout=False): + input_embed = dy.lookup(self.embedding_dec, input) + if dropout: + input_embed = dy.dropout(input_embed, DROPOUT_RATE) + input_cat = dy.concatenate([input_embed, hidden[0]]) + w_attn = dy.parameter(self.w_attn) + b_attn = dy.parameter(self.b_attn) + attn_weights = dy.softmax(w_attn * input_cat + b_attn) + attn_applied = encoder_outptus * attn_weights + output = dy.concatenate([input_embed, attn_applied]) + w_attn_combine = dy.parameter(self.w_attn_combine) + b_attn_combine = dy.parameter(self.b_attn_combine) + output = w_attn_combine * output + b_attn_combine + output = dy.rectify(output) + state_dec = self.rnn_dec.initial_state(vecs=hidden) + state_dec = state_dec.add_input(output) + w_dec = dy.parameter(self.w_dec) + b_dec = dy.parameter(self.b_dec) + output = state_dec.output() + output = dy.softmax(w_dec * output + b_dec) + return output, hidden, attn_weights + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] +``` + +Install the GPU version of Dynet according to the instructions on the [official website](http://dynet.readthedocs.io/en/latest/python.html#installing-a-cutting-edge-and-or-gpu-version). + +Then, run the training: + +
+python seq2seq_dynet.py --dynet_gpus 1
+
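After training, `evaluationRandomly(encoder, decoder)` prints ten sample translations. To translate a single sentence with the trained Dynet model, a minimal sketch along the following lines should work; it assumes the objects created at the bottom of `seq2seq_dynet.py` (`encoder`, `decoder`, `input_lang`) are still in scope and that every word of the input appears in the training vocabulary:

```python
# Hedged sketch, not part of seq2seq_dynet.py: greedily decode one
# normalized, space-tokenized French sentence with the trained model.
sentence = normalizeString("elle est infirmiere .")
output_words = evaluate(encoder, decoder, sentence)  # list of predicted tokens
print(" ".join(output_words))
```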
+

## Usage (PyTorch)

The code in `seq2seq_pytorch.py` follows the same lines as the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).

The architecture of the PyTorch model is shown as follows.

```python
class EncoderRNN(nn.Module):

    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

class AttnDecoderRNN(nn.Module):

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
```

Install the CUDA version of PyTorch according to the instructions on the [official website](http://pytorch.org/).

Then, run the training:

+
+python seq2seq_pytorch.py
+
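The trained PyTorch model can be queried the same way once training completes. A minimal sketch, assuming `encoder1` and `attn_decoder1` from the bottom of `seq2seq_pytorch.py` are in scope and the input words are in the training vocabulary:

```python
# Hedged sketch, not part of seq2seq_pytorch.py: `evaluate` returns the
# decoded tokens together with the attention weights for each output step.
sentence = normalizeString("elle est infirmiere .")
output_words, attentions = evaluate(encoder1, attn_decoder1, sentence)
print(" ".join(output_words))
```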
+

## Performance

We ran our code on a desktop with an NVIDIA TITAN X GPU. In the table below, D stands for Dynet and P stands for PyTorch.

| Time (D) | Iteration (D) | Loss (D) | Time (P) | Iteration (P) | Loss (P) |
| --- | --- | --- | --- | --- | --- |
| 0m 26s | 5000 5% | 3.3565 | 1m 30s | 5000 5% | 2.8794 |
| 0m 53s | 10000 10% | 2.7376 | 2m 55s | 10000 10% | 2.3103 |
| 1m 21s | 15000 15% | 2.4912 | 4m 5s | 15000 15% | 1.9939 |
| 1m 48s | 20000 20% | 2.2536 | 5m 16s | 20000 20% | 1.7537 |
| 2m 16s | 25000 25% | 2.0537 | 6m 27s | 25000 25% | 1.5796 |
| 2m 44s | 30000 30% | 1.8832 | 7m 39s | 30000 30% | 1.3795 |
| 3m 12s | 35000 35% | 1.7232 | 9m 13s | 35000 35% | 1.2712 |
| 3m 40s | 40000 40% | 1.5833 | 10m 31s | 40000 40% | 1.1374 |
| 4m 8s | 45000 45% | 1.4360 | 11m 41s | 45000 45% | 1.0215 |
| 4m 36s | 50000 50% | 1.2916 | 12m 53s | 50000 50% | 0.9307 |
| 5m 4s | 55000 55% | 1.2023 | 14m 5s | 55000 55% | 0.8312 |
| 5m 33s | 60000 60% | 1.1186 | 15m 17s | 60000 60% | 0.7879 |
| 6m 1s | 65000 65% | 1.0435 | 16m 48s | 65000 65% | 0.7188 |
| 6m 30s | 70000 70% | 0.9348 | 18m 6s | 70000 70% | 0.6532 |
| 6m 58s | 75000 75% | 0.8634 | 19m 18s | 75000 75% | 0.6273 |
| 7m 26s | 80000 80% | 0.8323 | 20m 34s | 80000 80% | 0.6021 |
| 7m 55s | 85000 85% | 0.7610 | 21m 44s | 85000 85% | 0.5210 |
| 8m 23s | 90000 90% | 0.7377 | 22m 55s | 90000 90% | 0.5054 |
| 8m 52s | 95000 95% | 0.6666 | 24m 9s | 95000 95% | 0.4417 |
| 9m 21s | 100000 100% | 0.6237 | 25m 24s | 100000 100% | 0.4297 |

We then show some evaluation results as follows.

Format:

+
+> input 
+= target 
+< output
+
+ +### Dynet + +``` +> elle est infirmiere . += she is a nurse . +< she is a nurse . + +> tu n es pas contrariee si ? += you re not upset are you ? +< you re not upset are you re not upset are + +> j en ai termine avec mon travail . += i am through with my work . +< i am through with my work . + +> je ne l invente pas . += i m not making that up . +< i m not making up . + +> elles ont peur de moi . += they re afraid of me . +< they re afraid of me . + +> on va jouer au tennis . += we re going to play tennis . +< we are going tennis . + +> j ai une assuetude . += i m addicted . +< i m addicted . + +> elles sont en train de vous chercher . += they re looking for you . +< they re looking for you . + +> elle semble riche . += she seems rich . +< she seems rich . + +> vous etes bizarre . += you re weird . +< you re weird . +``` + +### PyTorch + +``` +> il est deja marie . += he s already married . +< he s already married . + +> on le dit decede . += he is said to have died . +< he are said to have died . + +> il est trop saoul . += he s too drunk . +< he s too drunk . + +> je suis assez heureux . += i m happy enough . +< i m happy happy . + +> je n y suis pas interessee . += i m not interested in that . +< i m not interested in that . + +> il a huit ans . += he s eight years old . +< he is thirty . + +> je ne suis pas differente . += i m no different . +< i m no different . + +> je suis heureux que vous l ayez aime . += i m happy you liked it . +< i m happy you liked it . + +> ils peuvent chanter . += they re able to sing . +< they re able to sing . + +> vous etes tellement belle dans cette robe ! += you re so beautiful in that dress . +< you re so beautiful in that dress . +``` diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py new file mode 100644 index 000000000..326744958 --- /dev/null +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py @@ -0,0 +1,331 @@ +# Requirements + +from __future__ import unicode_literals, print_function, division +from io import open +import unicodedata +import re +import random +import dynet as dy + +# Data Preparation + +SOS_token = 0 +EOS_token = 1 + +class Lang(object): + + def __init__(self, name): + self.name = name + self.word2index = {} + self.word2count = {} + self.index2word = {0: "SOS", 1: "EOS"} + self.n_words = 2 + + def addSentence(self, sentence): + for word in sentence.split(" "): + self.addWord(word) + + def addWord(self, word): + if word not in self.word2index: + self.word2index[word] = self.n_words + self.word2count[word] = 1 + self.index2word[self.n_words] = word + self.n_words += 1 + else: + self.word2count[word] += 1 + +def unicodeToAscii(s): + + return ''.join( + c for c in unicodedata.normalize('NFD', s) + if unicodedata.category(c) != 'Mn' + ) + +def normalizeString(s): + + s = unicodeToAscii(s.lower().strip()) + s = re.sub(r"([.!?])", r" \1", s) + s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) + return s + +def readLangs(lang1, lang2, reverse=False): + + print("Reading lines...") + lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().strip().split('\n') + pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines] + if reverse: + pairs = [list(reversed(p)) for p in pairs] + input_lang = Lang(lang2) + output_lang = Lang(lang1) + else: + input_lang = Lang(lang1) + output_lang = Lang(lang2) + return input_lang, output_lang, pairs + +MAX_LENGTH = 10 +eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", 
"she s", "you are", "you re ", + "we are", "we re ", "they are", "they re ") + +def filterPair(p): + + return len(p[0].split(' ')) < MAX_LENGTH and \ + len(p[1].split(' ')) < MAX_LENGTH and \ + p[1].startswith(eng_prefixes) + +def filterPairs(pairs): + + return [pair for pair in pairs if filterPair(pair)] + +def prepareData(lang1, lang2, reverse=False): + + input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse) + print("Read %s sentence pairs" % len(pairs)) + pairs = filterPairs(pairs) + print("Trimmed to %s sentence pairs" % len(pairs)) + print("Counting words...") + for pair in pairs: + input_lang.addSentence(pair[0]) + output_lang.addSentence(pair[1]) + print("Counted words:") + print(input_lang.name, input_lang.n_words) + print(output_lang.name, output_lang.n_words) + return input_lang, output_lang, pairs + +input_lang, output_lang, pairs = prepareData('eng', 'fra', True) +print(random.choice(pairs)) + +# Model + +class EncoderRNN(object): + + def __init__(self, in_vocab, hidden_dim, model): + self.in_vocab = in_vocab + self.hidden_dim = hidden_dim + self.embedding_enc = model.add_lookup_parameters((self.in_vocab, self.hidden_dim)) + self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + + def __call__(self, input, hidden): + input_embed = dy.lookup(self.embedding_enc, input) + state_enc = self.rnn_enc.initial_state(vecs=hidden) + state_enc = state_enc.add_input(input_embed) + return state_enc.output(), state_enc.h() + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] + +DROPOUT_RATE = 0.1 + +class AttnDecoderRNN(object): + + def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH): + self.hidden_dim = hidden_dim + self.out_vocab = out_vocab + self.max_length = max_length + self.embedding_dec = model.add_lookup_parameters((self.out_vocab, self.hidden_dim)) + self.w_attn = model.add_parameters((self.max_length, self.hidden_dim * 2)) + self.b_attn = model.add_parameters((self.max_length,)) + self.w_attn_combine = model.add_parameters((self.hidden_dim, self.hidden_dim * 2)) + self.b_attn_combine = model.add_parameters((self.hidden_dim,)) + self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim)) + self.b_dec = model.add_parameters((self.out_vocab,)) + + def __call__(self, input, hidden, encoder_outptus, dropout=False): + input_embed = dy.lookup(self.embedding_dec, input) + if dropout: + input_embed = dy.dropout(input_embed, DROPOUT_RATE) + input_cat = dy.concatenate([input_embed, hidden[0]]) + w_attn = dy.parameter(self.w_attn) + b_attn = dy.parameter(self.b_attn) + attn_weights = dy.softmax(w_attn * input_cat + b_attn) + attn_applied = encoder_outptus * attn_weights + output = dy.concatenate([input_embed, attn_applied]) + w_attn_combine = dy.parameter(self.w_attn_combine) + b_attn_combine = dy.parameter(self.b_attn_combine) + output = w_attn_combine * output + b_attn_combine + output = dy.rectify(output) + state_dec = self.rnn_dec.initial_state(vecs=hidden) + state_dec = state_dec.add_input(output) + w_dec = dy.parameter(self.w_dec) + b_dec = dy.parameter(self.b_dec) + output = state_dec.output() + output = dy.softmax(w_dec * output + b_dec) + + return output, hidden, attn_weights + + def initHidden(self): + return [dy.zeros(self.hidden_dim)] + + +def indexesFromSentence(lang, sentence): + + return [lang.word2index[word] for word in sentence.split(" ")] + [EOS_token] + +def indexesFromPair(pair): + + input_indexes = 
indexesFromSentence(input_lang, pair[0]) + target_indexes = indexesFromSentence(output_lang, pair[1]) + return (input_indexes, target_indexes) + +# Training the Model + +teacher_forcing_ratio = 0.5 + +def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH): + + dy.renew_cg() + + encoder_hidden = encoder.initHidden() + + input_length = len(inputs) + target_length = len(targets) + + encoder_outputs = [dy.zeros(hidden_dim) for _ in range(max_length)] + + losses = [] + + for i in range(input_length): + encoder_output, encoder_hidden = encoder(inputs[i], encoder_hidden) + encoder_outputs[i] = encoder_output + + encoder_outputs = dy.concatenate(encoder_outputs, 1) + + decoder_input = SOS_token + decoder_hidden = encoder_hidden + + if random.random() < teacher_forcing_ratio: + use_teacher_forcing = True + else: + use_teacher_forcing = False + + if use_teacher_forcing: + for i in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True) + losses.append(-dy.log(dy.pick(decoder_output, targets[i]))) + decoder_input = targets[i] + else: + for i in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True) + losses.append(-dy.log(dy.pick(decoder_output, targets[i]))) + probs = decoder_output.vec_value() + decoder_input = probs.index(max(probs)) + if decoder_input == EOS_token: + break + + loss = dy.esum(losses)/len(losses) + loss.backward() + trainer.update() + + return loss.value() + +# Helper Function to Print Time + +import time +import math + +def asMinutes(s): + m = math.floor(s/60) + s -= m*60 + return "%dm %ds" % (m, s) + +def timeSince(since, percent): + now = time.time() + s = now - since + es = s / (percent) + rs = es - s + return "%s (- %s)" % (asMinutes(s), asMinutes(rs)) + +# Whole Training Process + +def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=100): + + start = time.time() + plot_losses = [] + print_loss_total = 0 + plot_loss_total = 0 + + training_pairs = [indexesFromPair(random.choice(pairs)) for _ in range(n_iters)] + + for iter in range(1, n_iters+1): + + training_pair = training_pairs[iter-1] + inputs = training_pair[0] + targets = training_pair[1] + + loss = train(inputs, targets, encoder, decoder, trainer) + + print_loss_total += loss + plot_loss_total += loss + + if iter % print_every == 0: + print_loss_avg = print_loss_total/print_every + print_loss_total = 0 + print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters), iter, iter/n_iters*100, print_loss_avg)) + + if iter % plot_every == 0: + plot_loss_avg = plot_loss_total/plot_every + plot_losses.append(plot_loss_avg) + plot_loss_total = 0 + +# Evaluation + +def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH): + + dy.renew_cg() + + encoder_hidden = encoder.initHidden() + + inputs = indexesFromSentence(input_lang, sentence) + input_length = len(inputs) + + encoder_outputs = [dy.zeros(hidden_dim) for _ in range(max_length)] + + for i in range(input_length): + encoder_output, encoder_hidden = encoder(inputs[i], encoder_hidden) + encoder_outputs[i] = encoder_output + + encoder_outputs = dy.concatenate(encoder_outputs, 1) + + decoder_input = SOS_token + decoder_hidden = encoder_hidden + + decoder_words = [] + decoder_attentions = [dy.zeros(max_length) for _ in range(max_length)] + + for i in range(max_length): + decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, 
decoder_hidden, encoder_outputs, dropout=False) + decoder_attentions[i] = decoder_attention + probs = decoder_output.vec_value() + pred = probs.index(max(probs)) + if pred == EOS_token: + decoder_words.append("") + break + else: + decoder_words.append(output_lang.index2word[pred]) + decoder_input = pred + + return decoder_words + + +def evaluationRandomly(encoder, decoder, n=10): + + for i in range(n): + pair = random.choice(pairs) + print(">", pair[0]) + print("=", pair[1]) + output_words = evaluate(encoder, decoder, pair[0]) + output_sentence = " ".join(output_words) + print("<", output_sentence) + print("") + +# Start Training and Evaluating + +model = dy.ParameterCollection() +hidden_dim = 256 +encoder = EncoderRNN(input_lang.n_words, hidden_dim, model) +decoder = AttnDecoderRNN(hidden_dim, output_lang.n_words, model) +trainer = dy.SimpleSGDTrainer(model, learning_rate=0.2) + +trainIters(encoder, decoder, trainer, 100000, print_every=5000) + +evaluationRandomly(encoder, decoder) \ No newline at end of file diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py new file mode 100644 index 000000000..a88ea5fdc --- /dev/null +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py @@ -0,0 +1,354 @@ +# Requirements + +from __future__ import unicode_literals, print_function, division +from io import open +import unicodedata +import re +import random +import torch +import torch.nn as nn +from torch import optim +import torch.nn.functional as F +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Data Preparation + +SOS_token = 0 +EOS_token = 1 + +class Lang: + + def __init__(self, name): + self.name = name + self.word2index = {} + self.word2count = {} + self.index2word = {0: "SOS", 1: "EOS"} + self.n_words = 2 # Count SOS and EOS + + def addSentence(self, sentence): + for word in sentence.split(' '): + self.addWord(word) + + def addWord(self, word): + if word not in self.word2index: + self.word2index[word] = self.n_words + self.word2count[word] = 1 + self.index2word[self.n_words] = word + self.n_words += 1 + else: + self.word2count[word] += 1 + +def unicodeToAscii(s): + + return ''.join( + c for c in unicodedata.normalize('NFD', s) + if unicodedata.category(c) != 'Mn' + ) + +def normalizeString(s): + + s = unicodeToAscii(s.lower().strip()) + s = re.sub(r"([.!?])", r" \1", s) + s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) + return s + +def readLangs(lang1, lang2, reverse=False): + + print("Reading lines...") + + lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\ + read().strip().split('\n') + + pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines] + + if reverse: + pairs = [list(reversed(p)) for p in pairs] + input_lang = Lang(lang2) + output_lang = Lang(lang1) + else: + input_lang = Lang(lang1) + output_lang = Lang(lang2) + + return input_lang, output_lang, pairs + +MAX_LENGTH = 10 + +eng_prefixes = ( + "i am ", "i m ", + "he is", "he s ", + "she is", "she s", + "you are", "you re ", + "we are", "we re ", + "they are", "they re " +) + +def filterPair(p): + + return len(p[0].split(' ')) < MAX_LENGTH and \ + len(p[1].split(' ')) < MAX_LENGTH and \ + p[1].startswith(eng_prefixes) + +def filterPairs(pairs): + + return [pair for pair in pairs if filterPair(pair)] + +def prepareData(lang1, lang2, reverse=False): + + input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse) + print("Read %s sentence pairs" % len(pairs)) + pairs = 
filterPairs(pairs) + print("Trimmed to %s sentence pairs" % len(pairs)) + print("Counting words...") + for pair in pairs: + input_lang.addSentence(pair[0]) + output_lang.addSentence(pair[1]) + print("Counted words:") + print(input_lang.name, input_lang.n_words) + print(output_lang.name, output_lang.n_words) + return input_lang, output_lang, pairs + +input_lang, output_lang, pairs = prepareData('eng', 'fra', True) +print(random.choice(pairs)) + +# Model + +class EncoderRNN(nn.Module): + + def __init__(self, input_size, hidden_size): + super(EncoderRNN, self).__init__() + self.hidden_size = hidden_size + + self.embedding = nn.Embedding(input_size, hidden_size) + self.gru = nn.GRU(hidden_size, hidden_size) + + def forward(self, input, hidden): + embedded = self.embedding(input).view(1, 1, -1) + output = embedded + output, hidden = self.gru(output, hidden) + return output, hidden + + def initHidden(self): + return torch.zeros(1, 1, self.hidden_size, device=device) + +class AttnDecoderRNN(nn.Module): + + def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH): + super(AttnDecoderRNN, self).__init__() + self.hidden_size = hidden_size + self.output_size = output_size + self.dropout_p = dropout_p + self.max_length = max_length + + self.embedding = nn.Embedding(self.output_size, self.hidden_size) + self.attn = nn.Linear(self.hidden_size * 2, self.max_length) + self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size) + self.dropout = nn.Dropout(self.dropout_p) + self.gru = nn.GRU(self.hidden_size, self.hidden_size) + self.out = nn.Linear(self.hidden_size, self.output_size) + + def forward(self, input, hidden, encoder_outputs): + embedded = self.embedding(input).view(1, 1, -1) + embedded = self.dropout(embedded) + + attn_weights = F.softmax( + self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1) + attn_applied = torch.bmm(attn_weights.unsqueeze(0), + encoder_outputs.unsqueeze(0)) + + output = torch.cat((embedded[0], attn_applied[0]), 1) + output = self.attn_combine(output).unsqueeze(0) + + output = F.relu(output) + output, hidden = self.gru(output, hidden) + + output = F.log_softmax(self.out(output[0]), dim=1) + return output, hidden, attn_weights + + def initHidden(self): + return torch.zeros(1, 1, self.hidden_size, device=device) + +def indexesFromSentence(lang, sentence): + + return [lang.word2index[word] for word in sentence.split(' ')] + + +def tensorFromSentence(lang, sentence): + + indexes = indexesFromSentence(lang, sentence) + indexes.append(EOS_token) + return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1) + + +def tensorsFromPair(pair): + + input_tensor = tensorFromSentence(input_lang, pair[0]) + target_tensor = tensorFromSentence(output_lang, pair[1]) + return (input_tensor, target_tensor) + +# Training the Model + +teacher_forcing_ratio = 0.5 + + +def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH): + encoder_hidden = encoder.initHidden() + + encoder_optimizer.zero_grad() + decoder_optimizer.zero_grad() + + input_length = input_tensor.size(0) + target_length = target_tensor.size(0) + + encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device) + + loss = 0 + + for ei in range(input_length): + encoder_output, encoder_hidden = encoder( + input_tensor[ei], encoder_hidden) + encoder_outputs[ei] = encoder_output[0, 0] + + decoder_input = torch.tensor([[SOS_token]], device=device) + + decoder_hidden = encoder_hidden + + 
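    # Teacher forcing: with probability teacher_forcing_ratio, feed the
    # ground-truth target token (instead of the model's own prediction)
    # back into the decoder at each step.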
use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False + + if use_teacher_forcing: + + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder( + decoder_input, decoder_hidden, encoder_outputs) + loss += criterion(decoder_output, target_tensor[di]) + decoder_input = target_tensor[di] + + else: + + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = decoder( + decoder_input, decoder_hidden, encoder_outputs) + topv, topi = decoder_output.topk(1) + decoder_input = topi.squeeze().detach() + + loss += criterion(decoder_output, target_tensor[di]) + if decoder_input.item() == EOS_token: + break + + loss.backward() + + encoder_optimizer.step() + decoder_optimizer.step() + + return loss.item() / target_length + +# Helper Function to Print Time + +import time +import math + + +def asMinutes(s): + m = math.floor(s / 60) + s -= m * 60 + return '%dm %ds' % (m, s) + + +def timeSince(since, percent): + now = time.time() + s = now - since + es = s / (percent) + rs = es - s + return '%s (- %s)' % (asMinutes(s), asMinutes(rs)) + +# Whole Training Process + +def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01): + + start = time.time() + plot_losses = [] + print_loss_total = 0 + plot_loss_total = 0 + + encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) + decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate) + training_pairs = [tensorsFromPair(random.choice(pairs)) + for i in range(n_iters)] + criterion = nn.NLLLoss() + + for iter in range(1, n_iters + 1): + training_pair = training_pairs[iter - 1] + input_tensor = training_pair[0] + target_tensor = training_pair[1] + + loss = train(input_tensor, target_tensor, encoder, + decoder, encoder_optimizer, decoder_optimizer, criterion) + print_loss_total += loss + plot_loss_total += loss + + if iter % print_every == 0: + print_loss_avg = print_loss_total / print_every + print_loss_total = 0 + print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters), + iter, iter / n_iters * 100, print_loss_avg)) + + if iter % plot_every == 0: + plot_loss_avg = plot_loss_total / plot_every + plot_losses.append(plot_loss_avg) + plot_loss_total = 0 + + +def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH): + with torch.no_grad(): + input_tensor = tensorFromSentence(input_lang, sentence) + input_length = input_tensor.size()[0] + encoder_hidden = encoder.initHidden() + + encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device) + + for ei in range(input_length): + encoder_output, encoder_hidden = encoder(input_tensor[ei], + encoder_hidden) + encoder_outputs[ei] += encoder_output[0, 0] + + decoder_input = torch.tensor([[SOS_token]], device=device) # SOS + + decoder_hidden = encoder_hidden + + decoded_words = [] + decoder_attentions = torch.zeros(max_length, max_length) + + for di in range(max_length): + decoder_output, decoder_hidden, decoder_attention = decoder( + decoder_input, decoder_hidden, encoder_outputs) + decoder_attentions[di] = decoder_attention.data + topv, topi = decoder_output.data.topk(1) + if topi.item() == EOS_token: + decoded_words.append('') + break + else: + decoded_words.append(output_lang.index2word[topi.item()]) + + decoder_input = topi.squeeze().detach() + + return decoded_words, decoder_attentions[:di + 1] + +def evaluateRandomly(encoder, decoder, n=10): + + for i in range(n): + pair = random.choice(pairs) + print('>', pair[0]) + print('=', pair[1]) + 
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

# Training and Evaluating

hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 100000, print_every=5000)

evaluateRandomly(encoder1, attn_decoder1)
\ No newline at end of file
From 6c4000b2e75ec189ba3377e657c1de0ec741d564 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Tue, 5 Jun 2018 13:51:08 -0400
Subject: [PATCH 02/14] update README.md

---
 examples/sequence-to-sequence/.DS_Store | Bin 6148 -> 6148 bytes
 .../seq2seq_translator/README.md        |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/sequence-to-sequence/.DS_Store b/examples/sequence-to-sequence/.DS_Store
index 0b4a6dc06944ab2783d7da06388e4224869f0466..a57abb186a5cb432320ada6fb3798218671d406c 100644
Binary files a/examples/sequence-to-sequence/.DS_Store and b/examples/sequence-to-sequence/.DS_Store differ
diff --git a/examples/sequence-to-sequence/seq2seq_translator/README.md b/examples/sequence-to-sequence/seq2seq_translator/README.md
index d678dba3e..78e048308 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/README.md
+++ b/examples/sequence-to-sequence/seq2seq_translator/README.md
@@ -2,7 +2,7 @@
 
 Here is the comparison between Dynet and PyTorch on the [seq2seq translator example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
 
-We have already preprocessed and prepared the eng-fra language dataset in the `data` folder according to the steps used in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html).
+The data we used is a set of many thousands of English to French translation pairs. Download the data from [here](https://download.pytorch.org/tutorial/data.zip) and extract it to the current directory.
 
 ## Usage (Dynet)

From 63f63c5debc5b8d802e993e5f7e37379799dbb4e Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Wed, 6 Jun 2018 17:44:39 -0400
Subject: [PATCH 03/14] bug fixed in the model

---
 .../.idea/dictionaries/pengzhi_gao.xml     |   3 +
 .../seq2seq_translator/.idea/misc.xml      |   4 +
 .../seq2seq_translator/.idea/modules.xml   |   8 +
 .../.idea/seq2seq_translator.iml           |  12 ++
 .../seq2seq_translator/.idea/workspace.xml | 166 ++++++++++++++++++
 .../seq2seq_translator/README.md           | 102 +++++------
 .../seq2seq_translator/seq2seq_dynet.py    |   2 +-
 7 files changed, 245 insertions(+), 52 deletions(-)
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/dictionaries/pengzhi_gao.xml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/misc.xml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/modules.xml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/seq2seq_translator.iml
 create mode 100644 examples/sequence-to-sequence/seq2seq_translator/.idea/workspace.xml

diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/dictionaries/pengzhi_gao.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/dictionaries/pengzhi_gao.xml
new file mode 100644
index 000000000..f224b9fc9
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/misc.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/misc.xml
new file mode 100644
index 000000000..e626b847e
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/modules.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/modules.xml
new file mode 100644
index 000000000..435aaac69
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/seq2seq_translator.iml b/examples/sequence-to-sequence/seq2seq_translator/.idea/seq2seq_translator.iml
new file mode 100644
index 000000000..e98082abe
diff --git a/examples/sequence-to-sequence/seq2seq_translator/.idea/workspace.xml b/examples/sequence-to-sequence/seq2seq_translator/.idea/workspace.xml
new file mode 100644
index 000000000..830438b5d

From f77ae009b1a3df952e149d01fc9ca8870f4b5f63 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:48:03 -0400
Subject: [PATCH 05/14] address the comments

---
 .DS_Store                          | Bin 8196 -> 8196 bytes
 .idea/dictionaries/pengzhi_gao.xml |   3 +
 .idea/dynet.iml                    |  12 +
 .idea/misc.xml                     |   4 +
 .idea/modules.xml                  |   8 +
 .idea/vcs.xml                      |   6 +
 .idea/workspace.xml                | 206 ++++++++++++++
 examples/.DS_Store                 | Bin 10244 -> 10244 bytes
 .../seq2seq_translator/README.md   | 125 +----------
 9 files changed, 243 insertions(+), 121 deletions(-)
 create mode 100644 .idea/dictionaries/pengzhi_gao.xml
 create mode 100644 .idea/dynet.iml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100644 .idea/workspace.xml

diff --git a/.DS_Store b/.DS_Store
index d7dc14478065716eb8d8443f856f8a008e2b97a3..816a6d9405976599f900e585009d30cf30e903ff 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.idea/dictionaries/pengzhi_gao.xml b/.idea/dictionaries/pengzhi_gao.xml
new file mode 100644
index 000000000..f224b9fc9
diff --git a/.idea/dynet.iml b/.idea/dynet.iml
new file mode 100644
index 000000000..e98082abe
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 000000000..e626b847e
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..36fb4b31c
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 000000000..94a25f7f4
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 000000000..0e60d61c9

From ad04e2d024d16601e1f00a854ea45f583a6cfb54 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:02 -0400
Subject: [PATCH 07/14] Delete .DS_Store

---
 .DS_Store | Bin 8196 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 816a6d9405976599f900e585009d30cf30e903ff..0000000000000000000000000000000000000000
Binary files a/.DS_Store and /dev/null differ
From 5c792fce59d7a1da6feaa81a6dcdc0a08b5c7679 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:16 -0400
Subject: [PATCH 08/14] Delete .DS_Store

---
 examples/.DS_Store | Bin 10244 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 examples/.DS_Store

diff --git a/examples/.DS_Store b/examples/.DS_Store
deleted file mode 100644
index a554152dee93a8c306b9720bdf9b3bc6ca878a57..0000000000000000000000000000000000000000
Binary files a/examples/.DS_Store and /dev/null differ
From e490502b1def78936fb22d522836195a0278ef09 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:26 -0400
Subject: [PATCH 09/14] Delete .DS_Store

---
 examples/sequence-to-sequence/.DS_Store | Bin 6148 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 examples/sequence-to-sequence/.DS_Store

diff --git a/examples/sequence-to-sequence/.DS_Store b/examples/sequence-to-sequence/.DS_Store
deleted file mode 100644
index a57abb186a5cb432320ada6fb3798218671d406c..0000000000000000000000000000000000000000
Binary files a/examples/sequence-to-sequence/.DS_Store and /dev/null differ

From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:49:35 -0400
Subject: [PATCH 10/14] Delete .DS_Store

---
 .../seq2seq_translator/.DS_Store | Bin 6148 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 examples/sequence-to-sequence/seq2seq_translator/.DS_Store

diff --git a/examples/sequence-to-sequence/seq2seq_translator/.DS_Store b/examples/sequence-to-sequence/seq2seq_translator/.DS_Store
deleted file mode 100644
index 071667f4470f57571ab1453781926f235c61c504..0000000000000000000000000000000000000000
Binary files a/examples/sequence-to-sequence/seq2seq_translator/.DS_Store and /dev/null differ

From 242d2416e0cb458c783d4f9d1da24794543a321a Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 22 Jun 2018 16:58:34 -0400
Subject: [PATCH 11/14] Update README.md

---
 examples/sequence-to-sequence/seq2seq_translator/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/sequence-to-sequence/seq2seq_translator/README.md b/examples/sequence-to-sequence/seq2seq_translator/README.md
index d43ee8d6e..f641cfbd8 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/README.md
+++ b/examples/sequence-to-sequence/seq2seq_translator/README.md
@@ -4,7 +4,7 @@
 
 The data we used is a set of many thousands of English to French translation pairs. Download the data from [here](https://download.pytorch.org/tutorial/data.zip) and extract it to the current directory.
 
-## Usage (Dynet)
+## Usage (DyNet)
 
 The architecture of the Dynet model `seq2seq_dynet.py` is the same as that in the [PyTorch example](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html). We implement the attention mechanism in the model as well.
From aa6284a90e531f64cc99ed82f4c6210c5997de0e Mon Sep 17 00:00:00 2001 From: Pengzhi Gao Date: Tue, 4 Sep 2018 10:54:26 -0400 Subject: [PATCH 12/14] fix lint error --- .../seq2seq_translator/seq2seq_dynet.py | 73 ++++++++++++++----- .../seq2seq_translator/seq2seq_pytorch.py | 47 +++++++++--- 2 files changed, 89 insertions(+), 31 deletions(-) diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py index f05de7f8a..a63d74386 100644 --- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py @@ -6,12 +6,15 @@ import re import random import dynet as dy +import time +import math # Data Preparation SOS_token = 0 EOS_token = 1 + class Lang(object): def __init__(self, name): @@ -34,6 +37,7 @@ def addWord(self, word): else: self.word2count[word] += 1 + def unicodeToAscii(s): return ''.join( @@ -41,6 +45,7 @@ def unicodeToAscii(s): if unicodedata.category(c) != 'Mn' ) + def normalizeString(s): s = unicodeToAscii(s.lower().strip()) @@ -48,10 +53,12 @@ def normalizeString(s): s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) return s + def readLangs(lang1, lang2, reverse=False): print("Reading lines...") - lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().strip().split('\n') + lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().\ + strip().split('\n') pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines] if reverse: pairs = [list(reversed(p)) for p in pairs] @@ -62,9 +69,12 @@ def readLangs(lang1, lang2, reverse=False): output_lang = Lang(lang2) return input_lang, output_lang, pairs + MAX_LENGTH = 10 -eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s", "you are", "you re ", - "we are", "we re ", "they are", "they re ") +eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s", + "you are", "you re ", "we are", "we re ", "they are", + "they re ") + def filterPair(p): @@ -72,10 +82,12 @@ def filterPair(p): len(p[1].split(' ')) < MAX_LENGTH and \ p[1].startswith(eng_prefixes) + def filterPairs(pairs): return [pair for pair in pairs if filterPair(pair)] + def prepareData(lang1, lang2, reverse=False): input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse) @@ -91,18 +103,22 @@ def prepareData(lang1, lang2, reverse=False): print(output_lang.name, output_lang.n_words) return input_lang, output_lang, pairs + input_lang, output_lang, pairs = prepareData('eng', 'fra', True) print(random.choice(pairs)) # Model + class EncoderRNN(object): def __init__(self, in_vocab, hidden_dim, model): self.in_vocab = in_vocab self.hidden_dim = hidden_dim - self.embedding_enc = model.add_lookup_parameters((self.in_vocab, self.hidden_dim)) - self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model) + self.embedding_enc = model.add_lookup_parameters((self.in_vocab, + self.hidden_dim)) + self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, + model) def __call__(self, input, hidden): input_embed = dy.lookup(self.embedding_enc, input) @@ -113,20 +129,26 @@ def __call__(self, input, hidden): def initHidden(self): return [dy.zeros(self.hidden_dim)] + DROPOUT_RATE = 0.1 + class AttnDecoderRNN(object): def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH): self.hidden_dim = hidden_dim self.out_vocab = out_vocab self.max_length = max_length - self.embedding_dec = model.add_lookup_parameters((self.out_vocab, self.hidden_dim)) - 
self.w_attn = model.add_parameters((self.max_length, self.hidden_dim * 2))
+        self.embedding_dec = model.add_lookup_parameters((self.out_vocab,
+                                                          self.hidden_dim))
+        self.w_attn = model.add_parameters((self.max_length,
+                                            self.hidden_dim * 2))
         self.b_attn = model.add_parameters((self.max_length,))
-        self.w_attn_combine = model.add_parameters((self.hidden_dim, self.hidden_dim * 2))
+        self.w_attn_combine = model.add_parameters((self.hidden_dim,
+                                                    self.hidden_dim * 2))
         self.b_attn_combine = model.add_parameters((self.hidden_dim,))
-        self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model)
+        self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim,
+                                     model)
         self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim))
         self.b_dec = model.add_parameters((self.out_vocab,))
@@ -159,7 +181,9 @@ def initHidden(self):


 def indexesFromSentence(lang, sentence):
-    return [lang.word2index[word] for word in sentence.split(" ")] + [EOS_token]
+    return [lang.word2index[word] for word in sentence.split(" ")] + \
+        [EOS_token]
+

 def indexesFromPair(pair):
@@ -169,8 +193,10 @@ def indexesFromPair(pair):

 # Training the Model

+
 teacher_forcing_ratio = 0.5

+
 def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

     dy.renew_cg()
@@ -200,12 +226,14 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

     if use_teacher_forcing:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True)
+            decoder_output, decoder_hidden, decoder_attention = decoder(
+                decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             decoder_input = targets[i]
     else:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=True)
+            decoder_output, decoder_hidden, decoder_attention = decoder(
+                decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             probs = decoder_output.vec_value()
             decoder_input = probs.index(max(probs))
@@ -220,14 +248,13 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

 # Helper Function to Print Time

-import time
-import math

 def asMinutes(s):
     m = math.floor(s/60)
     s -= m*60
     return "%dm %ds" % (m, s)

+
 def timeSince(since, percent):
     now = time.time()
     s = now - since
@@ -237,14 +264,17 @@ def timeSince(since, percent):

 # Whole Training Process

-def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=100):
+
+def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
+               plot_every=100):

     start = time.time()
     plot_losses = []
     print_loss_total = 0
     plot_loss_total = 0

-    training_pairs = [indexesFromPair(random.choice(pairs)) for _ in range(n_iters)]
+    training_pairs = [indexesFromPair(random.choice(pairs))
+                      for _ in range(n_iters)]

     for iter in range(1, n_iters+1):

@@ -260,7 +290,9 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=
         if iter % print_every == 0:
             print_loss_avg = print_loss_total/print_every
             print_loss_total = 0
-            print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters), iter, iter/n_iters*100, print_loss_avg))
+            print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters),
+                                         iter, iter/n_iters*100,
+                                         print_loss_avg))

         if iter % plot_every == 0:
             plot_loss_avg = plot_loss_total/plot_every
@@ -269,6 +301,7 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000, plot_every=

 # Evaluation

+
 def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

     dy.renew_cg()
@@ -293,7 +326,8 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
     decoder_attentions = [dy.zeros(max_length) for _ in range(max_length)]

     for i in range(max_length):
-        decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs, dropout=False)
+        decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_input, decoder_hidden, encoder_outputs, dropout=False)
         decoder_attentions[i] = decoder_attention
         probs = decoder_output.vec_value()
         pred = probs.index(max(probs))
@@ -320,6 +354,7 @@ def evaluationRandomly(encoder, decoder, n=10):

 # Start Training and Evaluating

+
 model = dy.ParameterCollection()
 hidden_dim = 256
 encoder = EncoderRNN(input_lang.n_words, hidden_dim, model)
@@ -328,4 +363,4 @@ def evaluationRandomly(encoder, decoder, n=10):

 trainIters(encoder, decoder, trainer, 100000, print_every=5000)

-evaluationRandomly(encoder, decoder)
\ No newline at end of file
+evaluationRandomly(encoder, decoder)
diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
index a88ea5fdc..8c74513c6 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
@@ -5,6 +5,8 @@
 import unicodedata
 import re
 import random
+import time
+import math
 import torch
 import torch.nn as nn
 from torch import optim
@@ -16,6 +18,7 @@
 SOS_token = 0
 EOS_token = 1

+
 class Lang:

     def __init__(self, name):
@@ -38,6 +41,7 @@ def addWord(self, word):
         else:
             self.word2count[word] += 1

+
 def unicodeToAscii(s):

     return ''.join(
@@ -45,6 +49,7 @@ def unicodeToAscii(s):
         if unicodedata.category(c) != 'Mn'
     )

+
 def normalizeString(s):

     s = unicodeToAscii(s.lower().strip())
@@ -52,6 +57,7 @@ def normalizeString(s):
     s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
     return s

+
 def readLangs(lang1, lang2, reverse=False):

     print("Reading lines...")
@@ -71,6 +77,7 @@ def readLangs(lang1, lang2, reverse=False):

     return input_lang, output_lang, pairs

+
 MAX_LENGTH = 10

 eng_prefixes = (
@@ -82,16 +89,19 @@ def readLangs(lang1, lang2, reverse=False):
     "they are", "they re "
 )

+
 def filterPair(p):

     return len(p[0].split(' ')) < MAX_LENGTH and \
         len(p[1].split(' ')) < MAX_LENGTH and \
         p[1].startswith(eng_prefixes)

+
 def filterPairs(pairs):

     return [pair for pair in pairs if filterPair(pair)]

+
 def prepareData(lang1, lang2, reverse=False):

     input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
@@ -107,11 +117,13 @@ def prepareData(lang1, lang2, reverse=False):
     print(output_lang.name, output_lang.n_words)
     return input_lang, output_lang, pairs

+
 input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
 print(random.choice(pairs))

 # Model

+
 class EncoderRNN(nn.Module):

     def __init__(self, input_size, hidden_size):
@@ -130,9 +142,11 @@ def forward(self, input, hidden):
     def initHidden(self):
         return torch.zeros(1, 1, self.hidden_size, device=device)

+
 class AttnDecoderRNN(nn.Module):

-    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
+    def __init__(self, hidden_size, output_size, dropout_p=0.1,
+                 max_length=MAX_LENGTH):
         super(AttnDecoderRNN, self).__init__()
         self.hidden_size = hidden_size
         self.output_size = output_size
@@ -167,6 +181,7 @@ def forward(self, input, hidden, encoder_outputs):
     def initHidden(self):
         return torch.zeros(1, 1, self.hidden_size, device=device)

+
 def indexesFromSentence(lang, sentence):
     return [lang.word2index[word] for word in sentence.split(' ')]

@@ -187,10 +202,12 @@ def tensorsFromPair(pair):

 # Training the Model

+
 teacher_forcing_ratio = 0.5

-def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
+
+def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
+          decoder_optimizer, criterion, max_length=MAX_LENGTH):

     encoder_hidden = encoder.initHidden()

     encoder_optimizer.zero_grad()
@@ -199,7 +216,8 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, deco
     input_length = input_tensor.size(0)
     target_length = target_tensor.size(0)

-    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
+    encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
+                                  device=device)

     loss = 0
@@ -212,7 +230,8 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, deco

     decoder_hidden = encoder_hidden

-    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
+    use_teacher_forcing = True if random.random() < teacher_forcing_ratio \
+        else False

     if use_teacher_forcing:

@@ -243,9 +262,6 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, deco

 # Helper Function to Print Time

-import time
-import math
-

 def asMinutes(s):
     m = math.floor(s / 60)
@@ -262,7 +278,9 @@ def timeSince(since, percent):

 # Whole Training Process

-def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
+
+def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100,
+               learning_rate=0.01):

     start = time.time()
     plot_losses = []
@@ -289,7 +307,8 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, lear
             print_loss_avg = print_loss_total / print_every
             print_loss_total = 0
             print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
-                                         iter, iter / n_iters * 100, print_loss_avg))
+                                         iter, iter / n_iters * 100,
+                                         print_loss_avg))

         if iter % plot_every == 0:
             plot_loss_avg = plot_loss_total / plot_every
@@ -303,7 +322,8 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
     input_length = input_tensor.size()[0]
     encoder_hidden = encoder.initHidden()

-    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
+    encoder_outputs = torch.zeros(max_length, encoder.hidden_size,
+                                  device=device)

     for ei in range(input_length):
         encoder_output, encoder_hidden = encoder(input_tensor[ei],
@@ -332,6 +352,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

     return decoded_words, decoder_attentions[:di + 1]

+
 def evaluateRandomly(encoder, decoder, n=10):

     for i in range(n):
@@ -345,10 +366,12 @@ def evaluateRandomly(encoder, decoder, n=10):

 # Training and Evaluating

+
 hidden_size = 256
 encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
-attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
+attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
+                               dropout_p=0.1).to(device)

 trainIters(encoder1, attn_decoder1, 100000, print_every=5000)

-evaluateRandomly(encoder1, attn_decoder1)
\ No newline at end of file
+evaluateRandomly(encoder1, attn_decoder1)

From d9619cf8353141aad138d36ec98401f1d1541336 Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 7 Sep 2018 13:37:13 -0400
Subject: [PATCH 13/14] fix lint error

---
 .../seq2seq_translator/seq2seq_dynet.py       | 30 +++++++++--------
 .../seq2seq_translator/seq2seq_pytorch.py     | 38 +++++++++----------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
index a63d74386..2be37690c 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
@@ -1,7 +1,7 @@
 # Requirements

 from __future__ import unicode_literals, print_function, division
-from io import open
+import io
 import unicodedata
 import re
 import random
@@ -57,8 +57,8 @@ def normalizeString(s):
 def readLangs(lang1, lang2, reverse=False):

     print("Reading lines...")
-    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().\
-        strip().split('\n')
+    lines = io.open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
+        read().strip().split('\n')
     pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
     if reverse:
         pairs = [list(reversed(p)) for p in pairs]
@@ -120,8 +120,8 @@ def __init__(self, in_vocab, hidden_dim, model):
         self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim,
                                      model)

-    def __call__(self, input, hidden):
-        input_embed = dy.lookup(self.embedding_enc, input)
+    def __call__(self, inputs, hidden):
+        input_embed = dy.lookup(self.embedding_enc, inputs)
         state_enc = self.rnn_enc.initial_state(vecs=hidden)
         state_enc = state_enc.add_input(input_embed)
         return state_enc.output(), state_enc.h()
@@ -152,8 +152,8 @@ def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH):
         self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim))
         self.b_dec = model.add_parameters((self.out_vocab,))

-    def __call__(self, input, hidden, encoder_outptus, dropout=False):
-        input_embed = dy.lookup(self.embedding_dec, input)
+    def __call__(self, inputs, hidden, encoder_outptus, dropout=False):
+        input_embed = dy.lookup(self.embedding_dec, inputs)
         if dropout:
             input_embed = dy.dropout(input_embed, DROPOUT_RATE)
         input_cat = dy.concatenate([input_embed, hidden[0]])
@@ -226,7 +226,7 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):

     if use_teacher_forcing:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             decoder_input = targets[i]
@@ -276,9 +276,9 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
     training_pairs = [indexesFromPair(random.choice(pairs))
                       for _ in range(n_iters)]

-    for iter in range(1, n_iters+1):
+    for iteration in range(1, n_iters+1):

-        training_pair = training_pairs[iter-1]
+        training_pair = training_pairs[iteration-1]
         inputs = training_pair[0]
         targets = training_pair[1]

@@ -287,14 +287,14 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
         print_loss_total += loss
         plot_loss_total += loss

-        if iter % print_every == 0:
+        if iteration % print_every == 0:
             print_loss_avg = print_loss_total/print_every
             print_loss_total = 0
-            print("%s (%d %d%%) %.4f" % (timeSince(start, iter/n_iters),
-                                         iter, iter/n_iters*100,
+            print("%s (%d %d%%) %.4f" % (timeSince(start, iteration/n_iters),
+                                         iteration, iteration/n_iters*100,
                                          print_loss_avg))

-        if iter % plot_every == 0:
+        if iteration % plot_every == 0:
             plot_loss_avg = plot_loss_total/plot_every
             plot_losses.append(plot_loss_avg)
             plot_loss_total = 0
@@ -343,7 +343,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

 def evaluationRandomly(encoder, decoder, n=10):

-    for i in range(n):
+    for _ in range(n):
         pair = random.choice(pairs)
         print(">", pair[0])
         print("=", pair[1])
diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
index 8c74513c6..75bcff626 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py
@@ -1,7 +1,7 @@
 # Requirements

 from __future__ import unicode_literals, print_function, division
-from io import open
+import io
 import unicodedata
 import re
 import random
@@ -62,7 +62,7 @@ def readLangs(lang1, lang2, reverse=False):

     print("Reading lines...")

-    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
+    lines = io.open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
         read().strip().split('\n')

     pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
@@ -133,8 +133,8 @@ def __init__(self, input_size, hidden_size):
         self.embedding = nn.Embedding(input_size, hidden_size)
         self.gru = nn.GRU(hidden_size, hidden_size)

-    def forward(self, input, hidden):
-        embedded = self.embedding(input).view(1, 1, -1)
+    def forward(self, inputs, hidden):
+        embedded = self.embedding(inputs).view(1, 1, -1)
         output = embedded
         output, hidden = self.gru(output, hidden)
         return output, hidden
@@ -160,8 +160,8 @@ def __init__(self, hidden_size, output_size, dropout_p=0.1,
         self.gru = nn.GRU(self.hidden_size, self.hidden_size)
         self.out = nn.Linear(self.hidden_size, self.output_size)

-    def forward(self, input, hidden, encoder_outputs):
-        embedded = self.embedding(input).view(1, 1, -1)
+    def forward(self, inputs, hidden, encoder_outputs):
+        embedded = self.embedding(inputs).view(1, 1, -1)
         embedded = self.dropout(embedded)

         attn_weights = F.softmax(
@@ -236,7 +236,7 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,

     if use_teacher_forcing:
         for di in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs)
             loss += criterion(decoder_output, target_tensor[di])
             decoder_input = target_tensor[di]
@@ -244,9 +244,9 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,

     else:
         for di in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs)
-            topv, topi = decoder_output.topk(1)
+            _, topi = decoder_output.topk(1)
             decoder_input = topi.squeeze().detach()

             loss += criterion(decoder_output, target_tensor[di])
@@ -290,11 +290,11 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100,

     encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
     decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
     training_pairs = [tensorsFromPair(random.choice(pairs))
-                      for i in range(n_iters)]
+                      for _ in range(n_iters)]
     criterion = nn.NLLLoss()

-    for iter in range(1, n_iters + 1):
-        training_pair = training_pairs[iter - 1]
+    for iteration in range(1, n_iters + 1):
+        training_pair = training_pairs[iteration - 1]
         input_tensor = training_pair[0]
         target_tensor = training_pair[1]
@@ -303,14 +303,14 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100,
         print_loss_total += loss
         plot_loss_total += loss

-        if iter % print_every == 0:
+        if iteration % print_every == 0:
             print_loss_avg = print_loss_total / print_every
             print_loss_total = 0
-            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
-                                         iter, iter / n_iters * 100,
+            print('%s (%d %d%%) %.4f' % (timeSince(start, iteration / n_iters),
+                                         iteration, iteration / n_iters * 100,
                                          print_loss_avg))

-        if iter % plot_every == 0:
+        if iteration % plot_every == 0:
             plot_loss_avg = plot_loss_total / plot_every
             plot_losses.append(plot_loss_avg)
             plot_loss_total = 0
@@ -341,7 +341,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
             decoder_output, decoder_hidden, decoder_attention = decoder(
                 decoder_input, decoder_hidden, encoder_outputs)
             decoder_attentions[di] = decoder_attention.data
-            topv, topi = decoder_output.data.topk(1)
+            _, topi = decoder_output.data.topk(1)
             if topi.item() == EOS_token:
                 decoded_words.append('<EOS>')
                 break
@@ -355,11 +355,11 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):

 def evaluateRandomly(encoder, decoder, n=10):

-    for i in range(n):
+    for _ in range(n):
         pair = random.choice(pairs)
         print('>', pair[0])
         print('=', pair[1])
-        output_words, attentions = evaluate(encoder, decoder, pair[0])
+        output_words, _ = evaluate(encoder, decoder, pair[0])
         output_sentence = ' '.join(output_words)
         print('<', output_sentence)
         print('')

From 177aa617bfa01b00a577928dfb88d7bc4ba1e20d Mon Sep 17 00:00:00 2001
From: Pengzhi Gao
Date: Fri, 7 Sep 2018 15:38:05 -0400
Subject: [PATCH 14/14] fix lint error

---
 .../seq2seq_translator/seq2seq_dynet.py       | 11 ++++++-----
 .../seq2seq_translator/seq2seq_pytorch.py     |  9 +++++----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
index 2be37690c..fb62589ec 100644
--- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
+++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_dynet.py
@@ -8,6 +8,7 @@
 import dynet as dy
 import time
 import math
+r = random.SystemRandom()

 # Data Preparation

@@ -105,7 +106,7 @@ def prepareData(lang1, lang2, reverse=False):


 input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
-print(random.choice(pairs))
+print(r.choice(pairs))

 # Model

@@ -219,7 +220,7 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):
     decoder_input = SOS_token
     decoder_hidden = encoder_hidden

-    if random.random() < teacher_forcing_ratio:
+    if r.random() < teacher_forcing_ratio:
         use_teacher_forcing = True
     else:
         use_teacher_forcing = False
@@ -232,7 +233,7 @@ def train(inputs, targets, encoder, decoder, trainer, max_length=MAX_LENGTH):
             decoder_input = targets[i]
     else:
         for i in range(target_length):
-            decoder_output, decoder_hidden, decoder_attention = decoder(
+            decoder_output, decoder_hidden, _ = decoder(
                 decoder_input, decoder_hidden, encoder_outputs, dropout=True)
             losses.append(-dy.log(dy.pick(decoder_output, targets[i])))
             probs = decoder_output.vec_value()
@@ -273,7 +274,7 @@ def trainIters(encoder, decoder, trainer, n_iters, print_every=1000,
     print_loss_total = 0
     plot_loss_total = 0

-    training_pairs = [indexesFromPair(random.choice(pairs))
+    training_pairs = [indexesFromPair(r.choice(pairs))
                       for _ in range(n_iters)]

     for iteration in range(1, n_iters+1):
@@ -344,7 +345,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
 def evaluationRandomly(encoder, decoder, n=10):

     for _ in range(n):
-        pair = random.choice(pairs)
+        pair = r.choice(pairs)
print(">", pair[0]) print("=", pair[1]) output_words = evaluate(encoder, decoder, pair[0]) diff --git a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py index 75bcff626..95aab4669 100644 --- a/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py +++ b/examples/sequence-to-sequence/seq2seq_translator/seq2seq_pytorch.py @@ -12,6 +12,7 @@ from torch import optim import torch.nn.functional as F device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +r = random.SystemRandom() # Data Preparation @@ -119,7 +120,7 @@ def prepareData(lang1, lang2, reverse=False): input_lang, output_lang, pairs = prepareData('eng', 'fra', True) -print(random.choice(pairs)) +print(r.choice(pairs)) # Model @@ -230,7 +231,7 @@ def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_hidden = encoder_hidden - use_teacher_forcing = True if random.random() < teacher_forcing_ratio \ + use_teacher_forcing = True if r.random() < teacher_forcing_ratio \ else False if use_teacher_forcing: @@ -289,7 +290,7 @@ def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate) - training_pairs = [tensorsFromPair(random.choice(pairs)) + training_pairs = [tensorsFromPair(r.choice(pairs)) for _ in range(n_iters)] criterion = nn.NLLLoss() @@ -356,7 +357,7 @@ def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH): def evaluateRandomly(encoder, decoder, n=10): for _ in range(n): - pair = random.choice(pairs) + pair = r.choice(pairs) print('>', pair[0]) print('=', pair[1]) output_words, _ = evaluate(encoder, decoder, pair[0])