-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A bidirectional sequence-to-sequence learning model is implemented; it is still to be tested.
- Loading branch information
Showing
2 changed files
with
353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#ifndef GALOIS_BISEQENCODERDECODER_H
#define GALOIS_BISEQENCODERDECODER_H

#include "galois/base.h"
#include "galois/narray.h"
#include "galois/gfilters/net.h"
#include "galois/models/ordered_model.h"
#include "galois/optimizer.h"

namespace gs
{

// Bidirectional sequence-to-sequence model built from two encoder/decoder
// pairs, so each of the two sequence domains ("one"/"another") can be
// decoded both into itself and into the other domain.
template<typename T>
class BiSeqEncoderDecoder : protected OrderedModel<T>
{
    // Shared RNG for sampling minibatch indices; defined below with a
    // fixed seed (0) so training runs are reproducible.
    static default_random_engine galois_rn_generator;

protected:
    int max_len_one;          // unrolled sequence length for domain "one"
    int max_len_another;      // unrolled sequence length for domain "another"
    int input_size_one;       // input-id range of domain "one" (feeds an Embedding filter)
    int input_size_another;   // input-id range of domain "another"
    vector<int> hidden_sizes; // hidden-layer widths, bottom to top

    int train_seq_count = 0;              // number of registered training sequence pairs
    SP_NArray<T> train_one = nullptr;     // [train_seq_count x max_len_one]
    SP_NArray<T> train_another = nullptr; // [train_seq_count x max_len_another]
public:
    BiSeqEncoderDecoder(int max_len_one,
                        int max_len_another,
                        int input_size_one,
                        int input_size_another,
                        initializer_list<int> hidden_sizes,
                        int batch_size,
                        int num_epoch,
                        T learning_rate,
                        string optimizer_name);
    BiSeqEncoderDecoder(const BiSeqEncoderDecoder& other) = delete;
    BiSeqEncoderDecoder& operator=(const BiSeqEncoderDecoder&) = delete;

    using OrderedModel<T>::get_params;
    using OrderedModel<T>::get_grads;

    // Registers the parallel training corpora; may be called only once.
    void add_train_dataset(const SP_NArray<T> data, const SP_NArray<T> target);
    // Trains on one random minibatch and returns the summed loss;
    // skips the optimizer step when update is false.
    // (Declaration now matches the definition: no top-level const.)
    T train_one_batch(bool update=true);
    // Full training loop over num_epoch epochs, printing progress.
    void fit();
};
// Out-of-line definition of the class-static RNG (seeded with 0).
template<typename T>
default_random_engine BiSeqEncoderDecoder<T>::galois_rn_generator(0);

}

#endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,300 @@ | ||
#include "galois/models/bi_seq_encoder_decoder.h" | ||
#include "galois/gfilters/path.h" | ||
#include "galois/filters.h" | ||
|
||
namespace gs | ||
{ | ||
|
||
// Builds a graph-node id of the form "tag[i]",
// e.g. bi_seq_generate_id("x", 3) -> "x[3]".
std::string bi_seq_generate_id(std::string tag, int i) {
    std::string id = std::move(tag);
    id += '[';
    id += std::to_string(i);
    id += ']';
    return id;
}
|
||
// Builds a graph-node id of the form "tag[i,j]" (two-index overload),
// e.g. bi_seq_generate_id("h", 2, 0) -> "h[2,0]".
std::string bi_seq_generate_id(std::string tag, int i, int j) {
    std::string id = std::move(tag);
    id += '[';
    id += std::to_string(i);
    id += ',';
    id += std::to_string(j);
    id += ']';
    return id;
}
|
||
// RNN encoder unrolled for a fixed number of steps. Node ids follow the
// pattern x[i] (inputs), hraw[i,j] (pre-activation), h[i,j] (hidden state
// of layer j at step i). Its outputs are the final-step hidden states of
// every layer, which a Decoder consumes as its initial state.
template<typename T>
class Encoder : public OrderedNet<T>
{
public:
    Encoder(const Encoder& other) = delete;
    Encoder& operator=(const Encoder&) = delete;
    // max_len:      number of unrolled time steps
    // input_size:   id range of the Embedding filter at layer 0
    // hidden_sizes: width of each hidden layer, bottom to top
    Encoder(int max_len, int input_size, vector<int> hidden_sizes) {
        // One recurrent (h[i-1,j] -> hraw[i,j]) Linear per layer; the same
        // filter object is share()d across all time steps (weight tying).
        auto h2hraw = vector<SP_Filter<T>>();
        for (auto hsize : hidden_sizes) {
            h2hraw.push_back(make_shared<Linear<T>>(hsize, hsize));
        }
        // Bottom-up filters: an Embedding for layer 0, Linear for the
        // layers above; also share()d across time steps.
        auto x2hraw = vector<SP_Filter<T>>();
        for (int i = 0; i < hidden_sizes.size(); i++) {
            if (i == 0) {
                x2hraw.push_back(make_shared<Embedding<T>>(input_size, hidden_sizes[i]));
            } else {
                x2hraw.push_back(make_shared<Linear<T>>(hidden_sizes[i-1], hidden_sizes[i]));
            }
        }
        // Unroll: h[i,j] = tanh(recurrent(h[i-1,j]) + bottom_up(below)).
        for (int i = 0; i < max_len; i++) {
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string left_h = bi_seq_generate_id("h", i-1, j);
                // No recurrent link at step 0: the encoder's initial hidden
                // state is implicitly absent (left_h is unused when i==0).
                if (i > 0) {
                    BaseNet<T>::add_link(left_h, hraw, h2hraw[j]->share());
                }
            }
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string down_h;
                if (j == 0) {
                    down_h = bi_seq_generate_id("x", i);     // layer 0 reads the input token
                } else {
                    down_h = bi_seq_generate_id("h", i, j-1); // upper layers read the layer below
                }
                BaseNet<T>::add_link(down_h, hraw, x2hraw[j]->share());
                string h = bi_seq_generate_id("h", i, j);
                BaseNet<T>::add_link(hraw, h, make_shared<Tanh<T>>());
            }
        }
        // Inputs: one x node per step. Outputs: last-step hidden state of
        // every layer (the state handed to a decoder).
        auto x_ids = vector<string>();
        auto y_ids = vector<string>();
        for (int i = 0; i < max_len; i++) {
            x_ids.push_back(bi_seq_generate_id("x", i));
        }
        for (int j = 0; j < hidden_sizes.size(); j++) {
            y_ids.push_back(bi_seq_generate_id("h", max_len-1, j));
        }
        this->add_input_ids(x_ids);
        this->add_output_ids(y_ids);

        this->fix_net();
    }

    using OrderedNet<T>::forward;
    using OrderedNet<T>::backward;
};
|
||
// RNN decoder unrolled for a fixed number of steps. Node ids mirror the
// Encoder ("x", "hraw", "h"), plus per-step yraw[i]/y[i] output nodes that
// pass through a CrossEntropy loss. The step -1 hidden states h[-1,j] are
// declared as extra inputs and receive the encoder's final state.
template<typename T>
class Decoder : public OrderedNet<T>
{
private:
    int max_len;
    int num_hidden_layer;

    // Synthetic signal fed as the decoder's first input token; zeroed on
    // every forward() pass (see forward() below).
    SP_Signal<T> initial_input_signal;

public:
    Decoder(const Decoder& other) = delete;
    Decoder& operator=(const Decoder&) = delete;
    // max_len:      number of unrolled decoding steps
    // input_size:   id range of the Embedding filter / width of the output layer
    // hidden_sizes: width of each hidden layer, bottom to top
    Decoder(int max_len, int input_size, vector<int> hidden_sizes)
        : max_len(max_len)
        , num_hidden_layer(hidden_sizes.size()) {
        // Recurrent filters, one per layer, share()d across time steps.
        auto h2hraw = vector<SP_Filter<T>>();
        for (auto hsize : hidden_sizes) {
            h2hraw.push_back(make_shared<Linear<T>>(hsize, hsize));
        }
        // Bottom-up filters: Embedding at layer 0, Linear above.
        auto x2hraw = vector<SP_Filter<T>>();
        for (int i = 0; i < hidden_sizes.size(); i++) {
            if (i == 0) {
                x2hraw.push_back(make_shared<Embedding<T>>(input_size, hidden_sizes[i]));
            } else {
                x2hraw.push_back(make_shared<Linear<T>>(hidden_sizes[i-1], hidden_sizes[i]));
            }
        }
        // Output projection from the top hidden layer back to input_size.
        auto h2yraw = make_shared<Linear<T>>(hidden_sizes.back(), input_size);

        for (int i = 0; i < max_len; i++) {
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string left_h = bi_seq_generate_id("h", i-1, j);
                // Unlike Encoder, step 0 also gets a recurrent link: it
                // connects to h[-1,j], one of this net's declared inputs.
                BaseNet<T>::add_link(left_h, hraw, h2hraw[j]->share());
            }
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string down_h;
                if (j == 0) {
                    down_h = bi_seq_generate_id("x", i);
                } else {
                    down_h = bi_seq_generate_id("h", i, j-1);
                }
                BaseNet<T>::add_link(down_h, hraw, x2hraw[j]->share());
                string h = bi_seq_generate_id("h", i, j);
                BaseNet<T>::add_link(hraw, h, make_shared<Tanh<T>>());
            }
            // Per-step output head: top hidden state -> yraw -> y (loss).
            string yraw = bi_seq_generate_id("yraw", i);
            string down_h = bi_seq_generate_id("h", i, hidden_sizes.size()-1);
            BaseNet<T>::add_link(down_h, yraw, h2yraw->share());
            string y = bi_seq_generate_id("y", i);
            BaseNet<T>::add_link(yraw, y, make_shared<CrossEntropy<T>>());
        }

        auto x_ids = vector<string>();
        auto y_ids = vector<string>();
        for (int i = 0; i < max_len; i++) {
            x_ids.push_back(bi_seq_generate_id("x", i));
            y_ids.push_back(bi_seq_generate_id("y", i));
        }
        // Extra inputs: the initial hidden state h[-1,j] of every layer.
        for (int i = 0; i < hidden_sizes.size(); i++) {
            x_ids.push_back(bi_seq_generate_id("h", -1, i));
        }
        this->add_input_ids(x_ids);
        this->add_output_ids(y_ids);

        this->fix_net();

        initial_input_signal = make_shared<Signal<T>>(InputSignal);
    }

    // NOTE(review): share() is unimplemented, but BiSeqEncoderDecoder's
    // constructor DOES call decoder->share() — model construction will
    // throw here. Must be implemented before the model is usable.
    SP_Filter<T> share() override {
        throw "to be implemented";
    }

    // Rearranges the externally supplied signals before handing them to
    // OrderedNet. The decoder's inputs become: [initial zero token,
    // out_signals[0..max_len-2], then the caller's in_signals (encoder
    // state for the h[-1,j] nodes)] — presumably wiring step i's output
    // signal as step i+1's input; confirm against OrderedNet's ordering.
    void install_signals(const vector<SP_Signal<T>> &in_signals, const vector<SP_Signal<T>> &out_signals) override {
        cout << "virtual function installing signals" << endl;
        auto new_in_signals = vector<SP_Signal<T>>();
        new_in_signals.push_back(initial_input_signal);
        new_in_signals.insert(new_in_signals.end(), out_signals.begin(), out_signals.end()-1);
        new_in_signals.insert(new_in_signals.end(), in_signals.begin(), in_signals.end());
        OrderedNet<T>::install_signals(new_in_signals, out_signals);
    }

    void forward() override {
        // Reset the synthetic first-token input before every pass.
        initial_input_signal->get_data()->fill(0);
        OrderedNet<T>::forward();
    }
    using OrderedNet<T>::backward;
};
|
||
// Assembles the full bidirectional model: one encoder and one decoder per
// domain, wired so every (encoder, decoder) combination exists — one2one,
// one2another, another2one, another2another — with parameters shared
// between combinations via share().
template<typename T>
BiSeqEncoderDecoder<T>::BiSeqEncoderDecoder(int _max_len_one,
                                            int _max_len_another,
                                            int _input_size_one,
                                            int _input_size_another,
                                            initializer_list<int> _hidden_sizes,
                                            int _batch_size,
                                            int _num_epoch,
                                            T _learning_rate,
                                            string _optimizer_name)
        : OrderedModel<T>(_batch_size, _num_epoch, _learning_rate, _optimizer_name)
        , max_len_one(_max_len_one)
        , max_len_another(_max_len_another)
        , input_size_one(_input_size_one)
        , input_size_another(_input_size_another)
        , hidden_sizes(_hidden_sizes) {
    auto encoder_one = make_shared<Encoder<T>>(max_len_one, input_size_one, hidden_sizes);
    auto decoder_one = make_shared<Decoder<T>>(max_len_one, input_size_one, hidden_sizes);
    auto encoder_another = make_shared<Encoder<T>>(max_len_another, input_size_another, hidden_sizes);
    auto decoder_another = make_shared<Decoder<T>>(max_len_another, input_size_another, hidden_sizes);

    // Node-id groups for the top-level graph: x_* are inputs, h_* are the
    // encoder states handed to decoders, y_* are the four output groups.
    auto x_ids_one = vector<string>();
    auto x_ids_another = vector<string>();
    auto h_ids_one = vector<string>();
    auto h_ids_another = vector<string>();
    auto y_ids_one2one = vector<string>();
    auto y_ids_one2another = vector<string>();
    auto y_ids_another2one = vector<string>();
    auto y_ids_another2another = vector<string>();
    for (int i = 0; i < max_len_one; i++) {
        x_ids_one.push_back(bi_seq_generate_id("x_one", i));
        y_ids_one2one.push_back(bi_seq_generate_id("y_one2one", i));
        y_ids_another2one.push_back(bi_seq_generate_id("y_another2one", i));
    }
    for (int j = 0; j < hidden_sizes.size(); j++) {
        h_ids_one.push_back(bi_seq_generate_id("h_one", max_len_one-1, j));
        h_ids_another.push_back(bi_seq_generate_id("h_another", max_len_another-1, j));
    }
    for (int i = 0; i < max_len_another; i++) {
        x_ids_another.push_back(bi_seq_generate_id("x_another", i));
        y_ids_one2another.push_back(bi_seq_generate_id("y_one2another", i));
        y_ids_another2another.push_back(bi_seq_generate_id("y_another2another", i));
    }

    // First use of each sub-net is added directly; reuses go through
    // share() so the combinations tie their weights.
    // NOTE(review): decoder_another->share() / decoder_one->share() call
    // Decoder::share(), which currently throws "to be implemented" — this
    // constructor cannot complete until that is implemented. Also note the
    // asymmetry: encoder_another is share()d on its first use while
    // encoder_one is not — confirm this is intentional.
    this->add_link(x_ids_one, h_ids_one, encoder_one);
    this->add_link(h_ids_one, y_ids_one2one, decoder_one);
    this->add_link(h_ids_one, y_ids_one2another, decoder_another);
    this->add_link(x_ids_another, h_ids_another, encoder_another->share());
    this->add_link(h_ids_another, y_ids_another2another, decoder_another->share());
    this->add_link(h_ids_another, y_ids_another2one, decoder_one->share());

    // Input order matters: train_one_batch() fills signals for domain
    // "one" first, then domain "another".
    this->add_input_ids(x_ids_one);
    this->add_input_ids(x_ids_another);
    this->add_output_ids(y_ids_one2one);
    this->add_output_ids(y_ids_one2another);
    this->add_output_ids(y_ids_another2one);
    this->add_output_ids(y_ids_another2another);

    this->compile();
}
|
||
template<typename T> | ||
void BiSeqEncoderDecoder<T>::add_train_dataset(const SP_NArray<T> one, const SP_NArray<T> another) { | ||
auto one_dims = one->get_dims(); | ||
auto another_dims = another->get_dims(); | ||
CHECK(one_dims[0] == another_dims[0], "length of data and target must match"); | ||
CHECK(one_dims.size() == 2 && another_dims.size() == 2, "both should be an array of sentences"); | ||
CHECK(one_dims[1] == max_len_one && another_dims[1] == max_len_another, "these should match"); | ||
|
||
CHECK(train_one==nullptr && train_another==nullptr, "dataset should not be set before"); | ||
train_seq_count = one_dims[0]; | ||
train_one = one; | ||
train_another = another; | ||
} | ||
|
||
template<typename T> | ||
T BiSeqEncoderDecoder<T>::train_one_batch(bool update) { | ||
uniform_int_distribution<> distribution(0, train_seq_count-1); | ||
vector<int> batch_ids(this->batch_size); | ||
for (int i = 0; i < this->batch_size; i++) { | ||
batch_ids[i] = distribution(galois_rn_generator); | ||
} | ||
|
||
this->net.reopaque(); | ||
|
||
for (int i = 0; i < max_len_one; i++) { | ||
this->input_signals[i]->get_data()->copy_from(batch_ids, i, train_one); | ||
} | ||
for (int i = max_len_one; i < max_len_one+max_len_another; i++) { | ||
this->input_signals[i]->get_data()->copy_from(batch_ids, i, train_another); | ||
} | ||
|
||
this->net.forward(); | ||
this->net.backward(); | ||
if (update) { | ||
this->optimizer->update(); | ||
} | ||
|
||
T loss = 0; | ||
for (auto output_signal : this->output_signals) { | ||
loss += *output_signal->get_loss(); | ||
} | ||
return loss; | ||
} | ||
|
||
// test dataset is not support for the moment | ||
template<typename T> | ||
void BiSeqEncoderDecoder<T>::fit() { | ||
printf("Start training\n"); | ||
|
||
for (int k = 1; k < this->num_epoch+1; k++) { | ||
printf("Epoch: %2d", k); | ||
auto start = chrono::system_clock::now(); | ||
T loss = 0; | ||
|
||
int len = train_seq_count; | ||
for (int i = 0; i < len; i += this->batch_size) { | ||
loss += train_one_batch(i); | ||
if (i % 10000 == 0) { | ||
cout << " > " << i << endl; | ||
} | ||
} | ||
loss /= T(len); | ||
|
||
auto end = chrono::system_clock::now(); | ||
chrono::duration<double> eplased_time = end - start; | ||
printf(", time: %.2fs", eplased_time.count()); | ||
printf(", loss: %.6f", loss); | ||
printf("\n"); | ||
} | ||
} | ||
|
||
// Explicit instantiations: the template definitions live in this .cpp,
// so the supported element types must be instantiated here for linkage.
template class BiSeqEncoderDecoder<float>;
template class BiSeqEncoderDecoder<double>;
|
||
} |