-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A bidirectional sequence-to-sequence learning model is implemented; it is still to be tested.
- Loading branch information
Showing
2 changed files
with
353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#ifndef GALOIS_BISEQENCODERDECODER_H
#define GALOIS_BISEQENCODERDECODER_H

#include "galois/base.h"
#include "galois/narray.h"
#include "galois/gfilters/net.h"
#include "galois/models/ordered_model.h"
#include "galois/optimizer.h"

namespace gs
{

// Bidirectional sequence-to-sequence model built from two encoder/decoder
// pairs, so each of the two sequence domains ("one"/"another") can be
// decoded both into itself and into the other domain.
template<typename T>
class BiSeqEncoderDecoder : protected OrderedModel<T>
{
    // Shared RNG for sampling minibatch indices; defined below with a
    // fixed seed (0) so training runs are reproducible.
    static default_random_engine galois_rn_generator;

protected:
    int max_len_one;          // unrolled sequence length for domain "one"
    int max_len_another;      // unrolled sequence length for domain "another"
    int input_size_one;       // input-id range of domain "one" (feeds an Embedding filter)
    int input_size_another;   // input-id range of domain "another"
    vector<int> hidden_sizes; // hidden-layer widths, bottom to top

    int train_seq_count = 0;              // number of registered training sequence pairs
    SP_NArray<T> train_one = nullptr;     // [train_seq_count x max_len_one]
    SP_NArray<T> train_another = nullptr; // [train_seq_count x max_len_another]
public:
    BiSeqEncoderDecoder(int max_len_one,
                        int max_len_another,
                        int input_size_one,
                        int input_size_another,
                        initializer_list<int> hidden_sizes,
                        int batch_size,
                        int num_epoch,
                        T learning_rate,
                        string optimizer_name);
    BiSeqEncoderDecoder(const BiSeqEncoderDecoder& other) = delete;
    BiSeqEncoderDecoder& operator=(const BiSeqEncoderDecoder&) = delete;

    using OrderedModel<T>::get_params;
    using OrderedModel<T>::get_grads;

    // Registers the parallel training corpora; may be called only once.
    void add_train_dataset(const SP_NArray<T> data, const SP_NArray<T> target);
    // Trains on one random minibatch and returns the summed loss;
    // skips the optimizer step when update is false.
    // (Declaration now matches the definition: no top-level const.)
    T train_one_batch(bool update=true);
    // Full training loop over num_epoch epochs, printing progress.
    void fit();
};
// Out-of-line definition of the class-static RNG (seeded with 0).
template<typename T>
default_random_engine BiSeqEncoderDecoder<T>::galois_rn_generator(0);

}

#endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,300 @@ | ||
#include "galois/models/bi_seq_encoder_decoder.h" | ||
#include "galois/gfilters/path.h" | ||
#include "galois/filters.h" | ||
|
||
namespace gs | ||
{ | ||
|
||
// Builds a graph-node id of the form "tag[i]",
// e.g. bi_seq_generate_id("x", 3) -> "x[3]".
std::string bi_seq_generate_id(std::string tag, int i) {
    std::string id = std::move(tag);
    id += '[';
    id += std::to_string(i);
    id += ']';
    return id;
}
|
||
// Builds a graph-node id of the form "tag[i,j]" (two-index overload),
// e.g. bi_seq_generate_id("h", 2, 0) -> "h[2,0]".
std::string bi_seq_generate_id(std::string tag, int i, int j) {
    std::string id = std::move(tag);
    id += '[';
    id += std::to_string(i);
    id += ',';
    id += std::to_string(j);
    id += ']';
    return id;
}
|
||
// RNN encoder unrolled for a fixed number of steps. Node ids follow the
// pattern x[i] (inputs), hraw[i,j] (pre-activation), h[i,j] (hidden state
// of layer j at step i). Its outputs are the final-step hidden states of
// every layer, which a Decoder consumes as its initial state.
template<typename T>
class Encoder : public OrderedNet<T>
{
public:
    Encoder(const Encoder& other) = delete;
    Encoder& operator=(const Encoder&) = delete;
    // max_len:      number of unrolled time steps
    // input_size:   id range of the Embedding filter at layer 0
    // hidden_sizes: width of each hidden layer, bottom to top
    Encoder(int max_len, int input_size, vector<int> hidden_sizes) {
        // One recurrent (h[i-1,j] -> hraw[i,j]) Linear per layer; the same
        // filter object is share()d across all time steps (weight tying).
        auto h2hraw = vector<SP_Filter<T>>();
        for (auto hsize : hidden_sizes) {
            h2hraw.push_back(make_shared<Linear<T>>(hsize, hsize));
        }
        // Bottom-up filters: an Embedding for layer 0, Linear for the
        // layers above; also share()d across time steps.
        auto x2hraw = vector<SP_Filter<T>>();
        for (int i = 0; i < hidden_sizes.size(); i++) {
            if (i == 0) {
                x2hraw.push_back(make_shared<Embedding<T>>(input_size, hidden_sizes[i]));
            } else {
                x2hraw.push_back(make_shared<Linear<T>>(hidden_sizes[i-1], hidden_sizes[i]));
            }
        }
        // Unroll: h[i,j] = tanh(recurrent(h[i-1,j]) + bottom_up(below)).
        for (int i = 0; i < max_len; i++) {
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string left_h = bi_seq_generate_id("h", i-1, j);
                // No recurrent link at step 0: the encoder's initial hidden
                // state is implicitly absent (left_h is unused when i==0).
                if (i > 0) {
                    BaseNet<T>::add_link(left_h, hraw, h2hraw[j]->share());
                }
            }
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string down_h;
                if (j == 0) {
                    down_h = bi_seq_generate_id("x", i);     // layer 0 reads the input token
                } else {
                    down_h = bi_seq_generate_id("h", i, j-1); // upper layers read the layer below
                }
                BaseNet<T>::add_link(down_h, hraw, x2hraw[j]->share());
                string h = bi_seq_generate_id("h", i, j);
                BaseNet<T>::add_link(hraw, h, make_shared<Tanh<T>>());
            }
        }
        // Inputs: one x node per step. Outputs: last-step hidden state of
        // every layer (the state handed to a decoder).
        auto x_ids = vector<string>();
        auto y_ids = vector<string>();
        for (int i = 0; i < max_len; i++) {
            x_ids.push_back(bi_seq_generate_id("x", i));
        }
        for (int j = 0; j < hidden_sizes.size(); j++) {
            y_ids.push_back(bi_seq_generate_id("h", max_len-1, j));
        }
        this->add_input_ids(x_ids);
        this->add_output_ids(y_ids);

        this->fix_net();
    }

    using OrderedNet<T>::forward;
    using OrderedNet<T>::backward;
};
|
||
// RNN decoder unrolled for a fixed number of steps. Node ids mirror the
// Encoder ("x", "hraw", "h"), plus per-step yraw[i]/y[i] output nodes that
// pass through a CrossEntropy loss. The step -1 hidden states h[-1,j] are
// declared as extra inputs and receive the encoder's final state.
template<typename T>
class Decoder : public OrderedNet<T>
{
private:
    int max_len;
    int num_hidden_layer;

    // Synthetic signal fed as the decoder's first input token; zeroed on
    // every forward() pass (see forward() below).
    SP_Signal<T> initial_input_signal;

public:
    Decoder(const Decoder& other) = delete;
    Decoder& operator=(const Decoder&) = delete;
    // max_len:      number of unrolled decoding steps
    // input_size:   id range of the Embedding filter / width of the output layer
    // hidden_sizes: width of each hidden layer, bottom to top
    Decoder(int max_len, int input_size, vector<int> hidden_sizes)
        : max_len(max_len)
        , num_hidden_layer(hidden_sizes.size()) {
        // Recurrent filters, one per layer, share()d across time steps.
        auto h2hraw = vector<SP_Filter<T>>();
        for (auto hsize : hidden_sizes) {
            h2hraw.push_back(make_shared<Linear<T>>(hsize, hsize));
        }
        // Bottom-up filters: Embedding at layer 0, Linear above.
        auto x2hraw = vector<SP_Filter<T>>();
        for (int i = 0; i < hidden_sizes.size(); i++) {
            if (i == 0) {
                x2hraw.push_back(make_shared<Embedding<T>>(input_size, hidden_sizes[i]));
            } else {
                x2hraw.push_back(make_shared<Linear<T>>(hidden_sizes[i-1], hidden_sizes[i]));
            }
        }
        // Output projection from the top hidden layer back to input_size.
        auto h2yraw = make_shared<Linear<T>>(hidden_sizes.back(), input_size);

        for (int i = 0; i < max_len; i++) {
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string left_h = bi_seq_generate_id("h", i-1, j);
                // Unlike Encoder, step 0 also gets a recurrent link: it
                // connects to h[-1,j], one of this net's declared inputs.
                BaseNet<T>::add_link(left_h, hraw, h2hraw[j]->share());
            }
            for (int j = 0; j < hidden_sizes.size(); j++) {
                string hraw = bi_seq_generate_id("hraw", i, j);
                string down_h;
                if (j == 0) {
                    down_h = bi_seq_generate_id("x", i);
                } else {
                    down_h = bi_seq_generate_id("h", i, j-1);
                }
                BaseNet<T>::add_link(down_h, hraw, x2hraw[j]->share());
                string h = bi_seq_generate_id("h", i, j);
                BaseNet<T>::add_link(hraw, h, make_shared<Tanh<T>>());
            }
            // Per-step output head: top hidden state -> yraw -> y (loss).
            string yraw = bi_seq_generate_id("yraw", i);
            string down_h = bi_seq_generate_id("h", i, hidden_sizes.size()-1);
            BaseNet<T>::add_link(down_h, yraw, h2yraw->share());
            string y = bi_seq_generate_id("y", i);
            BaseNet<T>::add_link(yraw, y, make_shared<CrossEntropy<T>>());
        }

        auto x_ids = vector<string>();
        auto y_ids = vector<string>();
        for (int i = 0; i < max_len; i++) {
            x_ids.push_back(bi_seq_generate_id("x", i));
            y_ids.push_back(bi_seq_generate_id("y", i));
        }
        // Extra inputs: the initial hidden state h[-1,j] of every layer.
        for (int i = 0; i < hidden_sizes.size(); i++) {
            x_ids.push_back(bi_seq_generate_id("h", -1, i));
        }
        this->add_input_ids(x_ids);
        this->add_output_ids(y_ids);

        this->fix_net();

        initial_input_signal = make_shared<Signal<T>>(InputSignal);
    }

    // NOTE(review): share() is unimplemented, but BiSeqEncoderDecoder's
    // constructor DOES call decoder->share() — model construction will
    // throw here. Must be implemented before the model is usable.
    SP_Filter<T> share() override {
        throw "to be implemented";
    }

    // Rearranges the externally supplied signals before handing them to
    // OrderedNet. The decoder's inputs become: [initial zero token,
    // out_signals[0..max_len-2], then the caller's in_signals (encoder
    // state for the h[-1,j] nodes)] — presumably wiring step i's output
    // signal as step i+1's input; confirm against OrderedNet's ordering.
    void install_signals(const vector<SP_Signal<T>> &in_signals, const vector<SP_Signal<T>> &out_signals) override {
        cout << "virtual function installing signals" << endl;
        auto new_in_signals = vector<SP_Signal<T>>();
        new_in_signals.push_back(initial_input_signal);
        new_in_signals.insert(new_in_signals.end(), out_signals.begin(), out_signals.end()-1);
        new_in_signals.insert(new_in_signals.end(), in_signals.begin(), in_signals.end());
        OrderedNet<T>::install_signals(new_in_signals, out_signals);
    }

    void forward() override {
        // Reset the synthetic first-token input before every pass.
        initial_input_signal->get_data()->fill(0);
        OrderedNet<T>::forward();
    }
    using OrderedNet<T>::backward;
};
|
||
// Assembles the full bidirectional model: one encoder and one decoder per
// domain, wired so every (encoder, decoder) combination exists — one2one,
// one2another, another2one, another2another — with parameters shared
// between combinations via share().
template<typename T>
BiSeqEncoderDecoder<T>::BiSeqEncoderDecoder(int _max_len_one,
                                            int _max_len_another,
                                            int _input_size_one,
                                            int _input_size_another,
                                            initializer_list<int> _hidden_sizes,
                                            int _batch_size,
                                            int _num_epoch,
                                            T _learning_rate,
                                            string _optimizer_name)
        : OrderedModel<T>(_batch_size, _num_epoch, _learning_rate, _optimizer_name)
        , max_len_one(_max_len_one)
        , max_len_another(_max_len_another)
        , input_size_one(_input_size_one)
        , input_size_another(_input_size_another)
        , hidden_sizes(_hidden_sizes) {
    auto encoder_one = make_shared<Encoder<T>>(max_len_one, input_size_one, hidden_sizes);
    auto decoder_one = make_shared<Decoder<T>>(max_len_one, input_size_one, hidden_sizes);
    auto encoder_another = make_shared<Encoder<T>>(max_len_another, input_size_another, hidden_sizes);
    auto decoder_another = make_shared<Decoder<T>>(max_len_another, input_size_another, hidden_sizes);

    // Node-id groups for the top-level graph: x_* are inputs, h_* are the
    // encoder states handed to decoders, y_* are the four output groups.
    auto x_ids_one = vector<string>();
    auto x_ids_another = vector<string>();
    auto h_ids_one = vector<string>();
    auto h_ids_another = vector<string>();
    auto y_ids_one2one = vector<string>();
    auto y_ids_one2another = vector<string>();
    auto y_ids_another2one = vector<string>();
    auto y_ids_another2another = vector<string>();
    for (int i = 0; i < max_len_one; i++) {
        x_ids_one.push_back(bi_seq_generate_id("x_one", i));
        y_ids_one2one.push_back(bi_seq_generate_id("y_one2one", i));
        y_ids_another2one.push_back(bi_seq_generate_id("y_another2one", i));
    }
    for (int j = 0; j < hidden_sizes.size(); j++) {
        h_ids_one.push_back(bi_seq_generate_id("h_one", max_len_one-1, j));
        h_ids_another.push_back(bi_seq_generate_id("h_another", max_len_another-1, j));
    }
    for (int i = 0; i < max_len_another; i++) {
        x_ids_another.push_back(bi_seq_generate_id("x_another", i));
        y_ids_one2another.push_back(bi_seq_generate_id("y_one2another", i));
        y_ids_another2another.push_back(bi_seq_generate_id("y_another2another", i));
    }

    // First use of each sub-net is added directly; reuses go through
    // share() so the combinations tie their weights.
    // NOTE(review): decoder_another->share() / decoder_one->share() call
    // Decoder::share(), which currently throws "to be implemented" — this
    // constructor cannot complete until that is implemented. Also note the
    // asymmetry: encoder_another is share()d on its first use while
    // encoder_one is not — confirm this is intentional.
    this->add_link(x_ids_one, h_ids_one, encoder_one);
    this->add_link(h_ids_one, y_ids_one2one, decoder_one);
    this->add_link(h_ids_one, y_ids_one2another, decoder_another);
    this->add_link(x_ids_another, h_ids_another, encoder_another->share());
    this->add_link(h_ids_another, y_ids_another2another, decoder_another->share());
    this->add_link(h_ids_another, y_ids_another2one, decoder_one->share());

    // Input order matters: train_one_batch() fills signals for domain
    // "one" first, then domain "another".
    this->add_input_ids(x_ids_one);
    this->add_input_ids(x_ids_another);
    this->add_output_ids(y_ids_one2one);
    this->add_output_ids(y_ids_one2another);
    this->add_output_ids(y_ids_another2one);
    this->add_output_ids(y_ids_another2another);

    this->compile();
}
|
||
template<typename T> | ||
void BiSeqEncoderDecoder<T>::add_train_dataset(const SP_NArray<T> one, const SP_NArray<T> another) { | ||
auto one_dims = one->get_dims(); | ||
auto another_dims = another->get_dims(); | ||
CHECK(one_dims[0] == another_dims[0], "length of data and target must match"); | ||
CHECK(one_dims.size() == 2 && another_dims.size() == 2, "both should be an array of sentences"); | ||
CHECK(one_dims[1] == max_len_one && another_dims[1] == max_len_another, "these should match"); | ||
|
||
CHECK(train_one==nullptr && train_another==nullptr, "dataset should not be set before"); | ||
train_seq_count = one_dims[0]; | ||
train_one = one; | ||
train_another = another; | ||
} | ||
|
||
template<typename T> | ||
T BiSeqEncoderDecoder<T>::train_one_batch(bool update) { | ||
uniform_int_distribution<> distribution(0, train_seq_count-1); | ||
vector<int> batch_ids(this->batch_size); | ||
for (int i = 0; i < this->batch_size; i++) { | ||
batch_ids[i] = distribution(galois_rn_generator); | ||
} | ||
|
||
this->net.reopaque(); | ||
|
||
for (int i = 0; i < max_len_one; i++) { | ||
this->input_signals[i]->get_data()->copy_from(batch_ids, i, train_one); | ||
} | ||
for (int i = max_len_one; i < max_len_one+max_len_another; i++) { | ||
this->input_signals[i]->get_data()->copy_from(batch_ids, i, train_another); | ||
} | ||
|
||
this->net.forward(); | ||
this->net.backward(); | ||
if (update) { | ||
this->optimizer->update(); | ||
} | ||
|
||
T loss = 0; | ||
for (auto output_signal : this->output_signals) { | ||
loss += *output_signal->get_loss(); | ||
} | ||
return loss; | ||
} | ||
|
||
// test dataset is not support for the moment | ||
template<typename T> | ||
void BiSeqEncoderDecoder<T>::fit() { | ||
printf("Start training\n"); | ||
|
||
for (int k = 1; k < this->num_epoch+1; k++) { | ||
printf("Epoch: %2d", k); | ||
auto start = chrono::system_clock::now(); | ||
T loss = 0; | ||
|
||
int len = train_seq_count; | ||
for (int i = 0; i < len; i += this->batch_size) { | ||
loss += train_one_batch(i); | ||
if (i % 10000 == 0) { | ||
cout << " > " << i << endl; | ||
} | ||
} | ||
loss /= T(len); | ||
|
||
auto end = chrono::system_clock::now(); | ||
chrono::duration<double> eplased_time = end - start; | ||
printf(", time: %.2fs", eplased_time.count()); | ||
printf(", loss: %.6f", loss); | ||
printf("\n"); | ||
} | ||
} | ||
|
||
// Explicit instantiations: the template definitions live in this .cpp,
// so the supported element types must be instantiated here for linkage.
template class BiSeqEncoderDecoder<float>;
template class BiSeqEncoderDecoder<double>;
|
||
} |