diff --git a/benchmarks/NodeRegression/README.md b/benchmarks/NodeRegression/README.md
new file mode 100644
index 00000000..9f1d8460
--- /dev/null
+++ b/benchmarks/NodeRegression/README.md
@@ -0,0 +1,60 @@
+# GLI Benchmarking on the `NodeRegression` Task
+
+The code in this folder can be used to benchmark some popular models on the `NodeRegression` task.
+
+## How to run
+
+Example commands to run the code:
+
+```bash
+# full batch
+python train.py --dataset <dataset> --model GCN
+python train.py --dataset <dataset> --model MLP
+python train.py --dataset <dataset> --model GAT --model-cfg configs/GAT.yaml
+python train.py --dataset <dataset> --model GraphSAGE --model-cfg configs/GraphSAGE.yaml
+python train.py --dataset <dataset> --model MoNet --model-cfg configs/MoNet.yaml
+python train.py --dataset <dataset> --model MixHop --model-cfg configs/MixHop.yaml
+python train.py --dataset <dataset> --model LINKX --model-cfg configs/LINKX.yaml --train-cfg configs/LINKX_train.yaml
+
+# mini batch
+python train_minibatch.py --dataset <dataset> --model GCN_minibatch
+
+# GBDT
+python train_gbdt.py --dataset <dataset> --model lightgbm
+python train_gbdt.py --dataset <dataset> --model catboost
+```
+
+One can provide a `yaml` file to the `--model-cfg` or `--train-cfg` argument to specify the model or training configuration, respectively. If not provided, the default configurations (see [model_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/NodeRegression/configs/model_default.yaml) and [train_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/NodeRegression/configs/train_default.yaml)) will be used.
+
+Note that some models may have unique hyperparameters that are not included in the default configuration files. In this case, one should pass the model-specific configuration file to `train.py`.
+
+## Supported models
+
+The following models are supported by this benchmark.
+
+### Full batch
+
+- `GCN`
+- `MLP`
+- `GAT`
+- `GraphSAGE`
+- `MoNet`
+- `MixHop`
+- `LINKX`
+
+### Mini batch
+
+- `GCN_minibatch`
+
+### Gradient Boosting Decision Tree (GBDT)
+
+- `catboost`
+- `lightgbm`
+
+To add a new model, one should add the model implementation under the `models` folder and add model-specific configurations under the `configs` folder when needed. We have tried to implement `train.py` in a generic way, so adding a new model should only require minimal modifications to `train.py` and `utils.py`.
+
+Contributions of new models are welcome through pull requests.
+
+## Supported datasets
+
+No `NodeRegression` datasets are available yet.
diff --git a/benchmarks/NodeRegression/config_gen.py b/benchmarks/NodeRegression/config_gen.py
new file mode 100644
index 00000000..c376101f
--- /dev/null
+++ b/benchmarks/NodeRegression/config_gen.py
@@ -0,0 +1,75 @@
+"""
+Random search.
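For reference, `config_gen.py` below writes paired `<model>_model_<i>_<j>.yaml` / `<model>_train_<i>_<j>.yaml` files into a timestamped folder under `grid/`. A minimal sketch of one way to consume those pairs with `train.py` follows; the grid folder name and `<dataset>` are placeholders, not part of the benchmark code.

```python
# Sketch: run train.py once per (model_cfg, train_cfg) pair emitted by
# config_gen.py. The grid folder name is hypothetical; replace <dataset>
# with a real GLI dataset name.
import subprocess
from pathlib import Path

grid_dir = Path("grid/GCN_20240101_000000")  # produced by config_gen.py
for train_cfg in sorted(grid_dir.glob("GCN_train_*.yaml")):
    model_cfg = grid_dir / train_cfg.name.replace("_train_", "_model_")
    subprocess.run(
        ["python", "train.py",
         "--dataset", "<dataset>",
         "--model", "GCN",
         "--model-cfg", str(model_cfg),
         "--train-cfg", str(train_cfg)],
        check=True,
    )
```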
+ +References: +https://github.com/pyg-team/pytorch_geometric/blob/master/graphgym/configs_gen.py +https://github.com/pyg-team/pytorch_geometric/blob/master/torch_geometric/ +graphgym/utils/io.py +""" + +import argparse +import yaml +import time +from utils import load_config_file, makedirs_rm_exist +from random import randint + +train_cfg_list = ["self_loop", "to_dense", "lr", "weight_decay", "num_trials", + "max_epoch", "early_stopping"] + + +def parse_args(): + """Parse the arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model-cfg", type=str, + default="configs/model_default.yaml", + help="The model configuration file path.") + parser.add_argument("--train-cfg", type=str, + default="configs/train_default.yaml", + help="The training configuration file path.") + parser.add_argument("--grid", type=str, + help="configuration file for grid search.", + default="grid/grid_example.yaml") + parser.add_argument("--sample_num", dest="sample_num", + help="Number of random samples in the space.", + default=10, type=int) + parser.add_argument("--trial_num", type=int, default=5, + help="Number of trials for same configuration.") + parser.add_argument("--model", type=str, default="GCN", + help="model to be used. GCN, GAT, MoNet,\ + GraphSAGE, MLP for now.") + return parser.parse_args() + + +def grid_gen(args, gen_cfg, model_cfg, train_cfg): + """Generate random search configuration files.""" + dir_name = "./grid/" + args.model + time.strftime("_%Y%m%d_%H%M%S") + makedirs_rm_exist(dir_name) + for i in range(args.sample_num): + for key in gen_cfg: + key_len = len(gen_cfg[key]) + if key in train_cfg_list: + train_cfg[key] = gen_cfg[key][randint(0, key_len-1)] + else: + # otherwise, the key is for model + model_cfg[key] = gen_cfg[key][randint(0, key_len-1)] + for j in range(args.trial_num): + index_str = str(i) + "_" + str(j) + # the i-th configuration, j-th trial + train_cfg_name = args.model + "_train_" + index_str + ".yaml" + model_cfg_name = args.model + "_model_" + index_str + ".yaml" + train_cfg["seed"] = randint(1, 10000) + with open(dir_name + "/" + train_cfg_name, + "w", encoding="utf-8") as f: + yaml.dump(train_cfg, f, default_flow_style=False) + with open(dir_name + "/" + model_cfg_name, + "w", encoding="utf-8") as f: + yaml.dump(model_cfg, f, default_flow_style=False) + + +if __name__ == "__main__": + Args = parse_args() + Gen_cfg = load_config_file(Args.grid) + # load default configuration for training and model + Model_cfg = load_config_file(Args.model_cfg) + Train_cfg = load_config_file(Args.train_cfg) + grid_gen(Args, Gen_cfg, Model_cfg, Train_cfg) diff --git a/benchmarks/NodeRegression/configs/GAT.yaml b/benchmarks/NodeRegression/configs/GAT.yaml new file mode 100644 index 00000000..176f7a4c --- /dev/null +++ b/benchmarks/NodeRegression/configs/GAT.yaml @@ -0,0 +1,7 @@ +num_layers: 2 +num_hidden: 8 +num_heads: 8 +num_out_heads: 2 +residual: False +dropout: .6 +negative_slope: .2 diff --git a/benchmarks/NodeRegression/configs/GraphSAGE.yaml b/benchmarks/NodeRegression/configs/GraphSAGE.yaml new file mode 100644 index 00000000..0732681a --- /dev/null +++ b/benchmarks/NodeRegression/configs/GraphSAGE.yaml @@ -0,0 +1,4 @@ +num_layers: 2 +num_hidden: 8 +dropout: .6 +aggregator_type: gcn diff --git a/benchmarks/NodeRegression/configs/LINKX.yaml b/benchmarks/NodeRegression/configs/LINKX.yaml new file mode 100644 index 00000000..c12f8882 --- /dev/null +++ b/benchmarks/NodeRegression/configs/LINKX.yaml @@ -0,0 +1,7 @@ +num_hidden: 16 +num_layers: 1 +dropout: .5 
+inner_activation: False
+inner_dropout: False
+init_layers_A: 1
+init_layers_X: 1
diff --git a/benchmarks/NodeRegression/configs/LINKX_train.yaml b/benchmarks/NodeRegression/configs/LINKX_train.yaml
new file mode 100644
index 00000000..9e67cc7e
--- /dev/null
+++ b/benchmarks/NodeRegression/configs/LINKX_train.yaml
@@ -0,0 +1,11 @@
+loss_fcn: mse
+self_loop: False
+to_dense: False
+lr: .01
+weight_decay: 0.001
+max_epoch: 10000
+early_stopping: True
+seed: 0
+batch_size: 256
+to_undirected: False
+optimizer: "AdamW"
diff --git a/benchmarks/NodeRegression/configs/MixHop.yaml b/benchmarks/NodeRegression/configs/MixHop.yaml
new file mode 100644
index 00000000..ccc80a99
--- /dev/null
+++ b/benchmarks/NodeRegression/configs/MixHop.yaml
@@ -0,0 +1,6 @@
+num_hidden: 8
+p: [0, 1, 2]
+num_layers: 2
+dropout: .5
+layer_dropout: 0.9
+batchnorm: False
diff --git a/benchmarks/NodeRegression/configs/MoNet.yaml b/benchmarks/NodeRegression/configs/MoNet.yaml
new file mode 100644
index 00000000..989e365d
--- /dev/null
+++ b/benchmarks/NodeRegression/configs/MoNet.yaml
@@ -0,0 +1,5 @@
+num_layers: 2
+num_hidden: 8
+dropout: .6
+pseudo_dim: 2
+num_kernels: 3
diff --git a/benchmarks/NodeRegression/configs/catboost.yaml b/benchmarks/NodeRegression/configs/catboost.yaml
new file mode 100644
index 00000000..a2c887df
--- /dev/null
+++ b/benchmarks/NodeRegression/configs/catboost.yaml
@@ -0,0 +1,12 @@
+hp:
+  lr:
+    - 0.01
+    - 0.1
+  depth:
+    - 4
+    - 6
+  l2_leaf_reg:
+    - null
+num_epochs: 1000
+patience: 100
+verbose: false
diff --git a/benchmarks/NodeRegression/configs/lightgbm.yaml b/benchmarks/NodeRegression/configs/lightgbm.yaml
new file mode 100644
index 00000000..88130fc1
--- /dev/null
+++ b/benchmarks/NodeRegression/configs/lightgbm.yaml
@@ -0,0 +1,14 @@
+hp:
+  lr:
+    - 0.01
+    - 0.1
+  num_leaves:
+    - 15
+    - 63
+  lambda_l2:
+    - 0.0
+  boosting:
+    - gbdt
+num_epochs: 1000
+patience: 100
+ 
\ No newline at end of file
diff --git a/benchmarks/NodeRegression/configs/model_default.yaml b/benchmarks/NodeRegression/configs/model_default.yaml
new file mode 100644
index 00000000..fc98b689
--- /dev/null
+++ b/benchmarks/NodeRegression/configs/model_default.yaml
@@ -0,0 +1,3 @@
+num_layers: 2
+num_hidden: 8
+dropout: .6
diff --git a/benchmarks/NodeRegression/configs/train_default.yaml b/benchmarks/NodeRegression/configs/train_default.yaml
new file mode 100644
index 00000000..842c684e
--- /dev/null
+++ b/benchmarks/NodeRegression/configs/train_default.yaml
@@ -0,0 +1,11 @@
+loss_fcn: mse
+self_loop: True
+to_dense: False
+lr: .01
+weight_decay: 0.001
+max_epoch: 10000
+early_stopping: True
+seed: 0
+batch_size: 256
+to_undirected: False
+optimizer: "Adam"
diff --git a/benchmarks/NodeRegression/grid/grid_example.yaml b/benchmarks/NodeRegression/grid/grid_example.yaml
new file mode 100644
index 00000000..6235819a
--- /dev/null
+++ b/benchmarks/NodeRegression/grid/grid_example.yaml
@@ -0,0 +1,4 @@
+num_hidden: [32, 64]
+lr: [0.001, 0.005, 0.01, .1]
+dropout: [0.2, 0.4, 0.6, 0.8]
+weight_decay: [.0001, .001, .01, .1]
diff --git a/benchmarks/NodeRegression/models/gat.py b/benchmarks/NodeRegression/models/gat.py
new file mode 100644
index 00000000..436f442f
--- /dev/null
+++ b/benchmarks/NodeRegression/models/gat.py
@@ -0,0 +1,57 @@
+"""
+GAT model in GLI.
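The per-model YAML files above are consumed as plain keyword dictionaries. As a rough illustration of how `configs/GAT.yaml` maps onto the `GAT` model defined below (mirroring the head-list construction in `utils.generate_model`), with a toy random graph standing in for a real GLI dataset:

```python
# Sketch: build a GAT for single-target regression from configs/GAT.yaml,
# the same way utils.generate_model assembles the per-layer head list.
# The graph and features are synthetic placeholders.
import dgl
import torch
import torch.nn.functional as F
import yaml
from models.gat import GAT

with open("configs/GAT.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

g = dgl.add_self_loop(dgl.rand_graph(20, 40))
feats = torch.randn(20, 16)

heads = [cfg["num_heads"]] * cfg["num_layers"] + [cfg["num_out_heads"]]
model = GAT(g, cfg["num_layers"], 16, cfg["num_hidden"], 1, heads, F.elu,
            cfg["dropout"], cfg["dropout"], cfg["negative_slope"],
            cfg["residual"])
out = model(feats)  # shape: (20, 1)
```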
+ +References: +https://github.com/dmlc/dgl/tree/master/examples/pytorch/gat +""" + +from torch import nn +from dgl.nn import GATConv + + +class GAT(nn.Module): + """GAT network.""" + + def __init__(self, + g, + num_layers, + in_dim, + num_hidden, + num_classes, + heads, + activation, + feat_drop, + attn_drop, + negative_slope, + residual): + """Initiate model.""" + super().__init__() + self.g = g + self.num_layers = num_layers + self.gat_layers = nn.ModuleList() + self.activation = activation + + # input projection (no residual) + self.gat_layers.append(GATConv( + in_dim, num_hidden, heads[0], + feat_drop, attn_drop, negative_slope, False, self.activation)) + # hidden layers + for layer in range(1, num_layers - 2): + # due to multi-head, the in_dim = num_hidden * num_heads + self.gat_layers.append(GATConv(num_hidden * heads[layer-1], + num_hidden, heads[layer], + feat_drop, attn_drop, + negative_slope, residual, + self.activation)) + # output projection + self.gat_layers.append(GATConv( + num_hidden * heads[-2], num_classes, heads[-1], + feat_drop, attn_drop, negative_slope, residual, None)) + + def forward(self, inputs): + """Forward.""" + h = inputs + for layer in range(self.num_layers): + h = self.gat_layers[layer](self.g, h) + h = h.flatten(1) if layer != self.num_layers - 1 else h.mean(1) + return h diff --git a/benchmarks/NodeRegression/models/gbdt.py b/benchmarks/NodeRegression/models/gbdt.py new file mode 100644 index 00000000..d4e49b85 --- /dev/null +++ b/benchmarks/NodeRegression/models/gbdt.py @@ -0,0 +1,272 @@ +""" +CatBoost and Lightgbm model in GLI. + +References: +https://github.com/nd7141/bgnn/blob/master/models/GBDT.py +""" + +from catboost import Pool, CatBoostClassifier, CatBoostRegressor +import time +from sklearn.metrics import mean_squared_error, accuracy_score, r2_score +import numpy as np +from collections import defaultdict as ddict +import lightgbm + + +class GBDTCatBoost: + """GDBT CatBoost.""" + + def __init__(self, + task="regression", + depth=6, + lr=0.1, + l2_leaf_reg=None, + max_bin=None): + """Initiate class.""" + self.task = task + self.depth = depth + self.learning_rate = lr + self.l2_leaf_reg = l2_leaf_reg + self.max_bin = max_bin + + def init_model(self, num_epochs, patience): + """Initiate model.""" + catboost_model_obj = CatBoostRegressor if self.task == "regression" \ + else CatBoostClassifier + # self.loss_function = "RMSE" + # if self.task == "regression" else "CrossEntropy" + self.loss_function = "RMSE" if self.task == "regression" \ + else "MultiClass" + self.custom_metrics = ["R2"] if self.task == "regression" \ + else ["Accuracy"] + # ["Accuracy", "AUC", "Precision", "Recall", "F1", "MCC", "R2"], + + print("loss function: ", self.loss_function) + print("metric: ", self.custom_metrics) + + self.model = catboost_model_obj(iterations=num_epochs, + depth=self.depth, + learning_rate=self.learning_rate, + loss_function=self.loss_function, + custom_metric=self.custom_metrics, + random_seed=0, + early_stopping_rounds=patience, + l2_leaf_reg=self.l2_leaf_reg, + max_bin=self.max_bin, + nan_mode="Min") + + def get_metrics(self): + """Get metrics.""" + d = self.model.evals_result_ + metrics = ddict(list) + keys = ["learn", "validation_0", "validation_1"] \ + if "validation_0" in self.model.evals_result_ \ + else ["learn", "validation"] + for metric_name in d[keys[0]]: + perf = [d[key][metric_name] for key in keys] + if metric_name == self.loss_function: + metrics["loss"] = list(zip(*perf)) + else: + metrics[metric_name.lower()] = list(zip(*perf)) + + 
return metrics + + def get_test_metric(self, metrics, metric_name): + """Get test metric.""" + if metric_name == "loss": + val_epoch = np.argmin([acc[1] for acc in metrics[metric_name]]) + else: + val_epoch = np.argmax([acc[1] for acc in metrics[metric_name]]) + min_metric = metrics[metric_name][val_epoch] + return min_metric, val_epoch + + def save_metrics(self, metrics, fn): + """Save metrics.""" + with open(fn, "w+", encoding="utf-8") as f: + for key, value in metrics.items(): + print(key, value, file=f) + + def train_val_test_split(self, x, y, train_mask, val_mask, test_mask): + """Get train/val/test split.""" + x_train, y_train = x[train_mask], y[train_mask] + x_val, y_val = x[val_mask], y[val_mask] + x_test, y_test = x[test_mask], y[test_mask] + return x_train, y_train, x_val, y_val, x_test, y_test + + def fit(self, + x, y, train_mask, val_mask, test_mask, + num_epochs=1000, patience=200, + plot=False, verbose=False, + loss_fn="", metric_name="loss"): + """Fit model.""" + x_train, y_train, x_val, y_val, x_test, y_test = \ + self.train_val_test_split(x, y, train_mask, val_mask, test_mask) + self.init_model(num_epochs, patience) + + start = time.time() + # print("type(x_train)", type(x_train)) + # print("type(y_train)", type(y_train)) + # print("cat_features", cat_features) + pool = Pool(x_train.numpy(), y_train.numpy()) + eval_set = [(x_val.numpy(), y_val.numpy()), + (x_test.numpy(), y_test.numpy())] + self.model.fit(pool, eval_set=eval_set, plot=plot, verbose=verbose) + finish = time.time() + + num_trees = self.model.tree_count_ + print(f"Finished training. Total time: {finish - start:.2f} |\ + Number of trees: {num_trees:d} |\ + Time per tree: {(time.time() - start )/num_trees:.2f}") + + metrics = self.get_metrics() + min_metric, min_val_epoch = self.get_test_metric(metrics, metric_name) + if loss_fn: + self.save_metrics(metrics, loss_fn) + print(f"Best {metric_name} at iteration {min_val_epoch}:\ + {min_metric[0]:.3f}/{min_metric[1]:.3f}/{min_metric[2]:.3f}") + return metrics + + def predict(self, x_test, y_test): + """Predict.""" + pred = self.model.predict(x_test) + + metrics = {} + metrics["rmse"] = mean_squared_error(pred, y_test) ** .5 + + return metrics + + +class GBDTLGBM: + """GBDT Lightgbm.""" + + def __init__(self, task="regression", lr=0.1, num_leaves=31, max_bin=255, + lambda_l1=0., lambda_l2=0., boosting="gbdt"): + """Initiate lightgbm.""" + self.task = task + self.boosting = boosting + self.learning_rate = lr + self.num_leaves = num_leaves + self.max_bin = max_bin + self.lambda_l1 = lambda_l1 + self.lambda_l2 = lambda_l2 + + def accuracy(self, preds, train_data): + """Calculate accuracy.""" + labels = train_data.get_label() + preds_classes = preds.reshape((preds.shape[0]//labels.shape[0], + labels.shape[0])).argmax(0) + return "accuracy", accuracy_score(labels, preds_classes), True + + def r2(self, preds, train_data): + """Calculate R2.""" + labels = train_data.get_label() + return "r2", r2_score(labels, preds), True + + def init_model(self): + """Initiate model.""" + self.parameters = { + "objective": "regression" if self.task == "regression" + else "multiclass", + "metric": {"rmse"} if self.task == "regression" + else {"multiclass"}, + "num_classes": self.num_classes, + "boosting": self.boosting, + "num_leaves": self.num_leaves, + "max_bin": self.max_bin, + "learning_rate": self.learning_rate, + "lambda_l1": self.lambda_l1, + "lambda_l2": self.lambda_l2, + # "num_threads": 1, + # "feature_fraction": 0.9, + # "bagging_fraction": 0.8, + # "bagging_freq": 5, + 
"verbose": 1, + # "device_type": "gpu" + } + self.evals_result = {} + + def get_metrics(self): + """Get metrics.""" + d = self.evals_result + metrics = ddict(list) + keys = ["training", "valid_1", "valid_2"] \ + if "training" in d \ + else ["valid_0", "valid_1"] + for metric_name in d[keys[0]]: + perf = [d[key][metric_name] for key in keys] + if metric_name in ["regression", "multiclass", "rmse", "l2", + "multi_logloss", "binary_logloss"]: + metrics["loss"] = list(zip(*perf)) + else: + metrics[metric_name] = list(zip(*perf)) + return metrics + + def get_test_metric(self, metrics, metric_name): + """Get test metrics.""" + if metric_name == "loss": + val_epoch = np.argmin([acc[1] for acc in metrics[metric_name]]) + else: + val_epoch = np.argmax([acc[1] for acc in metrics[metric_name]]) + min_metric = metrics[metric_name][val_epoch] + return min_metric, val_epoch + + def save_metrics(self, metrics, fn): + """Save metrics.""" + with open(fn, "w+", encoding="utf-8") as f: + for key, value in metrics.items(): + print(key, value, file=f) + + def train_val_test_split(self, x, y, train_mask, val_mask, test_mask): + """Get train/val/test splits.""" + x_train, y_train = x[train_mask], y[train_mask] + x_val, y_val = x[val_mask], y[val_mask] + x_test, y_test = x[test_mask], y[test_mask] + return x_train, y_train, x_val, y_val, x_test, y_test + + def fit(self, + x, y, train_mask, val_mask, test_mask, + num_epochs=1000, patience=200, + loss_fn="", metric_name="loss"): + """Fit model.""" + x_train, y_train, x_val, y_val, x_test, y_test = \ + self.train_val_test_split(x.numpy(), y.numpy(), + train_mask.numpy(), + val_mask.numpy(), + test_mask.numpy()) + self.num_classes = None if self.task == "regression"\ + else int(y.max()+1) + self.init_model() + + start = time.time() + train_data = lightgbm.Dataset(x_train, label=y_train) + val_data = lightgbm.Dataset(x_val, label=y_val) + test_data = lightgbm.Dataset(x_test, label=y_test) + valid_sets = [train_data, val_data, test_data] + self.model = lightgbm.train(params=self.parameters, + train_set=train_data, + valid_sets=valid_sets, + num_boost_round=num_epochs, + early_stopping_rounds=patience, + evals_result=self.evals_result, + feval=self.r2 if self.task == "regression" + else self.accuracy, + verbose_eval=1) + finish = time.time() + print(f"Finished training. Total time: {finish - start:.2f}") + + metrics = self.get_metrics() + min_metric, min_val_epoch = self.get_test_metric(metrics, metric_name) + if loss_fn: + self.save_metrics(metrics, loss_fn) + print(f"Best {metric_name} at iteration {min_val_epoch}:\ + {min_metric[0]:.3f}/{min_metric[1]:.3f}/{min_metric[2]:.3f}") + return metrics + + def predict(self, x_test, y_test): + """Predict.""" + pred = self.model.predict(x_test) + + metrics = {} + metrics["rmse"] = mean_squared_error(pred, y_test) ** .5 + + return metrics diff --git a/benchmarks/NodeRegression/models/gcn.py b/benchmarks/NodeRegression/models/gcn.py new file mode 100644 index 00000000..ffd0a047 --- /dev/null +++ b/benchmarks/NodeRegression/models/gcn.py @@ -0,0 +1,45 @@ +""" +GCN model in GLI. 
+ +References: +https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn +""" + +from torch import nn +from dgl.nn.pytorch import GraphConv + + +class GCN(nn.Module): + """GCN network.""" + + def __init__(self, + g, + in_feats, + n_hidden, + n_classes, + n_layers, + activation, + dropout): + """Initiate model.""" + super().__init__() + self.g = g + self.layers = nn.ModuleList() + # input layer + self.layers.append(GraphConv(in_feats, n_hidden, + activation=activation)) + # hidden layers + for _ in range(n_layers - 2): + self.layers.append(GraphConv(n_hidden, n_hidden, + activation=activation)) + # output layer + self.layers.append(GraphConv(n_hidden, n_classes)) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, features): + """Forward.""" + h = features + for i, layer in enumerate(self.layers): + if i != 0: + h = self.dropout(h) + h = layer(self.g, h) + return h diff --git a/benchmarks/NodeRegression/models/gcn_minibatch.py b/benchmarks/NodeRegression/models/gcn_minibatch.py new file mode 100644 index 00000000..13a329b1 --- /dev/null +++ b/benchmarks/NodeRegression/models/gcn_minibatch.py @@ -0,0 +1,47 @@ +""" +GCN model in GLI. + +References: +https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn +https://docs.dgl.ai/guide/minibatch-node.html?highlight=sampling +""" + +from torch import nn +from dgl.nn.pytorch import GraphConv + + +class GCNminibatch(nn.Module): + """GCN network.""" + + def __init__(self, + in_feats, + n_hidden, + n_classes, + n_layers, + activation, + dropout): + """Initiate model.""" + super().__init__() + self.layers = nn.ModuleList() + # input layer + self.layers.append(GraphConv(in_feats, n_hidden, + activation=activation, + norm='none')) + # hidden layers + for _ in range(n_layers - 2): + self.layers.append(GraphConv(n_hidden, n_hidden, + activation=activation, + norm='none')) + # output layer + self.layers.append(GraphConv(n_hidden, n_classes, + norm='none')) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, blocks, features): + """Forward.""" + h = features + for i, layer in enumerate(self.layers): + if i != 0: + h = self.dropout(h) + h = layer(blocks[i], h) + return h diff --git a/benchmarks/NodeRegression/models/graph_sage.py b/benchmarks/NodeRegression/models/graph_sage.py new file mode 100644 index 00000000..a62efdb2 --- /dev/null +++ b/benchmarks/NodeRegression/models/graph_sage.py @@ -0,0 +1,47 @@ +""" +GraphSAGE model in GLI. 
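To make the regression setup concrete, here is a toy full-batch training step with the `GCN` model above, mirroring what `train.py` does with `loss_fcn: mse` and the Adam optimizer; the graph, features, and targets are random stand-ins for a GLI dataset.

```python
# Sketch: a few full-batch training steps of GCN on synthetic data,
# using MSE loss as in configs/train_default.yaml.
import dgl
import torch
import torch.nn.functional as F
from torch import nn
from models.gcn import GCN

g = dgl.add_self_loop(dgl.rand_graph(50, 200))
feats = torch.randn(50, 10)
targets = torch.randn(50)  # one scalar regression target per node

model = GCN(g, in_feats=10, n_hidden=8, n_classes=1, n_layers=2,
            activation=F.relu, dropout=0.6)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-3)
loss_fcn = nn.MSELoss()

for epoch in range(5):
    model.train()
    loss = loss_fcn(model(feats).squeeze(), targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(f"epoch {epoch}: loss {loss.item():.4f}")
```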
+ +References: +https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/train_full.py +""" + +from torch import nn +from dgl.nn.pytorch.conv import SAGEConv + + +class GraphSAGE(nn.Module): + """GraphSAGE model.""" + + def __init__(self, + g, + in_feats, + n_hidden, + n_classes, + n_layers, + activation, + dropout, + aggregator_type): + """Initiate model.""" + super().__init__() + self.g = g + self.layers = nn.ModuleList() + self.dropout = nn.Dropout(dropout) + self.activation = activation + + # input layer + self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type)) + # hidden layers + for _ in range(n_layers - 2): + self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type)) + # output layer + self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type)) + + def forward(self, inputs): + """Forward.""" + h = self.dropout(inputs) + for length, layer in enumerate(self.layers): + h = layer(self.g, h) + if length != len(self.layers) - 1: + h = self.activation(h) + h = self.dropout(h) + return h diff --git a/benchmarks/NodeRegression/models/linkx.py b/benchmarks/NodeRegression/models/linkx.py new file mode 100644 index 00000000..8168f833 --- /dev/null +++ b/benchmarks/NodeRegression/models/linkx.py @@ -0,0 +1,112 @@ +""" +LINKX model in Non-Homophily-Large-Scale. + +References: +https://github.com/CUAI/Non-Homophily-Large-Scale +""" + +import torch +from torch import nn +import torch.nn.functional as F +from torch_sparse import SparseTensor + + +class LINKX(nn.Module): + """ + LINKX method with skip connections. + + a = MLP_1(A), x = MLP_2(X), MLP_3(sigma(W_1[a, x] + a + x)). + """ + + def __init__(self, g, in_channels, hidden_channels, out_channels, + num_layers, num_nodes, dropout=.5, inner_activation=False, + inner_dropout=False, init_layers_A=1, + init_layers_X=1): + """Initiate model.""" + super().__init__() + self.g = g + self.mlpa = MLP(num_nodes, hidden_channels, hidden_channels, + init_layers_A, dropout=0) + self.mlpx = MLP(in_channels, hidden_channels, hidden_channels, + init_layers_X, dropout=0) + self.w = nn.Linear(2*hidden_channels, hidden_channels) + self.mlp_final = MLP(hidden_channels, hidden_channels, out_channels, + num_layers, dropout=dropout) + self.in_channels = in_channels + self.num_nodes = num_nodes + self.inner_activation = inner_activation + self.inner_dropout = inner_dropout + + def reset_parameters(self): + """Reset parameters.""" + self.mlpa.reset_parameters() + self.mlpx.reset_parameters() + self.w.reset_parameters() + self.mlp_final.reset_parameters() + + def forward(self, feats): + """Forward.""" + m = self.num_nodes + feat_dim = feats + row, col = self.g.edges() + row = row-row.min() + aa = SparseTensor( + row=row, col=col, sparse_sizes=(m, m) + ).to_torch_sparse_coo_tensor() + + xa = self.mlpa(aa, input_tensor=True) + xx = self.mlpx(feat_dim, input_tensor=True) + x = torch.cat((xa, xx), axis=-1) + x = self.w(x) + if self.inner_dropout: + x = F.dropout(x) + if self.inner_activation: + x = F.relu(x) + x = F.relu(x + xa + xx) + x = self.mlp_final(x, input_tensor=True) + + return x + + +class MLP(nn.Module): + """MLP model.""" + + def __init__(self, in_channels, hidden_channels, out_channels, + num_layers, dropout=.5): + """Initiate layer.""" + super().__init__() + self.lins = nn.ModuleList() + self.bns = nn.ModuleList() + if num_layers == 1: + # just linear layer i.e. 
logistic regression + self.lins.append(nn.Linear(in_channels, out_channels)) + else: + self.lins.append(nn.Linear(in_channels, hidden_channels)) + self.bns.append(nn.BatchNorm1d(hidden_channels)) + for _ in range(num_layers - 2): + self.lins.append(nn.Linear(hidden_channels, hidden_channels)) + self.bns.append(nn.BatchNorm1d(hidden_channels)) + self.lins.append(nn.Linear(hidden_channels, out_channels)) + + self.dropout = dropout + + def reset_parameters(self): + """Reset parameters.""" + for lin in self.lins: + lin.reset_parameters() + for bn in self.bns: + bn.reset_parameters() + + def forward(self, data, input_tensor=False): + """Forward.""" + if not input_tensor: + x = data.graph['node_feat'] + else: + x = data + for i, lin in enumerate(self.lins[:-1]): + x = lin(x) + x = F.relu(x, inplace=True) + x = self.bns[i](x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.lins[-1](x) + return x diff --git a/benchmarks/NodeRegression/models/mixhop.py b/benchmarks/NodeRegression/models/mixhop.py new file mode 100644 index 00000000..747106dc --- /dev/null +++ b/benchmarks/NodeRegression/models/mixhop.py @@ -0,0 +1,132 @@ +""" +MIXHOP model in GLI. + +References: +https://github.com/dmlc/dgl/tree/master/examples/pytorch/mixhop +""" + +import torch +from torch import nn +import torch.nn.functional as F +import dgl.function as fn + + +class MixHopConv(nn.Module): + r"""MixHopConv layer.""" + + def __init__(self, + in_dim, + out_dim, + p, + dropout=0, + activation=None, + batchnorm=False): + """Initiate layer.""" + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.p = p + self.activation = activation + self.batchnorm = batchnorm + + # define dropout layer + self.dropout = nn.Dropout(dropout) + + # define batch norm layer + if self.batchnorm: + self.bn = nn.BatchNorm1d(out_dim * len(p)) + + # define weight dict for each power j + self.weights = nn.ModuleDict({ + str(j): nn.Linear(in_dim, out_dim, bias=False) for j in p + }) + + def forward(self, graph, feats): + """Forward.""" + with graph.local_scope(): + degs = graph.in_degrees().float().clamp(min=1) + norm = torch.pow(degs, -0.5).to(feats.device).unsqueeze(1) + max_j = max(self.p) + 1 + outputs = [] + for j in range(max_j): + + if j in self.p: + output = self.weights[str(j)](feats) + outputs.append(output) + + feats = feats * norm + graph.ndata['h'] = feats + graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h')) + feats = graph.ndata.pop('h') + feats = feats * norm + + final = torch.cat(outputs, dim=1) + + if self.batchnorm: + final = self.bn(final) + + if self.activation is not None: + final = self.activation(final) + + final = self.dropout(final) + + return final + + +class MixHop(nn.Module): + """MixHop model.""" + + def __init__(self, + g, + in_dim, + hid_dim, + out_dim, + p, + num_layers=2, + input_dropout=0.0, + layer_dropout=0.0, + batchnorm=False): + """Initiate model.""" + super().__init__() + self.g = g + self.in_dim = in_dim + self.hid_dim = hid_dim + self.out_dim = out_dim + self.num_layers = num_layers + self.p = p + self.input_dropout = input_dropout + self.layer_dropout = layer_dropout + self.activation = F.tanh + self.batchnorm = batchnorm + + self.layers = nn.ModuleList() + self.dropout = nn.Dropout(self.input_dropout) + + # Input layer + self.layers.append(MixHopConv(self.in_dim, + self.hid_dim, + p=self.p, + dropout=self.input_dropout, + activation=self.activation, + batchnorm=self.batchnorm)) + + # Hidden layers with n - 1 MixHopConv layers + for _ in range(self.num_layers - 2): + 
self.layers.append(MixHopConv(self.hid_dim * len(p), + self.hid_dim, + p=self.p, + dropout=self.layer_dropout, + activation=self.activation, + batchnorm=self.batchnorm)) + + self.fc_layers = nn.Linear(self.hid_dim * len(p), + self.out_dim, bias=False) + + def forward(self, feats): + """Forward.""" + feats = self.dropout(feats) + for layer in self.layers: + feats = layer(self.g, feats) + + feats = self.fc_layers(feats) + return feats diff --git a/benchmarks/NodeRegression/models/mlp.py b/benchmarks/NodeRegression/models/mlp.py new file mode 100644 index 00000000..393412ab --- /dev/null +++ b/benchmarks/NodeRegression/models/mlp.py @@ -0,0 +1,45 @@ +""" +MLP model in GLI. + +References: +https://github.com/dmlc/dgl/blob/195f99362d883f8b6d131b70a7868a537e55b786/examples/pytorch/grand/model.py +""" + +from torch import nn + + +class MLP(nn.Module): + """MLP network.""" + + def __init__(self, + in_feats, + n_hidden, + n_classes, + n_layers, + activation, + dropout): + """Initiate model.""" + super().__init__() + self.layers = nn.ModuleList() + self.activation = activation + # input layer + self.layers.append(nn.Linear(in_feats, n_hidden, bias=True)) + + # hidden layers + for _ in range(n_layers - 1): + self.layers.append(nn.Linear(n_hidden, n_hidden, bias=True)) + + # output layer + self.layers.append(nn.Linear(n_hidden, n_classes, bias=True)) + + self.dropout = nn.Dropout(dropout) + + def forward(self, features): + """Forward.""" + h = features + for i, layer in enumerate(self.layers): + if i != 0: + h = self.dropout(h) + h = layer(h) + h = self.activation(h) + return h diff --git a/benchmarks/NodeRegression/models/monet.py b/benchmarks/NodeRegression/models/monet.py new file mode 100644 index 00000000..c54c0af7 --- /dev/null +++ b/benchmarks/NodeRegression/models/monet.py @@ -0,0 +1,63 @@ +""" +GAT model in GLI. + +References: +https://github.com/dmlc/dgl/blob/master/examples/pytorch/monet/citation.py +""" + +import torch +from torch import nn +from dgl.nn.pytorch.conv import GMMConv + + +class MoNet(nn.Module): + """Monet model.""" + + def __init__(self, + g, + in_feats, + n_hidden, + out_feats, + n_layers, + dim, + n_kernels, + dropout): + """Initiate model.""" + super().__init__() + self.g = g + self.layers = nn.ModuleList() + self.pseudo_proj = nn.ModuleList() + + # process pseudo + us, vs = g.edges(order="eid") + udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / \ + torch.sqrt(g.in_degrees(vs).float()) + self.pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1) + + # Input layer + self.layers.append( + GMMConv(in_feats, n_hidden, dim, n_kernels)) + self.pseudo_proj.append( + nn.Sequential(nn.Linear(2, dim), nn.Tanh())) + + # Hidden layer + for _ in range(n_layers - 2): + self.layers.append(GMMConv(n_hidden, n_hidden, dim, n_kernels)) + self.pseudo_proj.append( + nn.Sequential(nn.Linear(2, dim), nn.Tanh())) + + # Output layer + self.layers.append(GMMConv(n_hidden, out_feats, dim, n_kernels)) + self.pseudo_proj.append( + nn.Sequential(nn.Linear(2, dim), nn.Tanh())) + self.dropout = nn.Dropout(dropout) + + def forward(self, feat): + """Forward.""" + h = feat + for i in range(len(self.layers)): + if i != 0: + h = self.dropout(h) + h = self.layers[i]( + self.g, h, self.pseudo_proj[i](self.pseudo)) + return h diff --git a/benchmarks/NodeRegression/train.py b/benchmarks/NodeRegression/train.py new file mode 100644 index 00000000..b19c9838 --- /dev/null +++ b/benchmarks/NodeRegression/train.py @@ -0,0 +1,178 @@ +""" +Train for node regression dataset. 
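As with the other models, `MixHop` above is instantiated by `utils.generate_model` from the values in `configs/MixHop.yaml`; a minimal stand-alone sketch with a toy graph (all data here is synthetic):

```python
# Sketch: instantiate MixHop with the values shipped in configs/MixHop.yaml
# and run a forward pass on random data.
import dgl
import torch
from models.mixhop import MixHop

g = dgl.add_self_loop(dgl.rand_graph(30, 120))
feats = torch.randn(30, 12)

model = MixHop(g, in_dim=12, hid_dim=8, out_dim=1, p=[0, 1, 2],
               num_layers=2, input_dropout=0.5, layer_dropout=0.9,
               batchnorm=False)
out = model(feats)  # shape: (30, 1)
```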
+ +References: +https://github.com/dmlc/dgl/blob/master/examples/pytorch/gat/train.py +https://github.com/pyg-team/pytorch_geometric/blob/master/graphgym/main.py +""" + + +import time +import re +import torch +from torch import nn +import numpy as np +import dgl +import gli +from utils import generate_model, parse_args, Models_need_to_be_densed,\ + load_config_file, check_multiple_split,\ + EarlyStopping, set_seed, Datasets_need_to_be_undirected,\ + get_label_number +from gli.utils import to_dense + + +def evaluate(model, features, labels, mask, eval_func): + """Evaluate model.""" + model.eval() + with torch.no_grad(): + logits = model(features) + logits = logits[mask] + labels = labels[mask] + return eval_func(logits.squeeze(), labels) + + +def main(): + """Load dataset and train the model.""" + # Load cmd line args + args = parse_args() + print(args) + # Load config file + model_cfg = load_config_file(args.model_cfg) + train_cfg = load_config_file(args.train_cfg) + set_seed(train_cfg["seed"]) + + # load and preprocess dataset + if args.gpu < 0: + device = "cpu" + cuda = False + else: + device = args.gpu + cuda = True + + data = gli.dataloading.get_gli_dataset(args.dataset, args.task, + args.task_id, device, + args.verbose) + g = data[0] + if train_cfg["to_dense"] or \ + args.model in Models_need_to_be_densed: + g = to_dense(g) + # add self loop + if train_cfg["self_loop"]: + g = dgl.remove_self_loop(g) + g = dgl.add_self_loop(g) + # convert to undirected set + if train_cfg["to_undirected"] or \ + args.dataset in Datasets_need_to_be_undirected: + g = g.to("cpu") + g = dgl.to_bidirected(g, copy_ndata=True) + g = g.to(device) + + feature_name = re.search(r".*Node/(\w+)", data.features[0]).group(1) + label_name = re.search(r".*Node/(\w+)", data.target).group(1) + features = g.ndata[feature_name] + labels = g.ndata[label_name].squeeze() + train_mask = g.ndata["train_mask"] + val_mask = g.ndata["val_mask"] + test_mask = g.ndata["test_mask"] + + # for multi-split dataset, choose 0-th split for now + if check_multiple_split(args.dataset): + train_mask = train_mask[:, 0] + val_mask = val_mask[:, 0] + test_mask = test_mask[:, 0] + + # When labels contains -1, modify masks + if labels.min() < 0: + train_mask = train_mask * (labels >= 0) + val_mask = val_mask * (labels >= 0) + test_mask = test_mask * (labels >= 0) + + in_feats = features.shape[1] + n_classes = data.num_labels + n_edges = g.number_of_edges() + + print(f"""----Data statistics------' + #Edges {n_edges} + #Classes {n_classes} + #Train samples {train_mask.int().sum().item()} + #Val samples {val_mask.int().sum().item()} + #Test samples {test_mask.int().sum().item()}""") + + # create model + label_number = get_label_number(labels) + model = generate_model(args, g, in_feats, label_number, **model_cfg) + + # create loss function and evalution function + if train_cfg["loss_fcn"] == "mse": + eval_func = loss_fcn = nn.MSELoss() + elif train_cfg["loss_fcn"] == "mae": + eval_func = loss_fcn = nn.L1Loss() + else: + raise NotImplementedError(f"Loss function \ + {train_cfg['loss_fcn']} is not supported.") + + print(model) + if cuda: + model.cuda() + + # use optimizer + if train_cfg["optimizer"] == "AdamW": + optimizer = torch.optim.AdamW( + model.parameters(), lr=train_cfg["lr"], + weight_decay=train_cfg["weight_decay"]) + elif train_cfg["optimizer"] == "Adam": + optimizer = torch.optim.Adam( + model.parameters(), lr=train_cfg["lr"], + weight_decay=train_cfg["weight_decay"]) + else: + raise NotImplementedError(f"Optimizer \ + 
{train_cfg['optimizer']} is not supported.") + + ckpt_name = args.model + "_" + args.dataset + "_" + ckpt_name += args.train_cfg + stopper = EarlyStopping(ckpt_name=ckpt_name, + early_stop=train_cfg["early_stopping"], + patience=50) + + # initialize graph + dur = [] + for epoch in range(train_cfg["max_epoch"]): + model.train() + if epoch >= 3: + if cuda: + torch.cuda.synchronize() + t0 = time.time() + # forward + logits = model(features) + loss = loss_fcn(logits[train_mask].squeeze(), + labels[train_mask].float()) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if epoch >= 3: + if cuda: + torch.cuda.synchronize() + dur.append(time.time() - t0) + + val_loss = evaluate(model, features, labels, val_mask, eval_func) + print(f"Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f}" + f"| Loss {loss.item():.4f} | " + f" Val Loss {val_loss.item():.4f} | " + f"ETputs(KTEPS) {n_edges / np.mean(dur) / 1000:.2f}") + + if stopper.step(val_loss.item(), model): + break + + print() + + model.load_state_dict(torch.load(stopper.ckpt_dir)) + + loss = evaluate(model, features, labels, test_mask, eval_func) + val_loss = stopper.best_score + print(f"Test loss {loss:.4f}, Val loss {val_loss:.4f}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/NodeRegression/train_gbdt.py b/benchmarks/NodeRegression/train_gbdt.py new file mode 100644 index 00000000..4fa76f97 --- /dev/null +++ b/benchmarks/NodeRegression/train_gbdt.py @@ -0,0 +1,243 @@ +""" +Train for GBDT. + +Reference: +https://github.com/nd7141/bgnn/blob/master/scripts/run.py +""" +import os +import re +import json +import time +import datetime +from collections import defaultdict as ddict + +import numpy as np +from sklearn.model_selection import ParameterGrid + +import gli +from utils import parse_args, set_seed,\ + load_config_file, check_multiple_split +from gli.utils import to_dense +from models.gbdt import GBDTCatBoost, GBDTLGBM + + +class RunModel: + """Model class for gbdt.""" + + def __init__(self, args, model_cfg, train_cfg): + """Initiate model.""" + self.args = args + self.model_cfg = model_cfg + self.train_cfg = train_cfg + + def read_input(self): + """Read input.""" + data = gli.dataloading.get_gli_dataset(self.args.dataset, + "NodeClassification") + g = data[0] + g = to_dense(g) + feature_name = re.search(r".*Node/(\w+)", data.features[0]).group(1) + label_name = re.search(r".*Node/(\w+)", data.target).group(1) + features = g.ndata[feature_name] + labels = g.ndata[label_name].squeeze() + train_mask = g.ndata["train_mask"] + val_mask = g.ndata["val_mask"] + test_mask = g.ndata["test_mask"] + # for multi-split dataset, choose 0-th split for now + if check_multiple_split(self.args.dataset): + train_mask = train_mask[:, 0] + val_mask = val_mask[:, 0] + test_mask = test_mask[:, 0] + + # When labels contains -1, modify masks + if labels.min() < 0: + train_mask = train_mask * (labels >= 0) + val_mask = val_mask * (labels >= 0) + test_mask = test_mask * (labels >= 0) + + self.x = features + self.y = labels + + self.masks = {"0": {"train": train_mask, + "val": val_mask, + "test": test_mask}} + + def get_input(self): + """Get input.""" + if self.save_folder is None: + self.save_folder = f"grid/gdbt_results/{self.args.dataset}/\ + {datetime.datetime.now().strftime('%d_%m')}" + + self.read_input() + print("Save to folder:", self.save_folder) + + def run_one_model(self, config_fn, model_name): + """Run single model.""" + print(config_fn) + # self.config = OmegaConf.load(config_fn) + # print(type(self.config)) + # 
print(self.config) + # grid = ParameterGrid(dict(self.config.hp)) + + self.config = load_config_file(config_fn) + print(type(self.config)) + print(self.config) + grid = ParameterGrid(self.config["hp"]) + + for ps in grid: + print("hyper params: ", ps) + param_string = "".join([f"-{key}{ps[key]}" for key in ps]) + exp_name = f"{model_name}{param_string}" + print(f"\nSeed {self.seed} RUNNING:{exp_name}") + + runs = [] + runs_custom = [] + times = [] + for _ in range(self.repeat_exp): + start = time.time() + model = self.define_model(model_name, ps) + + inputs = {"x": self.x, "y": self.y, + "train_mask": self.train_mask, + "val_mask": self.val_mask, + "test_mask": self.test_mask} + + metrics = model.fit(num_epochs=self.config["num_epochs"], + patience=self.config["patience"], + loss_fn=f"{self.seed_folder}/\ + {exp_name}.txt", + metric_name="loss" + if self.task == "regression" + else "accuracy", **inputs) + finish = time.time() + best_loss = min(metrics["loss"], key=lambda x: x[1]) + best_custom = max(metrics["r2" if self.task == "regression" + else "accuracy"], + key=lambda x: x[1]) + runs.append(best_loss) + runs_custom.append(best_custom) + times.append(finish - start) + self.store_results[exp_name] = (list(map(np.mean, + zip(*runs))), + list(map(np.mean, + zip(*runs_custom))), + np.mean(times)) + + def define_model(self, model_name, ps): + """Define model.""" + if model_name == "catboost": + return GBDTCatBoost(self.task, **ps) + elif model_name == "lightgbm": + return GBDTLGBM(self.task, **ps) + + def create_save_folder(self, seed): + """Create folder to save output.""" + self.seed_folder = f"{self.save_folder}/{seed}" + os.makedirs(self.seed_folder, exist_ok=True) + + def split_masks(self, seed): + """Split masks.""" + self.train_mask = self.masks[seed]["train"] + self.val_mask = self.masks[seed]["val"] + self.test_mask = self.masks[seed]["test"] + + def save_results(self, seed): + """Save results.""" + self.seed_results[seed] = self.store_results + with open(f"{self.save_folder}/seed_results.json", "w+", + encoding="utf-8") as f: + json.dump(self.seed_results, f) + + self.aggregated = self.aggregate_results() + with open(f"{self.save_folder}/aggregated_results.json", "w+", + encoding="utf-8") as f: + json.dump(self.aggregated, f) + + def aggregate_results(self): + """Aggregate results.""" + model_best_score = ddict(list) + model_best_time = ddict(list) + + results = self.seed_results + # print("results:", results) + for seed_tuple in results.items(): + # print("seed_tuple", seed_tuple) + # print("seed_tuple[1]", seed_tuple[1]) + model_results_for_seed = ddict(list) + for _, output in seed_tuple[1].items(): + model_name = self.args.model + if self.task == "regression": # rmse metric + val_metric, test_metric = output[0][1], output[0][2] + cur_time = output[2] + else: # accuracy metric + val_metric, test_metric = output[1][1], output[1][2] + cur_time = output[2] + model_results_for_seed[model_name].append((val_metric, + test_metric, + cur_time)) + + for model_name, model_results in model_results_for_seed.items(): + if self.task == "regression": + best_result = min(model_results) # rmse + else: + best_result = max(model_results) # accuracy + model_best_score[model_name].append(best_result[1]) + model_best_time[model_name].append(best_result[2]) + + aggregated = {} + for model, scores in model_best_score.items(): + aggregated[model] = (np.mean(scores), np.std(scores), + np.mean(model_best_time[model]), + np.std(model_best_time[model])) + return aggregated + + def run(self, + save_folder: 
str = None, + task: str = "NodeClassification", + repeat_exp: int = 1, + max_seeds: int = 5, + ): + """Run the model.""" + start2run = time.time() + self.repeat_exp = repeat_exp + self.max_seeds = max_seeds + print(self.args.dataset, task, repeat_exp, max_seeds) + + self.task = task + self.save_folder = save_folder + self.get_input() + + self.seed_results = {} + for ix, seed in enumerate(self.masks): + print(f"{self.args.dataset} Seed {seed}") + self.seed = seed + + self.create_save_folder(seed) + self.split_masks(seed) + + self.store_results = {} + self.run_one_model("configs/" + self.args.model + ".yaml", + self.args.model) + + self.save_results(seed) + if ix+1 >= max_seeds: + break + + print(f"Finished {self.args.dataset}: {time.time() - start2run} sec.") + + +def main(): + """Load dataset and train the model.""" + # Load cmd line args + args = parse_args() + print(args) + # Load config file + model_cfg = load_config_file(args.model_cfg) + train_cfg = load_config_file(args.train_cfg) + set_seed(train_cfg["seed"]) + + RunModel(args, model_cfg, train_cfg).run() + + +if __name__ == "__main__": + main() diff --git a/benchmarks/NodeRegression/train_minibatch.py b/benchmarks/NodeRegression/train_minibatch.py new file mode 100644 index 00000000..5acc0006 --- /dev/null +++ b/benchmarks/NodeRegression/train_minibatch.py @@ -0,0 +1,207 @@ +""" +Train for node classification dataset. + +References: +https://github.com/dmlc/dgl/blob/master/examples/pytorch/gat/train.py +https://github.com/pyg-team/pytorch_geometric/blob/master/graphgym/main.py +https://docs.dgl.ai/guide/minibatch-node.html?highlight=sampling +""" + + +import time +import re +import torch +from torch import nn +import numpy as np +import dgl +import gli +from utils import generate_model, parse_args, \ + load_config_file, check_multiple_split,\ + EarlyStopping, set_seed +from gli.utils import to_dense +from dgl.dataloading import MultiLayerFullNeighborSampler as Sampler + + +# def accuracy(logits, labels): +# """Calculate accuracy.""" +# _, indices = torch.max(logits, dim=1) +# correct = torch.sum(indices == labels) +# return correct.item() * 1.0 / len(labels) + + +def evaluate(model, dataloader, eval_func): + """Evaluate model.""" + model.eval() + ys = [] + y_hats = [] + for _, _, blocks in dataloader: + with torch.no_grad(): + input_features = blocks[0].srcdata["NodeFeature"] + ys.append(blocks[-1].dstdata["NodeLabel"]) + y_hats.append(model(blocks, input_features)) + return eval_func(torch.cat(y_hats).squeeze(), torch.cat(ys).float()) + + +def main(): + """Load dataset and train the model.""" + # Load cmd line args + args = parse_args() + print(args) + # Load config file + model_cfg = load_config_file(args.model_cfg) + train_cfg = load_config_file(args.train_cfg) + set_seed(train_cfg["seed"]) + + # load and preprocess dataset + if args.gpu < 0: + device = "cpu" + cuda = False + else: + device = args.gpu + cuda = True + + data = gli.dataloading.get_gli_dataset(args.dataset, args.task, + device=device) + # check EdgeFeature and multi-modal node features + edge_cnt = node_cnt = 0 + if len(data.features) > 1: + for _, element in enumerate(data.features): + if "Edge" in element: + edge_cnt += 1 + if "Node" in element: + node_cnt += 1 + if edge_cnt >= 1: + raise NotImplementedError("Edge feature is not supported yet.") + elif node_cnt >= 2: + raise NotImplementedError("Multi-modal node features\ + is not supported yet.") + + g = data[0] + indice = data.get_node_indices() + + g = to_dense(g) + # add self loop + if train_cfg["self_loop"]: 
+ g = dgl.remove_self_loop(g) + g = dgl.add_self_loop(g) + + feature_name = re.search(r".*Node/(\w+)", data.features[0]).group(1) + features = g.ndata[feature_name] + + # for multi-split dataset, choose 0-th split for now + if check_multiple_split(args.dataset): + train_mask = train_mask[:, 0] + val_mask = val_mask[:, 0] + test_mask = test_mask[:, 0] + + in_feats = features.shape[1] + n_classes = data.num_labels + n_edges = g.number_of_edges() + + sampler = Sampler(model_cfg["num_layers"]) + train_dataloader = dgl.dataloading.DataLoader( + g, indice["train_set"], sampler, + batch_size=train_cfg["batch_size"], + device=device, + shuffle=True, + drop_last=False) + + valid_dataloader = dgl.dataloading.DataLoader( + g, indice["val_set"], sampler, + device=device, + batch_size=train_cfg["batch_size"], + shuffle=True, + drop_last=False) + + test_dataloader = dgl.dataloading.DataLoader( + g, indice["test_set"], sampler, + device=device, + batch_size=train_cfg["batch_size"], + shuffle=True, + drop_last=False) + + print(f"""----Data statistics------' + #Edges {n_edges} + #Classes {n_classes}""") + + # create model, supporting only single label task + label_number = 1 + model = generate_model(args, g, in_feats, label_number, **model_cfg) + + print(model) + if cuda: + model.cuda() + + # create loss function and evalution function + if train_cfg["loss_fcn"] == "mse": + eval_func = loss_fcn = nn.MSELoss() + elif train_cfg["loss_fcn"] == "mae": + eval_func = loss_fcn = nn.L1Loss() + else: + raise NotImplementedError(f"Loss function \ + {train_cfg['loss_fcn']} is not supported.") + + # use optimizer + optimizer = torch.optim.Adam( + model.parameters(), lr=train_cfg["lr"], + weight_decay=train_cfg["weight_decay"]) + + ckpt_name = args.model + "_" + args.dataset + "_" + ckpt_name += args.train_cfg + stopper = EarlyStopping(ckpt_name=ckpt_name, + early_stop=train_cfg["early_stopping"], + patience=50) + + # initialize graph + dur = [] + for epoch in range(train_cfg["max_epoch"]): + model.train() + if epoch >= 3: + if cuda: + torch.cuda.synchronize() + t0 = time.time() + + for it, (_, _, blocks) in enumerate(train_dataloader): + if cuda: + blocks = [b.to(torch.device("cuda")) for b in blocks] + input_features = blocks[0].srcdata["NodeFeature"] + output_labels = blocks[-1].dstdata["NodeLabel"] + + # When labels contains -1, modify labels + if min(output_labels) < 0: + output_labels = output_labels * (output_labels >= 0) + + logits = model(blocks, input_features) + loss = loss_fcn(logits.squeeze(), output_labels.float()) + optimizer.zero_grad() + loss.backward() + optimizer.step() + if it % 20 == 0: + # train_acc = loss_fcn(logits.squeeze(), output_labels.float()) + print("Loss", loss.item()) + + if epoch >= 3: + if cuda: + torch.cuda.synchronize() + dur.append(time.time() - t0) + + val_loss = evaluate(model, valid_dataloader, eval_func) + print(f"Epoch {epoch:05d} | Time(s) {np.mean(dur):.4f}" + f"| Loss {loss:.4f} | " + f" Val Loss {val_loss.item():.4f} | " + f"ETputs(KTEPS) {n_edges / np.mean(dur) / 1000:.2f}") + + if stopper.step(val_loss.item(), model): + break + + print() + + model.load_state_dict(torch.load(stopper.ckpt_dir)) + + loss = evaluate(model, test_dataloader, eval_func) + val_loss = stopper.best_score + print(f"Test loss {loss:.4f}, Val loss {val_loss:.4f}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/NodeRegression/utils.py b/benchmarks/NodeRegression/utils.py new file mode 100644 index 00000000..682fe141 --- /dev/null +++ b/benchmarks/NodeRegression/utils.py @@ -0,0 
+1,241 @@ +""" +Utility functions. + +References: +https://github.com/pyg-team/pytorch_geometric/blob/ +575611f4f5e2209c7923dba977a1eebc207bd2e2/torch_geometric/ +graphgym/cmd_args.py +https://github.com/dmlc/dgl/blob/a107993f106cecb1c375f7a6ae41088d04f29e29/ +examples/pytorch/caregnn/utils.py +https://github.com/CUAI/Non-Homophily-Large-Scale/blob/master/data_utils.py +""" +import argparse +import yaml +import os +import fnmatch +import json +import random +import shutil +import torch +import torch.nn.functional as F +import numpy as np +from models.gcn import GCN +from models.gat import GAT +from models.monet import MoNet +from models.graph_sage import GraphSAGE +from models.mlp import MLP +from models.gcn_minibatch import GCNminibatch +from models.mixhop import MixHop +from models.linkx import LINKX + +Models_need_to_be_densed = ["GCN", "GraphSAGE", "GAT", "MixHop", "LINKX"] +Datasets_need_to_be_undirected = ["pokec", "genius", "penn94", "twitch-gamers"] + + +def generate_model(args, g, in_feats, n_classes, **model_cfg): + """Generate required model.""" + # create models + if args.model == "GCN": + model = GCN(g=g, + in_feats=in_feats, + n_hidden=model_cfg["num_hidden"], + n_classes=n_classes, + n_layers=model_cfg["num_layers"], + activation=F.relu, + dropout=model_cfg["dropout"]) + elif args.model == "GAT": + heads = ([model_cfg["num_heads"]] * (model_cfg["num_layers"]))\ + + [model_cfg["num_out_heads"]] + model = GAT(g, + model_cfg["num_layers"], + in_feats, + model_cfg["num_hidden"], + n_classes, + heads, + F.elu, + model_cfg["dropout"], + model_cfg["dropout"], + model_cfg["negative_slope"], + model_cfg["residual"]) + elif args.model == "MoNet": + model = MoNet(g, + in_feats, + model_cfg["num_hidden"], + n_classes, + model_cfg["num_layers"], + model_cfg["pseudo_dim"], + model_cfg["num_kernels"], + model_cfg["dropout"]) + elif args.model == "GraphSAGE": + model = GraphSAGE(g, + in_feats, + model_cfg["num_hidden"], + n_classes, + model_cfg["num_layers"], + F.relu, + model_cfg["dropout"], + model_cfg["aggregator_type"]) + elif args.model == "MLP": + model = MLP(in_feats, + model_cfg["num_hidden"], + n_classes, + model_cfg["num_layers"], + F.relu, + model_cfg["dropout"]) + elif args.model == "GCN_minibatch": + model = GCNminibatch(in_feats, + model_cfg["num_hidden"], + n_classes, + model_cfg["num_layers"], + F.relu, + model_cfg["dropout"]) + elif args.model == "MixHop": + model = MixHop(g, + in_dim=in_feats, + hid_dim=model_cfg["num_hidden"], + out_dim=n_classes, + p=model_cfg["p"], + num_layers=model_cfg["num_layers"], + input_dropout=model_cfg["dropout"], + layer_dropout=model_cfg["layer_dropout"], + batchnorm=model_cfg["batchnorm"]) + elif args.model == "LINKX": + model = LINKX(g=g, + in_channels=in_feats, + num_nodes=g.ndata["NodeFeature"].shape[0], + hidden_channels=model_cfg["num_hidden"], + out_channels=n_classes, + num_layers=model_cfg["num_layers"], + dropout=model_cfg["dropout"], + inner_activation=model_cfg["inner_activation"], + inner_dropout=model_cfg["inner_dropout"], + init_layers_A=model_cfg["init_layers_A"], + init_layers_X=model_cfg["init_layers_X"]) + try: + model + except UnboundLocalError as exc: + raise NameError(f"model {args.model} is not supported yet.") from exc + else: + return model + + +def parse_args(): + """Parse the command line arguments.""" + parser = argparse.ArgumentParser(description="train for node\ + classification") + parser.add_argument("--model-cfg", type=str, + default="configs/model_default.yaml", + help="The model configuration file path.") + 
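A small usage sketch for `generate_model` above, building the MLP baseline from the default model configuration; the `argparse.Namespace`, toy graph, and feature sizes are placeholders for what `train.py` supplies at runtime.

```python
# Sketch: build the MLP baseline via generate_model using
# configs/model_default.yaml; only args.model is read by generate_model.
import argparse
import dgl
import torch
from utils import generate_model, load_config_file

args = argparse.Namespace(model="MLP")  # stand-in for parse_args()
model_cfg = load_config_file("configs/model_default.yaml")

g = dgl.rand_graph(10, 20)  # unused by MLP, but required by the signature
model = generate_model(args, g, in_feats=16, n_classes=1, **model_cfg)
out = model(torch.randn(10, 16))  # shape: (10, 1)
```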
parser.add_argument("--train-cfg", type=str, + default="configs/train_default.yaml", + help="The training configuration file path.") + parser.add_argument("--model", type=str, default="GCN", + help="model to be used. GCN, GAT, MoNet,\ + GraphSAGE, MLP, LINKX, MixHop for now") + parser.add_argument("--dataset", type=str, default="cora", + help="dataset to be trained") + parser.add_argument("--task", type=str, + default="NodeRegression", + help="task name. NodeClassification,\ + GraphClassification, LinkPrediction,\ + TimeDependentLinkPrediction,\ + KGRelationPrediction, NodeRegression.\ + KGEntityPrediction, GraphRegression,\ + for now") + parser.add_argument("--task-id", type=int, default=1, + help="task id, starting from 1") + parser.add_argument("--gpu", type=int, default=-1, + help="which GPU to use. Set -1 to use CPU.") + parser.add_argument("--verbose", type=bool, default=False, + help="whether to print verbosely") + return parser.parse_args() + + +def load_config_file(path): + """Load yaml files.""" + with open(path, "r", encoding="utf-8") as stream: + try: + parsed_yaml = yaml.full_load(stream) + print(parsed_yaml) + return parsed_yaml + except yaml.YAMLError as exc: + print(exc) + + +def check_multiple_split(dataset): + """Check whether the dataset has multiple splits.""" + dataset_directory = os.path.dirname(os.path.dirname(os.getcwd())) \ + + "/datasets/" + dataset + for file in os.listdir(dataset_directory): + if fnmatch.fnmatch(file, "task*.json"): + with open(dataset_directory + "/" + file, encoding="utf-8") as f: + task_dict = json.load(f) + if "num_splits" in task_dict and task_dict["num_splits"] > 1: + return 1 + else: + return 0 + + +class EarlyStopping: + """Do early stopping.""" + + def __init__(self, ckpt_name, early_stop, patience=50): + """Init early stopping.""" + self.patience = patience + self.counter = 0 + self.best_score = None + self.early_stop = False + self.early_stop_flag = early_stop + self.dir_name = "checkpoints/" + if ~os.path.isdir(self.dir_name): + os.makedirs(self.dir_name, exist_ok=True) + ckpt_name = ckpt_name.replace("/", "_") + ckpt_name = os.path.splitext(ckpt_name)[0] + self.ckpt_dir = self.dir_name + ckpt_name + "_checkpoint.pt" + + def step(self, loss, model): + """Step early stopping.""" + score = loss + if self.best_score is None: + self.best_score = score + self.save_checkpoint(model) + elif score > self.best_score: + if self.early_stop_flag: + self.counter += 1 + print(f"EarlyStopping counter: {self.counter}\ + out of {self.patience}") + if self.counter >= self.patience: + self.early_stop = True + else: + self.best_score = score + self.save_checkpoint(model) + self.counter = 0 + return self.early_stop + + def save_checkpoint(self, model): + """Save model when validation loss decrease.""" + torch.save(model.state_dict(), self.ckpt_dir) + + +def makedirs_rm_exist(dir_name): + """Make a directory, remove any existing data.""" + if os.path.isdir(dir_name): + shutil.rmtree(dir_name) + os.makedirs(dir_name, exist_ok=True) + + +def set_seed(seed): + """Set random seed.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + + +def get_label_number(labels): + """Return the label number of dataset.""" + if len(labels.shape) > 1: + return labels.shape[1] + else: + return 1
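Finally, a small usage sketch for the `EarlyStopping` helper above, tracking a validation loss the way `train.py` does; the losses and the tiny model are synthetic, and the checkpoint is written under `checkpoints/`.

```python
# Sketch: EarlyStopping keeps the best (lowest) validation loss and stops
# after `patience` epochs without improvement; train.py uses patience=50.
import torch
from utils import EarlyStopping

model = torch.nn.Linear(4, 1)  # stand-in for a real GNN
stopper = EarlyStopping(ckpt_name="demo", early_stop=True, patience=3)

for epoch, val_loss in enumerate([1.0, 0.8, 0.9, 0.95, 0.97, 0.99]):
    if stopper.step(val_loss, model):
        print(f"early stop at epoch {epoch}")
        break

model.load_state_dict(torch.load(stopper.ckpt_dir))
```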