mxlab-12-4-rnn_deep_prediction.py

# http://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
# Video: https://www.youtube.com/watch?v=ftMq5ps503w
import numpy as np
import mxnet as mx
import logging
import sys
from sklearn.preprocessing import MinMaxScaler
# brew install graphviz
# pip3 install graphviz
# pip3 install pydot-ng
import matplotlib.pyplot as plt
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)  # configure logging so training metrics are printed to stdout
np.random.seed(777)
mx.random.seed(777)
timesteps = seq_length = 7  # each input window covers 7 consecutive trading days
batch_size = 32
data_dim = 5                # five input features per day (the columns of the CSV)
def build_sym(seq_len, use_cudnn=False):
    """Build the symbol for stock-price prediction.

    Parameters
    ----------
    seq_len : int
        Length of the input sequence (number of time steps).
    use_cudnn : bool, optional
        Whether to use the LSTM implemented in cuDNN, which is faster than the
        unfused version but requires a GPU context.

    Returns
    -------
    pred : mx.sym.Symbol
        The prediction result.
    """
    data = mx.sym.var("data")      # Shape: (N, T, C)
    target = mx.sym.var("target")  # Shape: (N, 1)
    data = mx.sym.transpose(data, axes=(1, 0, 2))  # Shape: (T, N, C)
    if use_cudnn:
        lstm1 = mx.rnn.FusedRNNCell(num_hidden=5, mode="lstm", prefix="lstm1_")
        lstm2 = mx.rnn.FusedRNNCell(num_hidden=10, mode="lstm", prefix="lstm2_",
                                    get_next_state=True)
    else:
        lstm1 = mx.rnn.LSTMCell(num_hidden=5, prefix="lstm1_")
        lstm2 = mx.rnn.LSTMCell(num_hidden=10, prefix="lstm2_")
    L1, _ = lstm1.unroll(length=seq_len, inputs=data, merge_outputs=True,
                         layout="TNC")  # Shape: (T, N, 5)
    L1 = mx.sym.Dropout(L1, p=0.2)      # Shape: (T, N, 5)
    _, L2_states = lstm2.unroll(length=seq_len, inputs=L1, merge_outputs=True,
                                layout="TNC")
    # Only the final hidden state of the second LSTM is fed to the regression layer.
    L2 = mx.sym.reshape(L2_states[0], shape=(-1, 0), reverse=True)  # Shape: (N, 10)
    pred = mx.sym.FullyConnected(L2, num_hidden=1, name="pred")
    pred = mx.sym.LinearRegressionOutput(data=pred, label=target)
    return pred
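# The graphviz/pydot install notes above suggest visualizing the symbol. The lines
# below are a minimal sketch (an addition, not part of the original script) of how the
# unrolled network could be rendered with mx.viz.plot_network; the title string is
# arbitrary, and the call is left commented out so the script runs without graphviz.
# graph = mx.viz.plot_network(build_sym(seq_len=seq_length, use_cudnn=False),
#                             title="rnn_deep_prediction", save_format="png")
# graph.render()  # writes the graph image to disk via graphviz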
# Open, High, Low, Volume, Close (the last column, Close, is used as the label below)
xy = np.loadtxt('data-02-stock_daily.csv', delimiter=',')
xy = xy[::-1]  # reverse order so the rows are chronologically ordered
# Scaling every column to [0, 1] is very important; training does not work without it.
scaler = MinMaxScaler(feature_range=(0, 1))
xy = scaler.fit_transform(xy)
x = xy
y = xy[:, [-1]] # Close as label
dataX = []
dataY = []
for i in range(0, len(y) - seq_length):
    _x = x[i:i + seq_length]
    _y = y[i + seq_length]  # Next close price
    print(_x, "->", _y)
    dataX.append(_x)
    dataY.append(_y)
# split into train and test sets
train_size = int(len(dataY) * 0.7)
test_size = len(dataY) - train_size
trainX, testX = np.array(dataX[0:train_size]), np.array(
    dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(
    dataY[train_size:len(dataY)])
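# Optional sanity check (an addition, not part of the original script): each sample is a
# (seq_length, data_dim) = (7, 5) window of scaled features and each label is the next
# day's scaled close price, so the expected shapes are
#   trainX: (train_size, 7, 5), trainY: (train_size, 1)
#   testX:  (test_size, 7, 5),  testY:  (test_size, 1)
print(trainX.shape, trainY.shape, testX.shape, testY.shape)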
def train_eval_net(use_cudnn):
    """Train the two-layer LSTM and evaluate it on the test windows."""
    pred = build_sym(seq_len=seq_length, use_cudnn=use_cudnn)
    net = mx.mod.Module(symbol=pred, data_names=['data'], label_names=['target'],
                        context=mx.gpu())
    train_iter = mx.io.NDArrayIter(data=trainX, label=trainY,
                                   data_name="data", label_name="target",
                                   batch_size=batch_size,
                                   shuffle=True)
    test_iter = mx.io.NDArrayIter(data=testX, label=testY,
                                  data_name="data", label_name="target",
                                  batch_size=batch_size)
    net.fit(train_data=train_iter, eval_data=test_iter,
            initializer=mx.init.Xavier(rnd_type="gaussian", magnitude=1),
            optimizer="adam",
            optimizer_params={"learning_rate": 1E-3},
            eval_metric="mse", num_epoch=200)
    # make predictions on the test set
    testPredict = net.predict(test_iter).asnumpy()
    mse = np.mean((testPredict - testY) ** 2)
    return testPredict, mse
# To report predictions in the original price scale, the scaling would have to be
# inverted using the statistics stored in `scaler` (note that inverse_transform
# expects all five columns, not a single column); a hedged sketch follows.
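# Minimal sketch (an addition, not part of the original script) of one way to undo the
# MinMaxScaler for the close-price column only, using the per-column statistics
# (data_min_, data_range_) learned above. `testPredict` is the array returned by
# train_eval_net, so these lines would live inside or after that call.
# close_min = scaler.data_min_[-1]      # minimum of the label column before scaling
# close_range = scaler.data_range_[-1]  # (max - min) of the label column
# testPredict_price = testPredict * close_range + close_min
# testY_price = testY * close_range + close_min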
import time
print("Begin to train LSTM with CUDNN acceleration...")
begin = time.time()
cudnn_pred, cudnn_mse = train_eval_net(use_cudnn=True)
end = time.time()
cudnn_time_spent = end - begin
print("Done!")
print("Begin to train LSTM without CUDNN acceleration...")
begin = time.time()
normal_pred, normal_mse = train_eval_net(use_cudnn=False)
end = time.time()
normal_time_spent = end - begin
print("Done!")
print("CUDNN time spent: %g, test mse: %g" % (cudnn_time_spent, cudnn_mse))
print("NoCUDNN time spent: %g, test mse: %g" % (normal_time_spent, normal_mse))
plt.close('all')
fig = plt.figure()
plt.plot(testY, label='Ground Truth')
plt.plot(cudnn_pred, label='With cuDNN')
plt.plot(normal_pred, label='Without cuDNN')
plt.legend()
plt.show()
'''
CUDNN time spent: 10.0955, test mse: 0.00721571
NoCUDNN time spent: 38.9882, test mse: 0.00565724
'''