-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathQuantopian.py
225 lines (173 loc) · 7.05 KB
/
Quantopian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
"""
A simple Quantopian algorithm that applies a basic machine-learning idea
to a fixed list of stocks.
Alpha idea:
1. Classify whether the price is going up or down
2. If the classification probability is >= the threshold, buy
3. If the classification probability is < the threshold, stay out (close any open position)
"""
import pandas as pd
import talib
import time
from sklearn import ensemble
from sklearn import linear_model
from sklearn import metrics
from sklearn import preprocessing
# Global parameters shared by the functions below
train_data_periods = 504 # number of daily bars requested when fitting each classifier
pred_data_periods = 100 # number of daily bars requested when building the sample to classify
classification_threshold = 0.55 # minimum class-1 probability that produces a buy signal
vol = 30000 # money committed per entry; divided by the price to get a share count
def get_data(stock, data, periods):
    """
    Build the feature matrix and the up/down labels for one stock.

    Requests `periods` daily bars for `stock` through data.history(), adds
    the indicator columns produced by feature_engineering(), and drops every
    row that still contains a NaN.

    Returns a tuple (train_data, nominal_class):
    train_data    -- DataFrame with every column except 'price', holding all
                     remaining rows but the first
    nominal_class -- single-column DataFrame of ints: 1 where a row's price
                     is >= the previous row's price, 0 otherwise
    Both frames have the same number of rows and a default integer index.
    """
    fields = ['open', 'high', 'low', 'close', 'volume', 'price']
    history = data.history(stock, fields, periods, '1d')
    # drop bars with missing values before computing indicators
    history = history.dropna()
    history = feature_engineering(history)
    # drop the rows where an indicator could not be computed (NaN)
    history = history.dropna()

    # Pick the label source by column name, not by position: nothing here
    # guarantees that 'price' is the last column of the frame, and a
    # positional slice would silently use whichever column happens to be last.
    prices = history['price'].values
    went_up = prices[:-1] <= prices[1:]
    nominal_class = pd.DataFrame(went_up.astype(int))

    # NOTE(review): label i compares row i with row i+1 and is paired with
    # the features of row i+1, i.e. the features are taken AFTER the move
    # they are labelled with. If the goal is to classify the *next* move,
    # the features should probably come from row i instead. Left as is
    # because my_rebalance() uses the last returned row as the live sample.
    features = history.drop('price', axis=1)
    train_data = pd.DataFrame(features.values[1:], columns=list(features.columns))
    return train_data, nominal_class
def feature_engineering(train_data):
    """
    Add technical-indicator columns to `train_data` and return it.

    `train_data` must have 'high', 'low' and 'close' columns. The frame is
    modified in place: every indicator is inserted at position 0, so the
    last indicator added ends up as the first column and the original
    columns keep their relative order at the end.

    Columns added: sma20, cci14, rsi14, adx14, atr14, Bollinger bands for
    20 and 50 periods, MACD (12/26/9) line, signal and histogram, and the
    slow stochastic %K / %D (5/3/3).
    """
    high = train_data['high'].values
    low = train_data['low'].values
    close = train_data['close'].values

    bb20_upper, bb20_middle, bb20_lower = talib.BBANDS(
        close, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    bb50_upper, bb50_middle, bb50_lower = talib.BBANDS(
        close, timeperiod=50, nbdevup=2, nbdevdn=2, matype=0)
    # fastperiod was 13, which contradicted the 'macd1226' column names;
    # 12/26/9 is what those names describe
    macd, macd_signal, macd_hist = talib.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9)
    slow_k, slow_d = talib.STOCH(
        high, low, close,
        fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)

    # listed in insertion order; each one is pushed to the front of the frame
    indicators = [
        ('sma20', talib.SMA(close, timeperiod=20)),
        ('cci14', talib.CCI(high, low, close, timeperiod=14)),
        ('rsi14', talib.RSI(close, timeperiod=14)),
        ('adx14', talib.ADX(high, low, close, timeperiod=14)),
        ('atr14', talib.ATR(high, low, close, timeperiod=14)),
        ('bb20Upperband', bb20_upper),
        ('bb20Middleband', bb20_middle),
        ('bb20Lowerband', bb20_lower),
        ('bb50Upperband', bb50_upper),
        ('bb50Middleband', bb50_middle),
        ('bb50Lowerband', bb50_lower),
        ('macd1226', macd),
        ('macdSignal1226', macd_signal),
        ('macdHist1226', macd_hist),
        ('stochasticSlowK335', slow_k),
        ('stochasticSlowD335', slow_d),
    ]
    for name, values in indicators:
        # trailing True is allow_duplicates, kept from the original calls
        train_data.insert(0, name, values, True)
    return train_data
def create_symbol_list(data):
    """
    Build the list of stocks the algorithm trades.

    Resolves every ticker of the fixed watch list with symbol() and returns
    only the ones that remove_untradable() keeps for `data`.
    """
    tickers = ('FB', 'AMZN', 'NDAQ', 'AAPL', 'HD', 'V')
    candidates = [symbol(ticker) for ticker in tickers]
    # filter out anything that cannot be traded
    return remove_untradable(candidates, data)
def remove_untradable(stocks, data):
    """
    Return the members of `stocks` for which data.can_trade() is true.

    The input order is preserved and `stocks` itself is not modified.
    Every rejected stock is reported on stdout.
    """
    to_trade = []
    for stock in stocks:
        if data.can_trade(stock):
            to_trade.append(stock)
        else:
            # one pre-formatted argument prints the same text whether print
            # is a statement (Python 2) or a function (Python 3)
            print("can't trade: %s untradable" % (stock,))
    return to_trade
def initialize(context):
    """
    One-time setup, run when the algorithm starts.
    """
    # run my_rebalance every day, one hour after the market opens
    every_day = date_rules.every_day()
    hour_after_open = time_rules.market_open(hours=1)
    schedule_function(my_rebalance, every_day, hour_after_open)
def before_trading_start(context, data):
    """
    Called every day before market open: refresh the stock list and retrain.

    Rebuilds context.stocks with create_symbol_list(), then fits a new
    RandomForestClassifier for each stock on
    get_data(stock, data, train_data_periods). Fitted models are stored in
    context.classifiers under the key '<stock>_classifier'. A stock whose
    data retrieval or fit raises is reported on stdout and gets no entry.
    """
    context.stocks = create_symbol_list(data)
    # start from scratch so no model from a previous day is reused
    context.classifiers = {}
    for stock in context.stocks:
        try:
            train_data, nominal_class = get_data(stock, data, train_data_periods)
            classifier = ensemble.RandomForestClassifier(n_estimators=50, max_depth=10)
            # labels come back as a one-column frame; flatten them to 1-D
            classifier.fit(train_data.values, nominal_class.values.ravel())
        except Exception as e:
            # best effort: one failing stock must not stop the others.
            # Single pre-formatted argument keeps the output identical under
            # both the print statement and the print function.
            print('error: before_trading_start %s' % (e,))
        else:
            # register the model only after a successful fit, so an
            # unfitted classifier is never left behind in the dict
            context.classifiers[str(stock) + '_classifier'] = classifier
def my_rebalance(context, data):
    """
    Scheduled entry point: classify each stock and place orders.

    For every stock in context.stocks, builds the feature frame from the
    last `pred_data_periods` daily bars, feeds its final row to the stock's
    classifier from context.classifiers, turns the probabilities into
    signals with check_for_signals() and hands them to trade(). Any
    exception for a stock (including a missing classifier) is reported on
    stdout and the loop moves on to the next stock.
    """
    for stock in context.stocks:
        try:
            classifier_name = str(stock) + '_classifier'
            # only the features are needed here; the labels are discarded
            features, _ = get_data(stock, data, pred_data_periods)
            # predict_proba expects a 2-D array, so reshape the single row
            latest = features.values[-1, :].reshape(1, -1)
            proba = context.classifiers[classifier_name].predict_proba(latest)
            buy, sell, out = check_for_signals(proba)
            trade(data, context, stock, buy, sell, out)
        except Exception as e:
            # best effort per stock; single pre-formatted argument prints the
            # same text under Python 2 and Python 3
            print('error: my_rebalance %s' % (e,))
def check_for_signals(proba, threshold=None):
    """
    Turn class probabilities into trading signals.

    proba     -- indexable as proba[0][1]; that entry is compared with the
                 threshold (an IndexError is raised if it does not exist)
    threshold -- cut-off for a buy signal; when omitted, the module-level
                 `classification_threshold` is used

    Returns a tuple (buy, sell, out) of bools. `buy` is True when
    proba[0][1] >= threshold, `out` is True otherwise, and `sell` is
    always False.
    """
    if threshold is None:
        threshold = classification_threshold
    # bool() so callers always get plain Python booleans
    buy = bool(proba[0][1] >= threshold)
    return buy, False, not buy
def trade(data, context, stock, buy, sell, out):
    """
    Enter or exit a position in `stock` according to the signals.

    buy  -- when true: if the current position is flat or short, order
            enough shares to end up long `amount`; if already long, order
            `amount` additional shares
    sell -- accepted but not acted on
    out  -- when true: close any open position

    `amount` is the number of whole shares that `vol` buys at the current
    price. Raises ValueError when the current price is not a positive
    number.
    """
    positions = context.portfolio.positions[stock].amount
    price = data.current(stock, 'price')
    # `not price > 0` also rejects NaN, which compares false to everything
    if not price > 0:
        raise ValueError('no usable price for %s: %s' % (stock, price))
    # Divide by the real price: truncating the price to an int first
    # distorts the share count and divides by zero for prices below 1.
    amount = int(vol // price)
    # single pre-formatted argument prints the same text under Python 2 and 3
    print('amount: %s positions: %s buy: %s sell: %s out: %s'
          % (amount, positions, buy, sell, out))
    # buy signal
    if buy:
        if positions <= 0:
            # also covers an existing short: buy it back and go long
            order(stock, amount - positions)
        else:
            order(stock, amount)
    # out signal; compare by value, not identity
    if out and positions != 0:
        order(stock, -positions)