Skip to content

Commit fbfcc5a

Browse files
committed
update notebook, more understandable
1 parent 52a9e0e commit fbfcc5a

File tree

1 file changed

+143
-92
lines changed

1 file changed

+143
-92
lines changed

1.Fully_connected_psepssm_predict_enzyme/predict_enzyme.ipynb

Lines changed: 143 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,14 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"# Load data first"
7+
"# Load related packages"
88
]
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 1,
12+
"execution_count": 12,
1313
"metadata": {},
14-
"outputs": [
15-
{
16-
"name": "stderr",
17-
"output_type": "stream",
18-
"text": [
19-
"Using TensorFlow backend.\n"
20-
]
21-
}
22-
],
14+
"outputs": [],
2315
"source": [
2416
"import cPickle\n",
2517
"import numpy as np\n",
@@ -33,27 +25,16 @@
3325
]
3426
},
3527
{
36-
"cell_type": "code",
37-
"execution_count": 2,
38-
"metadata": {
39-
"collapsed": true,
40-
"scrolled": true
41-
},
42-
"outputs": [],
28+
"cell_type": "markdown",
29+
"metadata": {},
4330
"source": [
44-
"test_ratio = 0.1\n",
45-
"number_class = 2\n",
46-
"number_features = 16306\n",
47-
"batch_size = 1024\n",
48-
"epochs = 50"
31+
"# Function related to loading data"
4932
]
5033
},
5134
{
5235
"cell_type": "code",
53-
"execution_count": 3,
54-
"metadata": {
55-
"collapsed": true
56-
},
36+
"execution_count": 13,
37+
"metadata": {},
5738
"outputs": [],
5839
"source": [
5940
"def Pfam_from_pickle_file_encoding(name_list_pickle_filename,model_names_list_filename):\n",
@@ -76,9 +57,16 @@
7657
"\treturn encoding"
7758
]
7859
},
60+
{
61+
"cell_type": "markdown",
62+
"metadata": {},
63+
"source": [
64+
"# Load the data"
65+
]
66+
},
7967
{
8068
"cell_type": "code",
81-
"execution_count": 4,
69+
"execution_count": 14,
8270
"metadata": {},
8371
"outputs": [
8472
{
@@ -106,21 +94,55 @@
10694
"label = tf.keras.utils.to_categorical(label,num_classes=2)"
10795
]
10896
},
97+
{
98+
"cell_type": "markdown",
99+
"metadata": {},
100+
"source": [
101+
"# Define hyper-parameters"
102+
]
103+
},
109104
{
110105
"cell_type": "code",
111-
"execution_count": 5,
106+
"execution_count": 15,
112107
"metadata": {
113-
"collapsed": true
108+
"scrolled": true
114109
},
115110
"outputs": [],
111+
"source": [
112+
"test_ratio = 0.1 # fraction of the data held out for testing; the rest is used for training\n",
113+
"number_class = 2 # total number of classes, used to define the network structure\n",
114+
"number_features = 16306 # total number of features, used to define the network structure\n",
115+
"batch_size = 1024 # stochastic gradient descent, training batch size\n",
116+
"epochs = 5 # number of training epochs"
117+
]
118+
},
119+
{
120+
"cell_type": "markdown",
121+
"metadata": {},
122+
"source": [
123+
"# Split training data and testing data"
124+
]
125+
},
126+
{
127+
"cell_type": "code",
128+
"execution_count": 16,
129+
"metadata": {},
130+
"outputs": [],
116131
"source": [
117132
"x_train, x_test, y_train, y_test = train_test_split(\n",
118133
" feature, label, test_size=test_ratio, random_state=0)"
119134
]
120135
},
136+
{
137+
"cell_type": "markdown",
138+
"metadata": {},
139+
"source": [
140+
"# Build the network"
141+
]
142+
},
121143
{
122144
"cell_type": "code",
123-
"execution_count": null,
145+
"execution_count": 17,
124146
"metadata": {},
125147
"outputs": [
126148
{
@@ -130,86 +152,115 @@
130152
"_________________________________________________________________\n",
131153
"Layer (type) Output Shape Param # \n",
132154
"=================================================================\n",
133-
"dense_1 (Dense) (None, 1024) 16698368 \n",
155+
"dense_7 (Dense) (None, 1024) 16698368 \n",
134156
"_________________________________________________________________\n",
135-
"dropout_1 (Dropout) (None, 1024) 0 \n",
157+
"dropout_5 (Dropout) (None, 1024) 0 \n",
136158
"_________________________________________________________________\n",
137-
"dense_2 (Dense) (None, 1024) 1049600 \n",
159+
"dense_8 (Dense) (None, 1024) 1049600 \n",
138160
"_________________________________________________________________\n",
139-
"dropout_2 (Dropout) (None, 1024) 0 \n",
161+
"dropout_6 (Dropout) (None, 1024) 0 \n",
140162
"_________________________________________________________________\n",
141-
"dense_3 (Dense) (None, 2) 2050 \n",
163+
"dense_9 (Dense) (None, 2) 2050 \n",
142164
"=================================================================\n",
143165
"Total params: 17,750,018\n",
144166
"Trainable params: 17,750,018\n",
145167
"Non-trainable params: 0\n",
146-
"_________________________________________________________________\n",
147-
"Train on 39902 samples, validate on 4434 samples\n",
148-
"Epoch 1/50\n",
149-
"39902/39902 [==============================] - 9s 232us/step - loss: 0.4076 - acc: 0.8252 - val_loss: 0.2237 - val_acc: 0.9213\n",
150-
"Epoch 2/50\n",
151-
"39902/39902 [==============================] - 8s 191us/step - loss: 0.1455 - acc: 0.9540 - val_loss: 0.1822 - val_acc: 0.9443\n",
152-
"Epoch 3/50\n",
153-
"39902/39902 [==============================] - 7s 185us/step - loss: 0.1090 - acc: 0.9658 - val_loss: 0.1856 - val_acc: 0.9436\n",
154-
"Epoch 4/50\n",
155-
"39902/39902 [==============================] - 7s 166us/step - loss: 0.0979 - acc: 0.9679 - val_loss: 0.1887 - val_acc: 0.9441\n",
156-
"Epoch 5/50\n",
157-
"39902/39902 [==============================] - 6s 162us/step - loss: 0.0912 - acc: 0.9693 - val_loss: 0.1952 - val_acc: 0.9452\n",
158-
"Epoch 6/50\n",
159-
"39902/39902 [==============================] - 7s 175us/step - loss: 0.0861 - acc: 0.9712 - val_loss: 0.2007 - val_acc: 0.9454\n",
160-
"Epoch 7/50\n",
161-
"39902/39902 [==============================] - 7s 178us/step - loss: 0.0845 - acc: 0.9715 - val_loss: 0.2042 - val_acc: 0.9425\n",
162-
"Epoch 8/50\n",
163-
"39902/39902 [==============================] - 7s 178us/step - loss: 0.0821 - acc: 0.9723 - val_loss: 0.2068 - val_acc: 0.9436\n",
164-
"Epoch 9/50\n",
165-
"39902/39902 [==============================] - 7s 164us/step - loss: 0.0810 - acc: 0.9721 - val_loss: 0.2098 - val_acc: 0.9429\n",
166-
"Epoch 10/50\n",
167-
"39902/39902 [==============================] - 6s 149us/step - loss: 0.0788 - acc: 0.9731 - val_loss: 0.2098 - val_acc: 0.9434\n",
168-
"Epoch 11/50\n",
169-
"39902/39902 [==============================] - 6s 156us/step - loss: 0.0782 - acc: 0.9733 - val_loss: 0.2208 - val_acc: 0.9416\n",
170-
"Epoch 12/50\n",
171-
"39902/39902 [==============================] - 7s 170us/step - loss: 0.0768 - acc: 0.9732 - val_loss: 0.2277 - val_acc: 0.9432\n",
172-
"Epoch 13/50\n",
173-
"39902/39902 [==============================] - 7s 174us/step - loss: 0.0765 - acc: 0.9738 - val_loss: 0.2270 - val_acc: 0.9418\n",
174-
"Epoch 14/50\n",
175-
"39902/39902 [==============================] - 7s 175us/step - loss: 0.0742 - acc: 0.9744 - val_loss: 0.2364 - val_acc: 0.9391\n",
176-
"Epoch 15/50\n",
177-
"16384/39902 [===========>..................] - ETA: 3s - loss: 0.0697 - acc: 0.9757"
168+
"_________________________________________________________________\n"
178169
]
179170
}
180171
],
181172
"source": [
182-
"model = Sequential()\n",
183-
"model.add(Dense(1024, activation='relu', input_shape=(number_features,)))\n",
184-
"model.add(Dropout(0.3))\n",
185-
"model.add(Dense(1024, activation='relu'))\n",
186-
"model.add(Dropout(0.3))\n",
187-
"model.add(Dense(number_class, activation='softmax'))\n",
188-
"\n",
189-
"model.summary()\n",
190-
"\n",
173+
"model = Sequential() # linear stack of layers\n",
174+
"model.add(Dense(1024, activation='relu', input_shape=(number_features,))) # fully connected layer\n",
175+
"model.add(Dropout(0.3)) # dropout some nodes to avoid overfitting\n",
176+
"model.add(Dense(1024, activation='relu')) # fully connected layer\n",
177+
"model.add(Dropout(0.3)) # dropout\n",
178+
"model.add(Dense(number_class, activation='softmax')) # final classification layer\n",
179+
"model.summary() # summarize the model structure and parameters"
180+
]
181+
},
182+
{
183+
"cell_type": "markdown",
184+
"metadata": {},
185+
"source": [
186+
"# Define loss, optimizer (update rule), and metrics for monitoring the training process"
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": 18,
192+
"metadata": {},
193+
"outputs": [],
194+
"source": [
191195
"model.compile(loss='categorical_crossentropy',\n",
192196
" optimizer=keras.optimizers.adam(),\n",
193-
" metrics=['accuracy'])\n",
194-
"\n",
197+
" metrics=['accuracy'])"
198+
]
199+
},
200+
{
201+
"cell_type": "markdown",
202+
"metadata": {},
203+
"source": [
204+
"# Run the training loop"
205+
]
206+
},
207+
{
208+
"cell_type": "code",
209+
"execution_count": 19,
210+
"metadata": {},
211+
"outputs": [
212+
{
213+
"name": "stdout",
214+
"output_type": "stream",
215+
"text": [
216+
"Train on 39902 samples, validate on 4434 samples\n",
217+
"Epoch 1/5\n",
218+
"39902/39902 [==============================] - 6s 144us/step - loss: 0.3966 - acc: 0.8318 - val_loss: 0.2212 - val_acc: 0.9283\n",
219+
"Epoch 2/5\n",
220+
"39902/39902 [==============================] - 6s 142us/step - loss: 0.1457 - acc: 0.9545 - val_loss: 0.1830 - val_acc: 0.9436\n",
221+
"Epoch 3/5\n",
222+
"39902/39902 [==============================] - 6s 140us/step - loss: 0.1083 - acc: 0.9663 - val_loss: 0.1853 - val_acc: 0.9452\n",
223+
"Epoch 4/5\n",
224+
"39902/39902 [==============================] - 5s 132us/step - loss: 0.0986 - acc: 0.9670 - val_loss: 0.1906 - val_acc: 0.9447\n",
225+
"Epoch 5/5\n",
226+
"39902/39902 [==============================] - 5s 120us/step - loss: 0.0916 - acc: 0.9697 - val_loss: 0.1960 - val_acc: 0.9454\n"
227+
]
228+
}
229+
],
230+
"source": [
195231
"history = model.fit(x_train, y_train,\n",
196232
" batch_size=batch_size,\n",
197233
" epochs=epochs,\n",
198234
" verbose=1,\n",
199-
" validation_data=(x_test, y_test))\n",
200-
"score = model.evaluate(x_test, y_test, verbose=0)\n",
201-
"print('Test loss:', score[0])\n",
202-
"print('Test accuracy:', score[1])"
235+
" validation_data=(x_test, y_test))"
236+
]
237+
},
238+
{
239+
"cell_type": "markdown",
240+
"metadata": {},
241+
"source": [
242+
"# Evaluate the trained model"
203243
]
204244
},
205245
{
206246
"cell_type": "code",
207-
"execution_count": null,
208-
"metadata": {
209-
"collapsed": true
210-
},
211-
"outputs": [],
212-
"source": []
247+
"execution_count": 21,
248+
"metadata": {},
249+
"outputs": [
250+
{
251+
"name": "stdout",
252+
"output_type": "stream",
253+
"text": [
254+
"('Test loss:', 0.1959872514036361)\n",
255+
"('Test accuracy:', 0.9454217409840241)\n"
256+
]
257+
}
258+
],
259+
"source": [
260+
"score = model.evaluate(x_test, y_test, verbose=0)\n",
261+
"print('Test loss:', score[0])\n",
262+
"print('Test accuracy:', score[1])"
263+
]
213264
}
214265
],
215266
"metadata": {
@@ -221,14 +272,14 @@
221272
"language_info": {
222273
"codemirror_mode": {
223274
"name": "ipython",
224-
"version": 3
275+
"version": 2
225276
},
226277
"file_extension": ".py",
227278
"mimetype": "text/x-python",
228279
"name": "python",
229280
"nbconvert_exporter": "python",
230-
"pygments_lexer": "ipython3",
231-
"version": "3.6.1"
281+
"pygments_lexer": "ipython2",
282+
"version": "2.7.13"
232283
}
233284
},
234285
"nbformat": 4,

0 commit comments

Comments
 (0)