|
4 | 4 | "cell_type": "markdown",
|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 |
| - "# Load data first" |
| 7 | + "# Load related packages" |
8 | 8 | ]
|
9 | 9 | },
|
10 | 10 | {
|
11 | 11 | "cell_type": "code",
|
12 |
| - "execution_count": 1, |
| 12 | + "execution_count": 12, |
13 | 13 | "metadata": {},
|
14 |
| - "outputs": [ |
15 |
| - { |
16 |
| - "name": "stderr", |
17 |
| - "output_type": "stream", |
18 |
| - "text": [ |
19 |
| - "Using TensorFlow backend.\n" |
20 |
| - ] |
21 |
| - } |
22 |
| - ], |
| 14 | + "outputs": [], |
23 | 15 | "source": [
|
24 | 16 | "import cPickle\n",
|
25 | 17 | "import numpy as np\n",
|
|
33 | 25 | ]
|
34 | 26 | },
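Most of this import cell is elided in the diff. Judging only from the symbols used in the later cells (Sequential, Dense, Dropout, to_categorical, train_test_split, keras.optimizers), the cell plausibly needs imports along these lines; this is a hedged reconstruction for orientation, not the literal cell contents:

```python
import cPickle                              # Python 2 pickle reader used by the loading function
import numpy as np
import tensorflow as tf                     # for tf.keras.utils.to_categorical in the data cell
import keras                                # keras.optimizers.adam() in the compile cell
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
```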
|
35 | 27 | {
|
36 |
| - "cell_type": "code", |
37 |
| - "execution_count": 2, |
38 |
| - "metadata": { |
39 |
| - "collapsed": true, |
40 |
| - "scrolled": true |
41 |
| - }, |
42 |
| - "outputs": [], |
| 28 | + "cell_type": "markdown", |
| 29 | + "metadata": {}, |
43 | 30 | "source": [
|
44 |
| - "test_ratio = 0.1\n", |
45 |
| - "number_class = 2\n", |
46 |
| - "number_features = 16306\n", |
47 |
| - "batch_size = 1024\n", |
48 |
| - "epochs = 50" |
| 31 | + "# Function related to loading data" |
49 | 32 | ]
|
50 | 33 | },
|
51 | 34 | {
|
52 | 35 | "cell_type": "code",
|
53 |
| - "execution_count": 3, |
54 |
| - "metadata": { |
55 |
| - "collapsed": true |
56 |
| - }, |
| 36 | + "execution_count": 13, |
| 37 | + "metadata": {}, |
57 | 38 | "outputs": [],
|
58 | 39 | "source": [
|
59 | 40 | "def Pfam_from_pickle_file_encoding(name_list_pickle_filename,model_names_list_filename):\n",
|
|
76 | 57 | "\treturn encoding"
|
77 | 58 | ]
|
78 | 59 | },
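The body of `Pfam_from_pickle_file_encoding` is elided in this diff. For readers following along, here is a minimal sketch of what a pickle-backed multi-hot encoder with this signature could look like; the pickle layout, the model-names file format, and the encoding scheme are assumptions, not the notebook's actual code:

```python
import cPickle  # Python 2; use pickle on Python 3
import numpy as np

def pfam_from_pickle_sketch(name_list_pickle_filename, model_names_list_filename):
    # Assumed layout: the pickle holds one list of Pfam model names per sequence,
    # and the text file lists all Pfam model names, one per line.
    with open(name_list_pickle_filename, 'rb') as f:
        hits_per_sequence = cPickle.load(f)
    with open(model_names_list_filename) as f:
        model_names = [line.strip() for line in f if line.strip()]
    index = {name: i for i, name in enumerate(model_names)}

    # Multi-hot encoding: one row per sequence, one column per Pfam model.
    encoding = np.zeros((len(hits_per_sequence), len(model_names)), dtype=np.float32)
    for row, hits in enumerate(hits_per_sequence):
        for name in hits:
            if name in index:
                encoding[row, index[name]] = 1.0
    return encoding
```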
|
| 60 | + { |
| 61 | + "cell_type": "markdown", |
| 62 | + "metadata": {}, |
| 63 | + "source": [ |
| 64 | + "# Load the data" |
| 65 | + ] |
| 66 | + }, |
79 | 67 | {
|
80 | 68 | "cell_type": "code",
|
81 |
| - "execution_count": 4, |
| 69 | + "execution_count": 14, |
82 | 70 | "metadata": {},
|
83 | 71 | "outputs": [
|
84 | 72 | {
|
|
106 | 94 | "label = tf.keras.utils.to_categorical(label,num_classes=2)"
|
107 | 95 | ]
|
108 | 96 | },
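`tf.keras.utils.to_categorical` turns the integer class labels into one-hot rows, which is what the `categorical_crossentropy` loss used later expects. A quick illustration:

```python
import tensorflow as tf

labels = [0, 1, 1, 0]
print(tf.keras.utils.to_categorical(labels, num_classes=2))
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]
#  [1. 0.]]
```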
|
| 97 | + { |
| 98 | + "cell_type": "markdown", |
| 99 | + "metadata": {}, |
| 100 | + "source": [ |
| 101 | + "# Define hyper-parameters" |
| 102 | + ] |
| 103 | + }, |
109 | 104 | {
|
110 | 105 | "cell_type": "code",
|
111 |
| - "execution_count": 5, |
| 106 | + "execution_count": 15, |
112 | 107 | "metadata": {
|
113 |
| - "collapsed": true |
| 108 | + "scrolled": true |
114 | 109 | },
|
115 | 110 | "outputs": [],
|
| 111 | + "source": [ |
| 112 | + "test_ratio = 0.1 # how much data for training and how much data for testing\n", |
| 113 | + "number_class = 2 # total number of classes, useful for define network structure\n", |
| 114 | + "number_features = 16306 # total number of feature, useful for define network structure\n", |
| 115 | + "batch_size = 1024 # stochastic gradient descent, training batch size\n", |
| 116 | + "epochs = 5 # training epoches" |
| 117 | + ] |
| 118 | + }, |
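With these settings, each epoch makes one pass over the 39,902 training samples (reported by `model.fit` below) in mini-batches of 1,024, so there are ceil(39902 / 1024) = 39 gradient updates per epoch and 195 over the 5 epochs. A quick check:

```python
import math

train_samples = 39902   # as reported by model.fit below
batch_size = 1024
epochs = 5

updates_per_epoch = int(math.ceil(train_samples / float(batch_size)))
print(updates_per_epoch)            # 39
print(updates_per_epoch * epochs)   # 195
```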
| 119 | + { |
| 120 | + "cell_type": "markdown", |
| 121 | + "metadata": {}, |
| 122 | + "source": [ |
| 123 | + "# Splite training data and testing data" |
| 124 | + ] |
| 125 | + }, |
| 126 | + { |
| 127 | + "cell_type": "code", |
| 128 | + "execution_count": 16, |
| 129 | + "metadata": {}, |
| 130 | + "outputs": [], |
116 | 131 | "source": [
|
117 | 132 | "x_train, x_test, y_train, y_test = train_test_split(\n",
|
118 | 133 | " feature, label, test_size=test_ratio, random_state=0)"
|
119 | 134 | ]
|
120 | 135 | },
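`train_test_split` with `random_state=0` makes the split reproducible. If the two classes are imbalanced, a stratified split keeps the class ratio identical in both sets; a sketch of that variant (not what the notebook does, since `label` is already one-hot at this point we stratify on its argmax):

```python
import numpy as np
from sklearn.model_selection import train_test_split

class_index = np.argmax(label, axis=1)   # recover the integer class per row

x_train, x_test, y_train, y_test = train_test_split(
    feature, label,
    test_size=test_ratio,
    random_state=0,
    stratify=class_index)                # preserve class proportions in both splits
```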
|
| 136 | + { |
| 137 | + "cell_type": "markdown", |
| 138 | + "metadata": {}, |
| 139 | + "source": [ |
| 140 | + "# Build the network" |
| 141 | + ] |
| 142 | + }, |
121 | 143 | {
|
122 | 144 | "cell_type": "code",
|
123 |
| - "execution_count": null, |
| 145 | + "execution_count": 17, |
124 | 146 | "metadata": {},
|
125 | 147 | "outputs": [
|
126 | 148 | {
|
|
130 | 152 | "_________________________________________________________________\n",
|
131 | 153 | "Layer (type) Output Shape Param # \n",
|
132 | 154 | "=================================================================\n",
|
133 |
| - "dense_1 (Dense) (None, 1024) 16698368 \n", |
| 155 | + "dense_7 (Dense) (None, 1024) 16698368 \n", |
134 | 156 | "_________________________________________________________________\n",
|
135 |
| - "dropout_1 (Dropout) (None, 1024) 0 \n", |
| 157 | + "dropout_5 (Dropout) (None, 1024) 0 \n", |
136 | 158 | "_________________________________________________________________\n",
|
137 |
| - "dense_2 (Dense) (None, 1024) 1049600 \n", |
| 159 | + "dense_8 (Dense) (None, 1024) 1049600 \n", |
138 | 160 | "_________________________________________________________________\n",
|
139 |
| - "dropout_2 (Dropout) (None, 1024) 0 \n", |
| 161 | + "dropout_6 (Dropout) (None, 1024) 0 \n", |
140 | 162 | "_________________________________________________________________\n",
|
141 |
| - "dense_3 (Dense) (None, 2) 2050 \n", |
| 163 | + "dense_9 (Dense) (None, 2) 2050 \n", |
142 | 164 | "=================================================================\n",
|
143 | 165 | "Total params: 17,750,018\n",
|
144 | 166 | "Trainable params: 17,750,018\n",
|
145 | 167 | "Non-trainable params: 0\n",
|
146 |
| - "_________________________________________________________________\n", |
147 |
| - "Train on 39902 samples, validate on 4434 samples\n", |
148 |
| - "Epoch 1/50\n", |
149 |
| - "39902/39902 [==============================] - 9s 232us/step - loss: 0.4076 - acc: 0.8252 - val_loss: 0.2237 - val_acc: 0.9213\n", |
150 |
| - "Epoch 2/50\n", |
151 |
| - "39902/39902 [==============================] - 8s 191us/step - loss: 0.1455 - acc: 0.9540 - val_loss: 0.1822 - val_acc: 0.9443\n", |
152 |
| - "Epoch 3/50\n", |
153 |
| - "39902/39902 [==============================] - 7s 185us/step - loss: 0.1090 - acc: 0.9658 - val_loss: 0.1856 - val_acc: 0.9436\n", |
154 |
| - "Epoch 4/50\n", |
155 |
| - "39902/39902 [==============================] - 7s 166us/step - loss: 0.0979 - acc: 0.9679 - val_loss: 0.1887 - val_acc: 0.9441\n", |
156 |
| - "Epoch 5/50\n", |
157 |
| - "39902/39902 [==============================] - 6s 162us/step - loss: 0.0912 - acc: 0.9693 - val_loss: 0.1952 - val_acc: 0.9452\n", |
158 |
| - "Epoch 6/50\n", |
159 |
| - "39902/39902 [==============================] - 7s 175us/step - loss: 0.0861 - acc: 0.9712 - val_loss: 0.2007 - val_acc: 0.9454\n", |
160 |
| - "Epoch 7/50\n", |
161 |
| - "39902/39902 [==============================] - 7s 178us/step - loss: 0.0845 - acc: 0.9715 - val_loss: 0.2042 - val_acc: 0.9425\n", |
162 |
| - "Epoch 8/50\n", |
163 |
| - "39902/39902 [==============================] - 7s 178us/step - loss: 0.0821 - acc: 0.9723 - val_loss: 0.2068 - val_acc: 0.9436\n", |
164 |
| - "Epoch 9/50\n", |
165 |
| - "39902/39902 [==============================] - 7s 164us/step - loss: 0.0810 - acc: 0.9721 - val_loss: 0.2098 - val_acc: 0.9429\n", |
166 |
| - "Epoch 10/50\n", |
167 |
| - "39902/39902 [==============================] - 6s 149us/step - loss: 0.0788 - acc: 0.9731 - val_loss: 0.2098 - val_acc: 0.9434\n", |
168 |
| - "Epoch 11/50\n", |
169 |
| - "39902/39902 [==============================] - 6s 156us/step - loss: 0.0782 - acc: 0.9733 - val_loss: 0.2208 - val_acc: 0.9416\n", |
170 |
| - "Epoch 12/50\n", |
171 |
| - "39902/39902 [==============================] - 7s 170us/step - loss: 0.0768 - acc: 0.9732 - val_loss: 0.2277 - val_acc: 0.9432\n", |
172 |
| - "Epoch 13/50\n", |
173 |
| - "39902/39902 [==============================] - 7s 174us/step - loss: 0.0765 - acc: 0.9738 - val_loss: 0.2270 - val_acc: 0.9418\n", |
174 |
| - "Epoch 14/50\n", |
175 |
| - "39902/39902 [==============================] - 7s 175us/step - loss: 0.0742 - acc: 0.9744 - val_loss: 0.2364 - val_acc: 0.9391\n", |
176 |
| - "Epoch 15/50\n", |
177 |
| - "16384/39902 [===========>..................] - ETA: 3s - loss: 0.0697 - acc: 0.9757" |
| 168 | + "_________________________________________________________________\n" |
178 | 169 | ]
|
179 | 170 | }
|
180 | 171 | ],
|
181 | 172 | "source": [
|
182 |
| - "model = Sequential()\n", |
183 |
| - "model.add(Dense(1024, activation='relu', input_shape=(number_features,)))\n", |
184 |
| - "model.add(Dropout(0.3))\n", |
185 |
| - "model.add(Dense(1024, activation='relu'))\n", |
186 |
| - "model.add(Dropout(0.3))\n", |
187 |
| - "model.add(Dense(number_class, activation='softmax'))\n", |
188 |
| - "\n", |
189 |
| - "model.summary()\n", |
190 |
| - "\n", |
| 173 | + "model = Sequential() # linear stack of layers\n", |
| 174 | + "model.add(Dense(1024, activation='relu', input_shape=(number_features,))) # fully connected layer\n", |
| 175 | + "model.add(Dropout(0.3)) # dropout some nodes to avoid overfitting\n", |
| 176 | + "model.add(Dense(1024, activation='relu')) # fully conncted layer\n", |
| 177 | + "model.add(Dropout(0.3)) # dropout\n", |
| 178 | + "model.add(Dense(number_class, activation='softmax')) # final classification layer\n", |
| 179 | + "model.summary() # summarize the model structure and parameters" |
| 180 | + ] |
| 181 | + }, |
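The parameter counts in the summary follow from (inputs + 1 bias) x units for each Dense layer; the Dropout layers add none. A quick verification of the totals printed above:

```python
d1 = (16306 + 1) * 1024   # dense_7: 16,698,368
d2 = (1024 + 1) * 1024    # dense_8:  1,049,600
d3 = (1024 + 1) * 2       # dense_9:      2,050
print(d1, d2, d3, d1 + d2 + d3)   # total: 17,750,018
```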
| 182 | + { |
| 183 | + "cell_type": "markdown", |
| 184 | + "metadata": {}, |
| 185 | + "source": [ |
| 186 | + "# Define loss, optimizer (update rule), and metrics of monitoring the training process" |
| 187 | + ] |
| 188 | + }, |
| 189 | + { |
| 190 | + "cell_type": "code", |
| 191 | + "execution_count": 18, |
| 192 | + "metadata": {}, |
| 193 | + "outputs": [], |
| 194 | + "source": [ |
191 | 195 | "model.compile(loss='categorical_crossentropy',\n",
|
192 | 196 | " optimizer=keras.optimizers.adam(),\n",
|
193 |
| - " metrics=['accuracy'])\n", |
194 |
| - "\n", |
| 197 | + " metrics=['accuracy'])" |
| 198 | + ] |
| 199 | + }, |
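The earlier 50-epoch run showed validation loss rising after the first few epochs while training loss kept falling, which is why the epoch count was cut to 5. An alternative is to keep a larger epoch budget and stop automatically once validation loss stops improving; a minimal sketch using Keras' EarlyStopping callback (not part of this notebook):

```python
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss',          # watch validation loss
                           patience=3,                   # allow 3 epochs without improvement
                           restore_best_weights=True)    # needs Keras >= 2.2.3

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=50,
                    verbose=1,
                    validation_data=(x_test, y_test),
                    callbacks=[early_stop])
```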
| 200 | + { |
| 201 | + "cell_type": "markdown", |
| 202 | + "metadata": {}, |
| 203 | + "source": [ |
| 204 | + "# Run the training loop" |
| 205 | + ] |
| 206 | + }, |
| 207 | + { |
| 208 | + "cell_type": "code", |
| 209 | + "execution_count": 19, |
| 210 | + "metadata": {}, |
| 211 | + "outputs": [ |
| 212 | + { |
| 213 | + "name": "stdout", |
| 214 | + "output_type": "stream", |
| 215 | + "text": [ |
| 216 | + "Train on 39902 samples, validate on 4434 samples\n", |
| 217 | + "Epoch 1/5\n", |
| 218 | + "39902/39902 [==============================] - 6s 144us/step - loss: 0.3966 - acc: 0.8318 - val_loss: 0.2212 - val_acc: 0.9283\n", |
| 219 | + "Epoch 2/5\n", |
| 220 | + "39902/39902 [==============================] - 6s 142us/step - loss: 0.1457 - acc: 0.9545 - val_loss: 0.1830 - val_acc: 0.9436\n", |
| 221 | + "Epoch 3/5\n", |
| 222 | + "39902/39902 [==============================] - 6s 140us/step - loss: 0.1083 - acc: 0.9663 - val_loss: 0.1853 - val_acc: 0.9452\n", |
| 223 | + "Epoch 4/5\n", |
| 224 | + "39902/39902 [==============================] - 5s 132us/step - loss: 0.0986 - acc: 0.9670 - val_loss: 0.1906 - val_acc: 0.9447\n", |
| 225 | + "Epoch 5/5\n", |
| 226 | + "39902/39902 [==============================] - 5s 120us/step - loss: 0.0916 - acc: 0.9697 - val_loss: 0.1960 - val_acc: 0.9454\n" |
| 227 | + ] |
| 228 | + } |
| 229 | + ], |
| 230 | + "source": [ |
195 | 231 | "history = model.fit(x_train, y_train,\n",
|
196 | 232 | " batch_size=batch_size,\n",
|
197 | 233 | " epochs=epochs,\n",
|
198 | 234 | " verbose=1,\n",
|
199 |
| - " validation_data=(x_test, y_test))\n", |
200 |
| - "score = model.evaluate(x_test, y_test, verbose=0)\n", |
201 |
| - "print('Test loss:', score[0])\n", |
202 |
| - "print('Test accuracy:', score[1])" |
| 235 | + " validation_data=(x_test, y_test))" |
| 236 | + ] |
| 237 | + }, |
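`model.fit` returns a History object whose `history` dict holds the per-epoch metrics, so the curves above can be inspected or plotted after training; a small sketch (matplotlib assumed to be available):

```python
import matplotlib.pyplot as plt

plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('categorical cross-entropy')
plt.legend()
plt.show()
```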
| 238 | + { |
| 239 | + "cell_type": "markdown", |
| 240 | + "metadata": {}, |
| 241 | + "source": [ |
| 242 | + "# Evaluate the trained model" |
203 | 243 | ]
|
204 | 244 | },
|
205 | 245 | {
|
206 | 246 | "cell_type": "code",
|
207 |
| - "execution_count": null, |
208 |
| - "metadata": { |
209 |
| - "collapsed": true |
210 |
| - }, |
211 |
| - "outputs": [], |
212 |
| - "source": [] |
| 247 | + "execution_count": 21, |
| 248 | + "metadata": {}, |
| 249 | + "outputs": [ |
| 250 | + { |
| 251 | + "name": "stdout", |
| 252 | + "output_type": "stream", |
| 253 | + "text": [ |
| 254 | + "('Test loss:', 0.1959872514036361)\n", |
| 255 | + "('Test accuracy:', 0.9454217409840241)\n" |
| 256 | + ] |
| 257 | + } |
| 258 | + ], |
| 259 | + "source": [ |
| 260 | + "score = model.evaluate(x_test, y_test, verbose=0)\n", |
| 261 | + "print('Test loss:', score[0])\n", |
| 262 | + "print('Test accuracy:', score[1])" |
| 263 | + ] |
213 | 264 | }
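Beyond the aggregate loss and accuracy, per-example class probabilities and a saved copy of the trained weights are often useful for downstream analysis; a short sketch (the file name is arbitrary, saving requires h5py):

```python
probs = model.predict(x_test, batch_size=batch_size)   # shape (n_test, 2), softmax outputs
pred_class = probs.argmax(axis=1)                       # predicted class index per example

model.save('pfam_dense_classifier.h5')                  # hypothetical file name, Keras HDF5 format
```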
|
214 | 265 | ],
|
215 | 266 | "metadata": {
|
|
221 | 272 | "language_info": {
|
222 | 273 | "codemirror_mode": {
|
223 | 274 | "name": "ipython",
|
224 |
| - "version": 3 |
| 275 | + "version": 2 |
225 | 276 | },
|
226 | 277 | "file_extension": ".py",
|
227 | 278 | "mimetype": "text/x-python",
|
228 | 279 | "name": "python",
|
229 | 280 | "nbconvert_exporter": "python",
|
230 |
| - "pygments_lexer": "ipython3", |
231 |
| - "version": "3.6.1" |
| 281 | + "pygments_lexer": "ipython2", |
| 282 | + "version": "2.7.13" |
232 | 283 | }
|
233 | 284 | },
|
234 | 285 | "nbformat": 4,
|
|