Skip to content

Commit

Permalink
Updated cnn model
Browse files Browse the repository at this point in the history
  • Loading branch information
hlgirard committed Jun 11, 2019
1 parent a3aa582 commit b2f38e0
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 24 deletions.
Binary file added models/cnn-simple-model-1560265830.333636.h5
Binary file not shown.
2 changes: 1 addition & 1 deletion models/cnn-simple-model.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": [null, 150, 150, 1], "dtype": "float32", "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.2, "noise_shape": null, "seed": null}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "dtype": "float32", "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.2, "noise_shape": null, "seed": null}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_1", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": 
"Conv2D", "config": {"name": "conv2d_2", "trainable": true, "dtype": "float32", "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "dtype": "float32", "rate": 0.2, "noise_shape": null, "seed": null}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_2", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "dtype": "float32", "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, 
"kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.2.4-tf", "backend": "tensorflow"}
{"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": [null, 150, 150, 1], "dtype": "float32", "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "dtype": "float32", "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_1", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Conv2D", "config": {"name": "conv2d_2", 
"trainable": true, "dtype": "float32", "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_2", "trainable": true, "dtype": "float32", "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 64, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": {"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_3", "trainable": true, "dtype": "float32", "activation": "relu"}}, {"class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null, "dtype": "float32"}}, "bias_initializer": 
{"class_name": "Zeros", "config": {"dtype": "float32"}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.2.4-tf", "backend": "tensorflow"}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
long_description = f.read()

setup(name='crystalml',
version='0.0.5.1',
version='0.0.5.2',
description='Integrated tool to measure the nucleation rate of protein crystals. ',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
5 changes: 3 additions & 2 deletions src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def segment(directory, compare, save_overlay, verbose):
@click.option('-tb', '--tensorboard', is_flag=True, help="Save logs for tensorboard visualization")
@click.option('-v', '--verbose', count=True, help="Increase verbosity level")
@click.option('-l', '--layer', default=1, help="For transfer learning, how many layers to skim from the top.")
def train(directory, model, verbose, tensorboard, layer):
@click.option('-f', '--frozen', default=0, help="For transfer learning, how many layers to unfreeze at the top.")
def train(directory, model, verbose, tensorboard, layer, frozen):
'''Train a model from a directory of labeled images'''

training_directory = directory
Expand All @@ -81,4 +82,4 @@ def train(directory, model, verbose, tensorboard, layer):

elif model == "cnn-transfer":
from .models.train.cnn_transfer import train_cnn_transfer_from_directory
train_cnn_transfer_from_directory(training_directory, tensorboard, -1 * layer)
train_cnn_transfer_from_directory(training_directory, tensorboard, -1 * layer, frozen)
29 changes: 24 additions & 5 deletions src/models/train/cnn_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def build(width, height, depth):
class cnn_regularized:
'''
Regularization of the weights is added to each layer.
No measurable improvement in accuracy but significantly larger run time.
No measurable improvement in accuracy but significantly longer run time.
'''
@staticmethod
def build(width, height, depth):
Expand Down Expand Up @@ -92,6 +92,10 @@ def build(width, height, depth):
return model

class cnn_dropout:
'''
Dropouts added to each layer to prevent overfitting.
Some improvement in testing loss but significantly longer run time.
'''
@staticmethod
def build(width, height, depth):

Expand All @@ -100,7 +104,7 @@ def build(width, height, depth):

# if we are using "channels first", update the input shape
if K.image_data_format() == "channels_first":
inputShape = (depth, height, width)
inputShape = (depth, height, width)

# 1st layer convolutional
model.add(Conv2D(32, (3, 3), input_shape=(150, 150, 1), activation='relu'))
Expand Down Expand Up @@ -135,7 +139,7 @@ def train_cnn_simple_from_directory(training_directory, bTensorboard):
logging.info("Starting training of simple CNN from directory %s", training_directory)

## Define the model
model = cnn_dropout.build(150, 150, 1)
model = cnn_simple.build(150, 150, 1)

#optimizer = SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)

Expand Down Expand Up @@ -177,7 +181,8 @@ def train_cnn_simple_from_directory(training_directory, bTensorboard):
batch_size=batch_size,
color_mode='grayscale',
class_mode='binary',
subset='validation')
subset='validation',
shuffle=False)

model.summary()

Expand Down Expand Up @@ -206,4 +211,18 @@ def train_cnn_simple_from_directory(training_directory, bTensorboard):
# Save weights
model_weights_path = pkg_resources.resource_filename('models', "cnn-simple-model-{}.h5".format(time()))
logging.info("Saving model weights to %s", model_weights_path)
model.save_weights(model_weights_path)
model.save_weights(model_weights_path)

# Display confusion matrix
try:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
Y_pred = model.predict_generator(validation_generator, num_validation // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('-------- Confusion Matrix --------')
print(confusion_matrix(validation_generator.classes, y_pred))
print('-------- Classification Report --------')
target_names = ['Clear', 'Crystal']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))
except ImportError:
logging.info("sklearn is required to print confucion matrix and classification report.")
50 changes: 35 additions & 15 deletions src/models/train/cnn_transfer.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, GlobalAveragePooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.constraints import unit_norm, max_norm
from tensorflow.keras.optimizers import SGD, Adadelta
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.optimizers import SGD, Adadelta, RMSprop
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
import tensorflow.keras.backend as K

from time import time
import pkg_resources
import logging
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

class cnn_transfer_ResNet50:
@staticmethod
def build(width, height, top_layer=-1):
def build(width, height, top_layer=-1, unfreeze_layers=0):

logging.info("Building a transfer learning model on top of ResNet50...")
logging.info("Input size: (%d, %d) - Number of layers to strip from ResNet50: %d", width, height, -1 * top_layer)
Expand All @@ -30,30 +30,35 @@ def build(width, height, top_layer=-1):
bottleneck_model = Model(inputs=bottleneck_input, outputs=bottleneck_output)

# Freeze the layers of the pretrained model (all of them, or all but the last `unfreeze_layers`)
for layer in bottleneck_model.layers:
layer.trainable = False
if unfreeze_layers == 0:
for layer in bottleneck_model.layers:
layer.trainable = False
else:
for layer in bottleneck_model.layers[:-unfreeze_layers]:
layer.trainable = False

# Make a new model on top
model = Sequential()
model.add(bottleneck_model)
model.add(Flatten())
model.add(BatchNormalization())
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

return model

def train_cnn_transfer_from_directory(training_directory, bTensorboard, last_layer=-1):
def train_cnn_transfer_from_directory(training_directory, bTensorboard, last_layer=-1, unfreeze=0):

model = cnn_transfer_ResNet50.build(150, 150, last_layer)
model = cnn_transfer_ResNet50.build(150, 150, last_layer, unfreeze)

model.compile(optimizer='rmsprop',
model.compile(optimizer=RMSprop(lr=1e-4),
loss='binary_crossentropy',
metrics=['accuracy'])


## Prepare the data
batch_size = 16
batch_size = 8
num_samples = sum([len(os.listdir(os.path.join(training_directory, categoryDir))) for categoryDir in os.listdir(training_directory) if os.path.isdir(os.path.join(training_directory, categoryDir))])
num_training = int(0.8 * num_samples)
num_validation = num_samples - num_training
Expand All @@ -64,7 +69,8 @@ def train_cnn_transfer_from_directory(training_directory, bTensorboard, last_lay

train_datagen = ImageDataGenerator(
rescale=1./255,
validation_split=0.2)
validation_split=0.2,
preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
training_directory, # this is the target directory
Expand Down Expand Up @@ -111,4 +117,18 @@ def train_cnn_transfer_from_directory(training_directory, bTensorboard, last_lay
# Save weights
model_weights_path = pkg_resources.resource_filename('models', "cnn-onResnet-model-{}.h5".format(time()))
logging.info("Saving model weights to %s", model_weights_path)
model.save_weights(model_weights_path)
model.save_weights(model_weights_path)

# Display confusion matrix
try:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
Y_pred = model.predict_generator(validation_generator, num_validation // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('-------- Confusion Matrix --------')
print(confusion_matrix(validation_generator.classes, y_pred))
print('-------- Classification Report --------')
target_names = ['Clear', 'Crystal']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))
except ImportError:
logging.info("sklearn is required to print confucion matrix and classification report.")

0 comments on commit b2f38e0

Please sign in to comment.