diff --git a/.travis.yml b/.travis.yml index 506a44f..f711d5a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,22 @@ language: python python: - "3.9" +before_install: + - export PYTHONPATH=$PYTHONPATH:$(pwd)/src + install: - pip install pipenv - - pipenv install + - pipenv install --dev + +stages: + - style + - test -script: - # Your test script goes here - - pytest -v --color=yes src # Runs in pipenv \ No newline at end of file +jobs: + include: + - stage: style + name: "Style check" + script: pylint **/*.py + - stage: test + name: "Unit tests" + script: pytest -v --color=yes src # Runs in pipenv diff --git a/Makefile b/Makefile index 4752508..2ebd666 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,20 @@ +activate: + pipenv shell + test: pytest lint: - pylint src + pylint **/*.py lintfix: - autopep8 src --recursive --in-place --aggressive + autopep8 **/*.py --recursive --in-place --aggressive lintfixhard: - autopep8 src --recursive --in-place --aggressive --aggressive + autopep8 **/*.py --recursive --in-place --aggressive --aggressive install: - pipenv install + pipenv install --dev lock: pipenv lock diff --git a/README.md b/README.md index 01e83a4..d15f447 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ make install ``` This calls `pipenv`, which will create a virtual environment for this project. -To enter a shell on this environment run +To activate this environment run ```bash -make shell +make activate ``` ## Tests @@ -44,11 +44,11 @@ make lint We use `autopep8` to automatically fix errors. 
-User +Use ```bash make lintfix ``` -of +or ```bash make lintfixhard ``` diff --git a/cli/cnn_fashion_mnist_classifier.py b/cli/cnn_fashion_mnist_classifier.py index b93a1fd..aeaf6f6 100644 --- a/cli/cnn_fashion_mnist_classifier.py +++ b/cli/cnn_fashion_mnist_classifier.py @@ -1,22 +1,24 @@ - +"""CLI to use the cnn.FashionMNISTClassifier""" import argparse from computer_vision.cnn import FashionMNISTClassifier + def build_arg_parser(): + """Parses the user's arguments""" parser = argparse.ArgumentParser( - description="Use a CNN classifier to classify the "+ + description="Use a CNN classifier to classify the " + "Fashion MNIST training dataset", epilog="Built with <3 by Emmanuel Byrd at 8th Light Ltd." ) parser.add_argument( "--model_path", metavar="./model.h5", type=str, default="./model.h5", - help="The read/write path of the trained model " + + help="The read/write path of the trained model " + "(default: ./model.h5)" ) parser.add_argument( - "--history_path", metavar="./train_hist", type=str, + "--history_path", metavar="./train_hist", type=str, default="./train_hist", - help="The read/write path of the training history "+ + help="The read/write path of the training history " + "(default: ./hist)" ) parser.add_argument( @@ -49,17 +51,19 @@ def build_arg_parser(): help="Path to store the generated plot. Leave blank to ignore." 
) parser.add_argument( - "--save_model", action=argparse.BooleanOptionalAction,type=bool, + "--save_model", action=argparse.BooleanOptionalAction, type=bool, help="Save the model" ) return parser + def execute_cnn(args): - cnn_trainer = FashionMNISTClassifier(args.model_path, args.history_path) + """Execute the FashionMNISTClassifier functions as requested""" + cnn_trainer = FashionMNISTClassifier() if args.use_stored: - cnn_trainer.load_model() - cnn_trainer.load_train_history() + cnn_trainer.load_model(args.model_path) + cnn_trainer.load_train_history(args.history_path) cnn_trainer.show_summary() elif args.train: cnn_trainer.load_dataset() @@ -67,31 +71,34 @@ def execute_cnn(args): cnn_trainer.show_summary() cnn_trainer.compile() cnn_trainer.train( - args.mini_batch_size, - args.epochs, + args.mini_batch_size, + args.epochs, args.validation_split ) else: - print ( - "Do you want to load a model or train a new one? "+ + print( + "Do you want to load a model or train a new one? " + "(--use_stored, --train)" ) return - + if args.plot or args.save_plot: cnn_trainer.plot_hist(args.plot, args.save_plot) if args.save_model: - cnn_trainer.save_model() - cnn_trainer.save_train_history() + cnn_trainer.save_model(args.model_path) + cnn_trainer.save_train_history(args.history_path) + def main(): + """Main function""" arg_parser = build_arg_parser() args = arg_parser.parse_args() execute_cnn(args) print("Finished.") - + + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/path_adder.py b/path_adder.py index 3faec5d..1c6d4bb 100644 --- a/path_adder.py +++ b/path_adder.py @@ -1,23 +1,33 @@ +"""Adds this repository's modules to the Python's PATH""" import os import sys import pathlib + def main(): - for elem in sys.path: - this_file_path = pathlib.Path(__file__).parent.resolve() - src_path = os.path.join(this_file_path, "src") + """ + Creates a `.pth` file under the corresponding `site-packages` dir. 
+ This function works in both a virtual and a native environment. + """ + this_file_path = pathlib.Path(__file__).parent.resolve() + src_path = os.path.join(this_file_path, "src") + + for elem in sys.path: if elem.endswith("site-packages"): site_packages_dir = elem elif elem == src_path: print(src_path + " path is already added. Aborting...") return - - file_name = 'extra_python_folders.pth' - with open(os.path.join(site_packages_dir, file_name), 'w') as f: - f.write(src_path) + + file_path = os.path.join(site_packages_dir, + 'extra_python_folders.pth') + + with open(file_path, 'w', encoding="utf-8") as file: + file.write(src_path) print("Added to path: " + src_path) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/computer_vision/cnn/__init__.py b/src/computer_vision/cnn/__init__.py index 929583b..a2bbccb 100644 --- a/src/computer_vision/cnn/__init__.py +++ b/src/computer_vision/cnn/__init__.py @@ -1 +1,2 @@ +"""Convolutional Neural Network (CNN) implementations""" from .fashion_mnist_classifier import FashionMNISTClassifier diff --git a/src/computer_vision/cnn/fashion_mnist_classifier.py b/src/computer_vision/cnn/fashion_mnist_classifier.py index e6714f1..ddf1e11 100644 --- a/src/computer_vision/cnn/fashion_mnist_classifier.py +++ b/src/computer_vision/cnn/fashion_mnist_classifier.py @@ -1,27 +1,65 @@ +"""Simple CNN model that classifies the Fashion MNIST dataset.""" +# Standard imports +import pickle + # Machine Learning packages -import tensorflow.keras as keras -from tensorflow.keras.datasets import fashion_mnist +from tensorflow import keras +from keras.datasets import fashion_mnist # Common packages in scientific computing import matplotlib.pyplot as plt import numpy as np -# Support packages -import pickle - - class FashionMNISTClassifier: - def __init__(self, model_path, train_history_path): - self.model_path = model_path - self.train_history_path = train_history_path + """ + Can load a trained model from memory 
or + train one from scratch. This means that loading a dataset and + training are optional. The current implementation can create a 92% accurate + model around epoch 10. + + Example of full training cycle: + fmnist = FashionMNISTClassifier() + fmnist.load_dataset() + fmnist.build_model() + fmnist.show_summary() + fmnist.compile() + fmnist.train(128, 15, 0.2) + fmnist.plot_hist(show=True, fig_path='./my_fig.jpg') + fmnist.save_model('./my_model.h5') + fmnist.save_train_history('./my_train_hist') + + Example of plotting the accuracy of a saved training history: + fmnist = FashionMNISTClassifier() + fmnist.load_train_history('./my_train_hist') + fmnist.plot_hist(show=True) + + Example of picking a pre-trained model and continuing its training: + fmnist = FashionMNISTClassifier() + fmnist.load_dataset() + fmnist.load_model('./my_model.h5') + fmnist.train(64, 5, 0.2) + fmnist.save_model('./finetuned_model.h5') + fmnist.save_train_history('./other_train_hist') # saves a different file + """ + + def __init__(self): + """Initialize the model, train_hist and train_data variables""" self.model = None self.train_hist = None self.train_data = None def load_dataset(self): - # The first time the Fashion MNIST dataset is loaded, it will be fetched - # from https://storage.googleapis.com/tensorflow/tf-keras-datasets/ - # This is done automatically. 
+ + To change the size of the dataset (for testing purposes, for example), + simply override the `self.train_data` variable: + + dataset = fmnist.train_data + fmnist.train_data = (dataset[0][:100], dataset[1][:100]) + """ (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() assert x_train.shape == (60000, 28, 28) assert y_train.shape == (60000,) @@ -31,9 +69,23 @@ def load_dataset(self): self.train_data = (x_train, y_train) def build_model(self): + """ + Build a Sequential model that receives a 28x28 grayscale image + with pixel values [0, 255] and outputs a Dense layer with 10 units + corresponding to the classes. + + The first layer scales the input to a float in [0, 1]. + Then there are two Convolutional layers followed by a MaxPool. + The output of the CNN stage is flattened and processed through two + fully connected layers with ReLU. + + In total, this model contains 507,530 trainable parameters. + + To build a different model simply override the `.model` variable. + """ self.model = keras.Sequential([ keras.layers.InputLayer(input_shape=(28, 28, 1)), - keras.layers.experimental.preprocessing.Rescaling(1./255), + keras.layers.experimental.preprocessing.Rescaling(1. / 255), keras.layers.Conv2D(16, (3, 3), activation='relu'), keras.layers.Conv2D(64, (3, 3), activation='relu'), keras.layers.MaxPooling2D((3, 3), strides=2), @@ -44,15 +96,31 @@ def build_model(self): ]) def show_summary(self): + """ + Prints the layers that build the model, their output shape, + and the total number of parameters. + """ print(self.model.summary()) def compile(self): + """ + Compiles the model using the Sparse Categorical Crossentropy loss + function and the ADAM optimizer. It tracks accuracy as its metric. + + To use a different compile method simply load the model and call + `.model.compile` with the desired parameters. 
+ """ self.model.compile( loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy']) def train(self, batch_size, epochs, val_split): + """ + Train the model using the previously loaded dataset, and receives + the parameters batch_size, epochs and val_split. The training history + will be stored in the `.train_hist` variable. + """ fit_hist = self.model.fit( self.train_data[0], self.train_data[1], @@ -62,33 +130,41 @@ def train(self, batch_size, epochs, val_split): ) self.train_hist = fit_hist.history - def plot_hist(self, show_plot, save_plot): - x = np.arange(1, len(self.train_hist['accuracy'])+1) + def plot_hist(self, show=False, fig_path=None): + """ + Plot the `accuracy` and `val_accuracy` of the generated training + history, optionally saving it to `fig_path` and/or showing it. + """ + x_points = np.arange(1, len(self.train_hist['accuracy']) + 1) - plt.plot(x, self.train_hist['accuracy'], label='Train') - plt.plot(x, self.train_hist['val_accuracy'], label='Validation') + plt.plot(x_points, self.train_hist['accuracy'], label='Train') + plt.plot(x_points, self.train_hist['val_accuracy'], label='Validation') plt.title('Training accuracy') plt.xlabel('Epoch') plt.ylabel('Accuracy') plt.legend(loc='lower right') - if save_plot: - plt.savefig(save_plot) + if fig_path: + plt.savefig(fig_path) - if show_plot: + if show: plt.show() - def save_model(self): - self.model.save(self.model_path) + def save_model(self, path): + """Save the model in the given path""" + self.model.save(path) - def load_model(self): - self.model = keras.models.load_model(self.model_path) + def load_model(self, path): + """Load the model from the given path""" + self.model = keras.models.load_model(path) - def save_train_history(self): - with open(self.train_history_path, 'wb') as file_pi: + def save_train_history(self, path): + """Save the training history in the given path""" + with open(path, 'wb') as file_pi: pickle.dump(self.train_hist, file_pi) - def 
load_train_history(self): - with open(self.train_history_path, 'rb') as file_pi: + def load_train_history(self, path): + """Load the training history from the given path""" + with open(path, 'rb') as file_pi: self.train_hist = pickle.load(file_pi) diff --git a/src/computer_vision/cnn/fashion_mnist_classifier_test.py b/src/computer_vision/cnn/fashion_mnist_classifier_test.py index 08b1563..a10f4af 100644 --- a/src/computer_vision/cnn/fashion_mnist_classifier_test.py +++ b/src/computer_vision/cnn/fashion_mnist_classifier_test.py @@ -1,13 +1,19 @@ +"""Test file for the FashionMNIST Classifier""" import tensorflow -import tensorflow.compat.v1 as tf from .fashion_mnist_classifier import FashionMNISTClassifier +# Make tests deterministic tensorflow.random.set_seed(123) - def test_full_cycle(): - classifier = FashionMNISTClassifier("./model.h5", "./hist") + """ + Tests that the entire flow can be executed without interruptions or failures + + To test this fast, the provided dataset is reduced to its first 300 + samples, and trained only for 2 epochs. + """ + classifier = FashionMNISTClassifier() classifier.build_model() classifier.load_dataset() classifier.train_data = ( # just use 300 data samples for unit testing diff --git a/src/computer_vision/cnn/hist b/src/computer_vision/cnn/hist new file mode 100644 index 0000000..9cb2e00 Binary files /dev/null and b/src/computer_vision/cnn/hist differ diff --git a/src/computer_vision/cnn/model.h5 b/src/computer_vision/cnn/model.h5 new file mode 100644 index 0000000..afdfc94 Binary files /dev/null and b/src/computer_vision/cnn/model.h5 differ diff --git a/src/computer_vision/cnn/plot.jpg b/src/computer_vision/cnn/plot.jpg new file mode 100644 index 0000000..518f4cf Binary files /dev/null and b/src/computer_vision/cnn/plot.jpg differ diff --git a/src/computer_vision/cnn/train_hist b/src/computer_vision/cnn/train_hist new file mode 100644 index 0000000..c467891 Binary files /dev/null and b/src/computer_vision/cnn/train_hist differ