# Scripts/train/snakeclef_vgg16.py

# -*- coding: utf-8 -*-
"""SnakeCLEF-VGG16.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1dainCb6W8mG_A2A0VIE7j_lSAIbDP921
"""

'''
!pwd
import os
os.chdir(os.path.join('/', 'content', 'drive', 'MyDrive', 'Research', 'LifeCLEF\'22', 'SnakeCLEF-2022', 'Dataset', 'SNAKE_CLEF'))
!pwd
# - Karthik
'''

import os
import tensorflow as tf
import keras.utils
import numpy as np
from cv2 import cv2
import pandas as pd

# Number of images InputSequencer collects for each batch.
BATCH_SIZE = 8
# Size every image is resized to with cv2.resize; also the spatial part of
# the VGG16 input shape used further down.
IMG_SIZE = (224, 224)

class InputSequencer(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of resized SnakeCLEF images.

    The row list is read from ``snakeclef-trainpaths.csv`` in the current
    working directory. Column ``file_path`` holds image paths relative to
    ``base_path`` and column ``class_id`` holds the matching labels.

    Every batch holds exactly ``BATCH_SIZE`` images. Rows whose image cannot
    be read or resized are skipped and replaced by the next pending row, so a
    partially available dataset still yields full batches. Each row is drawn
    at most once per pass over the CSV; when a pass is used up before the
    epoch ends, a new pass is started instead of spinning forever.
    """

    # Image directory used when the caller does not supply ``base_path``.
    DEFAULT_BASE_PATH = "/content/drive/MyDrive/ML/SnakeCLEF2022-small_size/SnakeCLEF2022-small_size"
    # Alternative location (Karthik):
    # os.path.join('/', 'content', 'drive', 'MyDrive', 'Research', 'LifeCLEF\'22',
    #              'SnakeCLEF-2022', 'Dataset', 'SNAKE_CLEF',
    #              'SnakeCLEF2022-small_size', 'SnakeCLEF2022-small_size')

    def __init__(self, base_path=None, shuffle=True):
        """Load the CSV index and prepare the first pass over the rows.

        Args:
            base_path: Directory the CSV's ``file_path`` entries are relative
                to. ``None`` selects ``DEFAULT_BASE_PATH``.
            shuffle: When true, rows are visited in a fresh random order on
                every pass; otherwise in CSV order.
        """
        super().__init__()
        self.base_path = self.DEFAULT_BASE_PATH if base_path is None else base_path
        self.BATCH_SIZE = BATCH_SIZE
        self.IMG_SIZE = IMG_SIZE
        self.shuffle = shuffle
        self.csv_filename = "snakeclef-trainpaths.csv"
        self.x_col_name = "file_path"
        self.y_col_name = "class_id"
        print(os.getcwd())
        self.data_file = pd.read_csv(self.csv_filename)
        print(self.data_file.head())
        self.num_data_pts = len(self.data_file)
        print(self.num_data_pts)

        # Plain Python lists give cheap positional lookups in __getitem__.
        self._paths = self.data_file[self.x_col_name].to_list()
        self._labels = self.data_file[self.y_col_name].to_list()

        # Paths drawn so far in the current epoch (readable or not).
        self.trained_files = []
        # Row positions still to be drawn in the current pass.
        self._pending = []
        self.on_epoch_end()

    def on_epoch_end(self):
        """Forget which files were drawn and start a fresh pass over the CSV."""
        self.trained_files = []
        self._refill_pool()

    def __len__(self):
        """Return the number of full batches per epoch."""
        return self.num_data_pts // self.BATCH_SIZE

    def __getitem__(self, idx):
        """Return one ``(images, labels)`` batch.

        ``idx`` is accepted for the ``Sequence`` protocol but does not select
        rows: each call consumes the next pending rows of the current pass.

        Returns:
            Tuple ``(images, labels)`` of NumPy arrays with ``BATCH_SIZE``
            entries each. Images are ``uint8`` arrays as returned by
            ``cv2.imread`` (BGR channel order) resized to ``IMG_SIZE``.

        Raises:
            IndexError: If the CSV contains no rows.
            RuntimeError: If no image could be loaded after every row was
                tried.
        """
        if self.num_data_pts == 0:
            raise IndexError("InputSequencer has no data rows to draw from")

        batch_images = []
        batch_labels = []
        # Any 2 * num_data_pts consecutive draws contain one complete pass
        # over the CSV, so that many failures in a row mean nothing is loadable.
        max_failures = 2 * self.num_data_pts
        failures_in_a_row = 0

        while len(batch_images) < self.BATCH_SIZE:
            if not self._pending:
                self._refill_pool()
            row = self._pending.pop()
            path = self._paths[row]
            self.trained_files.append(path)

            resized = self._load_image(path)
            if resized is None:
                failures_in_a_row += 1
                if failures_in_a_row >= max_failures:
                    raise RuntimeError(
                        f"No readable image found under {self.base_path!r} "
                        f"after trying every row of {self.csv_filename!r}"
                    )
                continue

            failures_in_a_row = 0
            batch_images.append(np.array(resized, dtype='uint8'))
            batch_labels.append(self._labels[row])

        return (np.array(batch_images), np.array(batch_labels))

    def _refill_pool(self):
        """Queue every CSV row position once, shuffled if ``self.shuffle`` is set."""
        order = np.arange(self.num_data_pts)
        if self.shuffle:
            np.random.shuffle(order)
        # Rows are consumed with list.pop() from the end, so store the order
        # reversed to visit it front to back.
        self._pending = order[::-1].tolist()

    def _load_image(self, path):
        """Return the image at ``base_path/path`` resized to ``IMG_SIZE``, or ``None``."""
        img = cv2.imread(os.path.join(self.base_path, path))
        # cv2.imread signals a missing or undecodable file by returning None.
        if img is None:
            return None
        try:
            return cv2.resize(img, self.IMG_SIZE)
        except cv2.error:
            return None

# Build the training-data sequence; constructing it reads the CSV index from
# the current working directory.
data_reader = InputSequencer()
# Fetch one batch up front; `imgs` is reused below as sample input for the
# pretrained VGG16 check.
imgs, labels = data_reader[5]

"""
check_img = imgs[0]
check_label = labels[0]
import matplotlib.pyplot as plot
print(check_img.shape)
plot.imshow(check_img)
plot.show()
print(check_label)
"""

"""
Randomize or shuffle training data and ensure that all images are fed to the model
Upload images to the drive
"""

# Imports come first: the original called VGG16() before importing it, which
# raises NameError when this file runs top to bottom as a script.
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions
from keras.applications.vgg16 import VGG16

# --- Sanity check: classify one training image with stock ImageNet VGG16 ---

# load the model (built once; the original constructed it twice)
model = VGG16()
model.summary()

# Alternative input: load an image from file instead of using the batch.
#image = load_img('mug.jpg', target_size=(224, 224))
#image = img_to_array(image)

# The batch images come from cv2.imread and are therefore in BGR order, while
# preprocess_input expects RGB (it performs the RGB->BGR swap itself), so
# reverse the channel axis first.
rgb_image = imgs[0][..., ::-1]
# add the leading batch dimension the model expects: (1, height, width, channels)
image = np.expand_dims(rgb_image, axis=0)
# prepare the image for the VGG model
image = preprocess_input(image)
# predict the probability across all output classes
yhat = model.predict(image)
# convert the probabilities to class labels
label = decode_predictions(yhat)
# retrieve the most likely result, e.g. highest probability
label = label[0][0]
# print the classification
print('%s (%.2f%%)' % (label[1], label[2]*100))

print(keras.__version__)

from keras.models import Model
from keras.layers import Dense, Flatten, Input
from keras.applications import vgg16
from keras import backend as K

# --- Transfer learning: VGG16 convolutional base + new softmax classifier ---

# Convolutional base with ImageNet weights and without the original dense head.
model = vgg16.VGG16(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))

# model.summary(line_length=150)

# New head: flatten the final feature maps and classify into 1572 outputs.
# NOTE(review): 1572 is presumably the number of distinct class_id values in
# the training CSV - confirm against the dataset.
flatten = Flatten()
new_layer2 = Dense(1572, activation='softmax', name='my_dense_2')

inp2 = model.input
out2 = new_layer2(flatten(model.output))

model2 = Model(inp2, out2)
model2.summary()
# Sparse loss: the data sequence yields integer class ids, not one-hot vectors.
model2.compile(
    optimizer="rmsprop",
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

# Save weights every 2 epochs. The original passed `callbacks=[weight_save]`
# to the ModelCheckpoint constructor itself, which referenced the name before
# it existed (NameError) and never registered the callback with training.
# NOTE: with epochs=1 below, a period of 2 means no checkpoint is written
# during this run.
weight_save = keras.callbacks.ModelCheckpoint(
    'weights.{epoch:08d}.h5',
    save_weights_only=True,
    period=2,
)

# Model.fit accepts a keras.utils.Sequence directly; fit_generator is
# deprecated in its favour.
model2.fit(
    data_reader,
    epochs=1,
    verbose=1,
    callbacks=[weight_save],
)