In [ ]:
# Transfer learning with bottleneck features, adapted from:
# https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
In [ ]:
import math
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from keras.engine import  Model
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import load_img, img_to_array, array_to_img, ImageDataGenerator
from keras_vggface.vggface import VGGFace
from PIL import Image
In [ ]:
# Configuration constants.
DATA_DIR = '../data/preprocessed/'  # must contain train/, validation/, test/ subdirectories
BATCH_SIZE = 16
GRAYSCALE = False
# (height, width, channels) fed to the network.
# NOTE(review): the pretrained VGGFace weights presumably require 3-channel
# input — the GRAYSCALE=True path looks untested; confirm before enabling.
INPUT_DIM = (128, 128, 1 if GRAYSCALE else 3)
# Multiplier for how many (augmented) passes of the training set are collected below.
AUGMENTATION_FACTOR = 2
EPOCHS = 100
RANDOM_SEED = 123
In [ ]:
# Augmenting generator for training images; validation/test get rescaling only.
train_datagen = ImageDataGenerator(
        rotation_range=10,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

datagen = ImageDataGenerator(rescale=1./255)

# Arguments shared by all three flow_from_directory calls.
generator_base_params = {
    'target_size': INPUT_DIM[:2],
    'class_mode': 'categorical',
    'color_mode': 'grayscale' if GRAYSCALE else 'rgb',
    'batch_size': BATCH_SIZE,
    'seed': RANDOM_SEED
}

# shuffle=False is essential here: the bottleneck features predicted later must
# stay in the same sample order as the labels collected from these generators.
train_generator = train_datagen.flow_from_directory(DATA_DIR + 'train', shuffle=False, **generator_base_params) 
validation_generator = datagen.flow_from_directory(DATA_DIR + 'validation', shuffle=False, **generator_base_params)
test_generator = datagen.flow_from_directory(DATA_DIR + 'test', shuffle=False, **generator_base_params)

# Number of samples discovered in each split.
n_train = train_generator.n
n_validation = validation_generator.n
n_test = test_generator.n
In [ ]:
# Collect one-hot labels for each split, in generator (unshuffled) order, so
# they line up row-for-row with the bottleneck features predicted later.
gens = [train_generator, validation_generator, test_generator]

# Temporarily force batch_size=1 so each next() call yields exactly one sample.
# NOTE(review): mutating .batch_size after construction relies on the iterator
# re-reading the attribute on every batch — confirm this holds for the
# installed Keras version.
for g in gens:
    g.batch_size = 1

# The train labels are collected AUGMENTATION_FACTOR times (the generator
# loops over the directory), matching the augmented feature count.
# NOTE(review): reshape(-1, 4) hardcodes 4 classes — presumably the number of
# class subdirectories; verify against train_generator.num_classes.
train_labels = np.array([train_generator.next()[1] for i in range(n_train * AUGMENTATION_FACTOR)]).reshape(-1, 4)
validation_labels = np.array([validation_generator.next()[1] for i in range(n_validation)]).reshape(-1, 4)
test_labels = np.array([test_generator.next()[1] for i in range(n_test)]).reshape(-1, 4)

# Restore the generators so feature prediction uses the configured batch size.
for g in gens:
    g.reset()
    g.batch_size = BATCH_SIZE
In [ ]:
def predict_bottleneck():
    """Run all three splits through a headless VGGFace and return the pooled
    bottleneck features as a (train, validation, test) tuple of arrays.

    The directory generators loop forever, so a few extra steps are requested
    (+3 for the augmented train split, +1 for the others) and the outputs are
    then truncated back to the exact number of labels collected earlier.
    """
    # include_top=False drops the classifier head; pooling='max' global-pools
    # the final conv output so each image becomes a flat feature vector.
    model = VGGFace(include_top=False, input_shape=INPUT_DIM, pooling='max')
    # NOTE(review): the over-predict-then-truncate scheme only keeps features
    # aligned with labels because the generators are unshuffled (shuffle=False).
    features_train = model.predict_generator(train_generator, math.ceil(train_labels.shape[0] / BATCH_SIZE) + 3, verbose=1)
    features_validation = model.predict_generator(validation_generator, math.ceil(n_validation / BATCH_SIZE) + 1, verbose=1)
    features_test = model.predict_generator(test_generator, math.ceil(n_test / BATCH_SIZE) + 1, verbose=1)
    return (features_train[:train_labels.shape[0]], features_validation[:n_validation], features_test[:n_test])
In [ ]:
features = predict_bottleneck()
In [ ]:
# Shuffle each split's features and labels with a single shared permutation.
# The previous train_test_split(..., test_size=0.0) hack raises ValueError in
# modern scikit-learn (test_size must be positive) and, with no random_state,
# was irreproducible. A seeded permutation fixes both while keeping rows paired.
def _shuffle_in_unison(data, labels, seed=RANDOM_SEED):
    """Return row-shuffled copies of `data` and `labels` using one permutation.

    Both inputs must have the same length along axis 0; the same row order is
    applied to each so feature/label pairs stay aligned.
    """
    order = np.random.RandomState(seed).permutation(len(data))
    return data[order], labels[order]

train_data, train_labels = _shuffle_in_unison(features[0], train_labels)
validation_data, validation_labels = _shuffle_in_unison(features[1], validation_labels)
test_data, test_labels = _shuffle_in_unison(features[2], test_labels)
In [ ]:
def get_model():
    """Build and compile the small classifier head that is trained on top of
    the precomputed VGGFace bottleneck features.

    Returns a compiled Sequential model: Dense(128, relu) -> Dropout(0.5) ->
    Dense(4, softmax), optimized with Adam on categorical cross-entropy and
    tracking accuracy.
    """
    classifier = Sequential([
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(4, activation='softmax'),
    ])
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['acc'])
    return classifier
In [ ]:
# Instantiate the classifier head and checkpoint the best weights seen so far,
# judged by validation accuracy ('val_acc' matches the metrics=['acc'] naming
# of this Keras version).
model = get_model()
callbacks = [ModelCheckpoint('top_model_weights.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')]
In [ ]:
# Train the head on the precomputed bottleneck features.
# NOTE(review): no batch_size is passed, so Keras' default (32) is used here
# rather than BATCH_SIZE (16) — confirm this is intentional.
history = model.fit(train_data,
        train_labels,
        epochs=EPOCHS,
        validation_data=(validation_data, validation_labels),
        callbacks=callbacks)
In [ ]:
# Plot training vs. validation accuracy per epoch.
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# Bug fix: the second curve is 'val_acc' (the validation split), but the
# legend previously labeled it 'Test', which misrepresents the data shown.
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
In [ ]:
# Don't forget to rename 'top_model_weights.hdf5' so these weights can be
# loaded in the fine-tuning step without being overwritten by a later run.