Mount the drive to the instance.

In [0]:
# Mount Google Drive at /content/gdrive so files stored there can be read
# through the normal filesystem from this Colab instance.
from google.colab import drive
drive.mount('/content/gdrive')

Import various modules.

I used TensorFlow and its Keras library as the backend. The Keras module is used for preprocessing images. 

In [1]:
!pip install -q tensorflow-gpu==2.0.0-alpha0
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from __future__ import absolute_import, division, print_function
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.preprocessing import image
keras = tf.keras
datapath = '/content/gdrive/My Drive/Bird_ID_project/nabirds'

[31mERROR: Could not find a version that satisfies the requirement tensorflow-gpu==2.0.0-alpha0[0m
[31mERROR: No matching distribution found for tensorflow-gpu==2.0.0-alpha0[0m


ModuleNotFoundError: No module named 'joblib'

Reading in the pandas dataframes. They actually don't provide much besides the unique species class names.

In [0]:
# Metadata tables for the three splits, stored next to the image archives.
train_frame = pd.read_csv(datapath+'/train.csv')
val_frame = pd.read_csv(datapath+'/val.csv')
test_frame = pd.read_csv(datapath+'/test.csv')

# Keep one row per species id (first appearance) so that every id is paired
# with the name found on the SAME row. Zipping two independent .unique()
# results only lines up when ids and names are strictly one-to-one; this
# form stays aligned even if two ids happen to share a display name.
species_rows = train_frame.drop_duplicates(subset='class_id_sp')
Bird_id = species_rows.class_id_sp.values
Bird_list = species_rows.class_name_sp.values

# Lookup table: species class id -> species name.
Birds = dict(zip(Bird_id, Bird_list))

Unzip the tar.gz files **to the Google Colab instance**. This drastically increased the training speed.

In [0]:
# Extract both image archives from Drive into the current working directory.
# The paths are relative, so this assumes the cwd is /content (later cells read
# /content/data/... and /content/Darren_test/...) — TODO confirm.
!tar -xzf "gdrive/My Drive/Bird_ID_project/nabirds/data.tar.gz"
!tar -xzf "gdrive/My Drive/Bird_ID_project/nabirds/Darren_data.tar.gz"

In [0]:
# Rename two class folders of the personal test set and delete the NIL folder.
# NOTE(review): presumably 0289 -> 0867 and 0095 -> 0553 remap the folders to the
# class ids used by the training data, and NIL holds photos without a usable
# label — confirm against the dataset.
!mv /content/Darren_test/0289 /content/Darren_test/0867
!mv /content/Darren_test/0095 /content/Darren_test/0553
!rm -rf /content/Darren_test/NIL

Using Keras image generator to reduce RAM footprint (instead of using a large numpy array).

In [0]:
# Pixel values are multiplied by 1/255 in every pipeline. Only the training
# pipeline augments (random horizontal flips); shift/zoom augmentation is
# currently not enabled.
_PIXEL_SCALE = 1./255

train_datagen = image.ImageDataGenerator(
    rescale=_PIXEL_SCALE,
    fill_mode='constant',
    horizontal_flip=True,
    dtype=np.float32)
val_datagen = image.ImageDataGenerator(rescale=_PIXEL_SCALE, dtype=np.float32)
test_datagen = image.ImageDataGenerator(rescale=_PIXEL_SCALE, dtype=np.float32)

# Options shared by all three directory iterators: images are resized to
# 224x224, served in batches of 32, with one-hot ("categorical") labels taken
# from the sub-folder names.
_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

# The images are read from the copy extracted onto the Colab instance.
train_generator = train_datagen.flow_from_directory(
    directory='/content/data/train/',
    **_flow_options)

validation_generator = val_datagen.flow_from_directory(
    directory='/content/data/val/',
    **_flow_options)

# shuffle=False keeps the prediction order identical to
# test_generator.labels / test_generator.filenames, which later cells index by position.
test_generator = test_datagen.flow_from_directory(
    directory='/content/data/test/',
    shuffle=False,
    **_flow_options)


Research has shown that training from scratch will not work, so we use MobileNet V2 as our feature extractor. This is a relatively small network, which will help in the future when we want to run near-real-time inference. We also add two layers: a global average pooling layer and a prediction layer. 

In [0]:
# Input size must match the target_size used by the image generators.
IMG_SHAPE = (224,224,3)

# ImageNet-pretrained MobileNetV2 without its classification head, used as a
# fixed feature extractor for the first training stage.
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')
base_model.trainable = False

# New head: spatial average of the feature maps followed by a softmax layer.
# 404 is the number of output classes; it has to equal the number of class
# folders the generators discover, otherwise training fails on a shape mismatch.
# All layers come from tf.keras so the model never mixes in standalone-Keras objects.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
prediction_layer = tf.keras.layers.Dense(404, activation='softmax')

base_learning_rate = 0.0001
model = tf.keras.Sequential([
    base_model,
    global_average_layer,
    prediction_layer
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

In [0]:
def fit_model(model, batch_size=32, epochs=10):
    """Train `model` on the training generator, then score and predict with it.

    Args:
        model: compiled Keras model; it is trained in place.
        batch_size: only used to derive the number of steps per epoch from the
            number of rows in `train_frame`.
        epochs: number of epochs to train.

    Returns:
        Tuple `(model, score, probs, history)`: the same model object, the
        result of `evaluate_generator` on the training generator, the
        predictions for the test generator, and the Keras training history.
    """
    steps = len(train_frame) // batch_size
    training_options = dict(
        generator=train_generator,
        steps_per_epoch=steps,
        epochs=epochs,
        validation_data=validation_generator,
        callbacks=None,
    )
    history = model.fit_generator(**training_options)

    # NOTE(review): the score is computed on the training generator, not on the
    # validation data — confirm that this is intended.
    score = model.evaluate_generator(train_generator, verbose=1)
    probs = model.predict_generator(test_generator, verbose=1)
    return model, score, probs, history


In [0]:
# Bind the returned model to `model` (it is the same object that was passed in).
# Assigning it to `fit_model` would overwrite the training function and make
# this cell fail when it is executed a second time.
model, score, probs, history = fit_model(model, batch_size=32, epochs=10)

In [0]:
def top3(probs, GT):
    """Return 1 if class index `GT` is among the three highest-scoring entries of `probs`, else 0."""
    best_three = np.argsort(probs)[-3:]
    return int(GT in best_three)
 
def top5(probs, GT):
    """Return 1 if class index `GT` is among the five highest-scoring entries of `probs`, else 0."""
    best_five = np.argsort(probs)[-5:]
    return int(GT in best_five)
 
def top3_idx(probs):
    """Return the three best class indices and their scores, best first.

    Args:
        probs: 1-D NumPy array of scores (it is indexed with an index array).

    Returns:
        Tuple `(indices, scores)`; both have up to three entries, ordered from
        the highest score downwards.
    """
    # Sort once and reuse the result for both the indices and the scores.
    best_three = np.flip(np.argsort(probs)[-3:], 0)
    return best_three, probs[best_three]

def top5_idx(probs):
    """Return the indices of the five highest-scoring entries of `probs`, best first."""
    ascending = np.argsort(probs)
    return ascending[-5:][::-1]

After 10 epochs we achieve 44, 64, and 72 % top 1, 3, and 5 hit rate. Not bad! But it can be better. 

In [0]:
# Count top-1 / top-3 / top-5 hits of `probs` against the integer labels of the
# test generator (row ii of probs is compared with test_generator.labels[ii]).
correct_prediction = 0
correct_top3 = 0
correct_top5 = 0
for ii in range(len(probs)):
 P_this = np.argmax(probs[ii])
 GT_this = test_generator.labels[ii]
 if P_this == GT_this:
 correct_prediction += 1
 correct_top3 += top3(probs[ii],GT_this)
 correct_top5 += top5(probs[ii],GT_this)
 # Sample output for the first ten images. The nested lookup finds the folder
 # name whose generator index equals the given index, casts it to int and uses
 # it as the key into Birds to get the species name.
 if ii < 10:
 print("Prediction: {} ({})".format(P_this,Birds[int(list(test_generator.class_indices.keys())[list(test_generator.class_indices.values()).index(P_this)])]))
 print("Actual: {} ({})".format(GT_this,Birds[int(list(test_generator.class_indices.keys())[list(test_generator.class_indices.values()).index(GT_this)])]))
 print("Prediction: {} ".format(P_this))
 print("Actual: {} ".format(GT_this))
 #plt.figure()
 #plt.imshow(X_test[ii,:,:,:])
 #plt.show()
# Hit count and hit rate for top-1, top-3 and top-5, then the number of predictions.
print(correct_prediction, correct_prediction/len(probs))
print(correct_top3, correct_top3/len(probs))
print(correct_top5, correct_top5/len(probs))
print(len(probs))

We see overfitting after the second epoch, which seems to be common when training models for bird identification. 

In [0]:
# Pull the per-epoch curves out of the Keras history object.
metrics_log = history.history
acc, val_acc = metrics_log['accuracy'], metrics_log['val_accuracy']
loss, val_loss = metrics_log['loss'], metrics_log['val_loss']

# Two stacked panels: accuracy on top, cross-entropy loss below.
fig, (accuracy_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

accuracy_ax.plot(acc, label='Training Accuracy')
accuracy_ax.plot(val_acc, label='Validation Accuracy')
accuracy_ax.legend(loc='lower right')
accuracy_ax.set_ylabel('Accuracy')
# Keep the automatic lower limit but pin the upper limit to 1.
accuracy_ax.set_ylim([min(accuracy_ax.get_ylim()), 1])
accuracy_ax.set_title('Training Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('epoch')
plt.show()

We start to fine tune the model by allowing the last 55 layers to be trained.

In [0]:
# Make the backbone trainable again; a later cell re-freezes the layers below
# index `fine_tune_at`.
base_model.trainable = True

In [0]:
# Report how deep the backbone is.
print("Number of layers in the base model: ", len(base_model.layers))

# Index of the first backbone layer that remains trainable.
fine_tune_at = 100

# Every layer below that index is frozen.
for frozen_layer in base_model.layers[:fine_tune_at]:
    frozen_layer.trainable = False

In [0]:
# Recompile so the changed `trainable` flags take effect, using a learning rate
# ten times smaller than in the first stage. `learning_rate` is the supported
# argument name; the old `lr` alias is deprecated and rejected by newer Keras.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              metrics=['accuracy'])

In [0]:
# Epoch bookkeeping for the first fine-tuning stage: 10 epochs were already
# trained with a frozen backbone, 10 more are added here.
initial_epochs = 10
fine_tune_epochs = 10
total_epochs = initial_epochs + fine_tune_epochs


def fit_model_FT(model, batch_size=32, epochs=10, initial_epoch=None):
    """Continue training `model` (fine-tuning stage), then score and predict.

    Args:
        model: compiled Keras model; it is trained in place.
        batch_size: only used to derive the number of steps per epoch from the
            number of rows in `train_frame`.
        epochs: number of ADDITIONAL epochs to train. This argument used to be
            ignored in favour of the global `total_epochs`; it is now honoured.
        initial_epoch: epoch index to resume from. Defaults to the global
            `initial_epochs`, read at call time.

    Returns:
        Tuple `(model, score, probs, history)`: the same model object, the
        result of `evaluate_generator` on the training generator, the
        predictions for the test generator, and the Keras training history.
    """
    if initial_epoch is None:
        initial_epoch = initial_epochs
    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=(len(train_frame) // batch_size),
        # Keras expects the index of the final epoch, not the number of new epochs.
        epochs=initial_epoch + epochs,
        initial_epoch=initial_epoch,
        validation_data=validation_generator,
        callbacks=None
    )
    # NOTE(review): the score is computed on the training generator, not on the
    # validation data — confirm that this is intended.
    score = model.evaluate_generator(train_generator, verbose=1)
    probs = model.predict_generator(test_generator, verbose=1)
    return model, score, probs, history

In [0]:
# Bind the returned model to `model` (it is the same object that was passed in)
# instead of overwriting the name of the `fit_model` training function.
model, score, probs, history = fit_model_FT(model, batch_size=32, epochs=10)

In [0]:
# Per-epoch curves recorded during the most recent training run.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# `history.epoch` lists the epoch index of every recorded entry. Plotting
# against it keeps the x-axis in absolute epochs when training resumed from a
# non-zero `initial_epoch`, instead of restarting the axis at 0.
epoch_axis = history.epoch

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(epoch_axis, acc, label='Training Accuracy')
plt.plot(epoch_axis, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# Keep the automatic lower limit but pin the upper limit to 1.
plt.ylim([min(plt.ylim()),1])
plt.title('Training Accuracy')

plt.subplot(2, 1, 2)
plt.plot(epoch_axis, loss, label='Training Loss')
plt.plot(epoch_axis, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.title('Training Loss')
plt.xlabel('epoch')
plt.show()

After 10 fine tuning epochs the hit rates increased to 63 (top), 82 (top3), and 88 % (top5).

In [0]:
# Count top-1 / top-3 / top-5 hits of `probs` against the integer labels of the
# test generator and show every 100th sample (offset 20) with its picture.
correct_prediction = 0
correct_top3 = 0
correct_top5 = 0

# Generator class index -> folder name, built once instead of searching the
# class_indices values for every lookup.
index_to_folder = {index: folder for folder, index in test_generator.class_indices.items()}

for ii in range(len(probs)):
    P_this = np.argmax(probs[ii])
    GT_this = test_generator.labels[ii]
    if P_this == GT_this:
        correct_prediction += 1
    correct_top3 += top3(probs[ii],GT_this)
    correct_top5 += top5(probs[ii],GT_this)
    if ii % 100 == 20:
        t3, p3 = top3_idx(probs[ii])
        # Folder names are cast to int and used as keys into Birds.
        names = [Birds[int(index_to_folder[idx])] for idx in t3]
        print("Prediction: {}, {}, or {} ({} ({:.1f} %), {} ({:.1f} %), or {} ({:.1f} %))".format(
            t3[0], t3[1], t3[2],
            names[0], p3[0] * 100,
            names[1], p3[1] * 100,
            names[2], p3[2] * 100))
        print("Actual: {} ({})".format(GT_this, Birds[int(index_to_folder[GT_this])]))

        plt.figure()
        # Not named `image`: that would shadow the keras.preprocessing `image`
        # module imported at the top and break a re-run of the generator cell.
        sample_picture = plt.imread('data/test/'+test_generator.filenames[ii])
        plt.imshow(sample_picture)
        plt.show()

# Hit count and hit rate for top-1, top-3 and top-5, then the number of predictions.
print(correct_prediction, correct_prediction/len(probs))
print(correct_top3, correct_top3/len(probs))
print(correct_top5, correct_top5/len(probs))
print(len(probs))

In [0]:
# Per-image results: top-1 prediction, top-3 predictions, and whether the true
# label is among the top 3.
Prediction = []
Prediction3 = []
Correct_prediction3 = []
for ii in range(len(probs)):
    Prediction.append(np.argmax(probs[ii]))
    Prediction3.append(top3_idx(probs[ii])[0])
    # Plain membership test; np.asscalar was removed from NumPy and np.in1d is deprecated.
    Correct_prediction3.append(bool(test_generator.labels[ii] in Prediction3[ii]))

# Boolean per-image hit masks, computed once and reused for every species.
top1_hit = np.asarray(Prediction) == test_generator.labels
top3_hit = np.asarray(Correct_prediction3, dtype=bool)

# Per-species results, indexed by the generator's class index: number of test
# images, top-1 hit rate and top-3 hit rate.
Correct_predicted = []
Correct_predicted3 = []
Species_length = []
for ii in range(len(np.unique(test_generator.labels))):
    is_species = test_generator.labels == ii
    n_species = is_species.sum()
    Species_length.append(n_species)
    Correct_predicted.append((is_species & top1_hit).sum() / n_species)
    Correct_predicted3.append((is_species & top3_hit).sum() / n_species)


For some species the model performed terribly. 

In [0]:
# List every species whose top-3 hit rate is below 40 %: class index, top-1
# rate, top-3 rate, number of training images, species name.
folder_by_index = {index: folder for folder, index in test_generator.class_indices.items()}
n_species = len(np.unique(test_generator.labels))
for ii in range(n_species):
    if Correct_predicted3[ii] < 0.4:
        n_train_images = sum((train_generator.labels == ii))
        species_name = Birds[int(folder_by_index[ii])]
        print('{}: {:.2f}, {:.2f}, {}, {}'.format(
            ii, Correct_predicted[ii], Correct_predicted3[ii], n_train_images, species_name))

In [0]:
# Epoch bookkeeping for the second fine-tuning stage: 20 epochs were already
# trained, 10 more are added here.
initial_epochs = 20
fine_tune_epochs = 10
total_epochs = initial_epochs + fine_tune_epochs


def fit_model_FT2(model, batch_size=32, epochs=10, initial_epoch=None):
    """Continue training `model` (second fine-tuning stage), then score and predict.

    Args:
        model: compiled Keras model; it is trained in place.
        batch_size: only used to derive the number of steps per epoch from the
            number of rows in `train_frame`.
        epochs: number of ADDITIONAL epochs to train. This argument used to be
            ignored in favour of the global `total_epochs`; it is now honoured.
        initial_epoch: epoch index to resume from. Defaults to the global
            `initial_epochs`, read at call time.

    Returns:
        Tuple `(model, score, probs, history)`: the same model object, the
        result of `evaluate_generator` on the training generator, the
        predictions for the test generator, and the Keras training history.
    """
    if initial_epoch is None:
        initial_epoch = initial_epochs
    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=(len(train_frame) // batch_size),
        # Keras expects the index of the final epoch, not the number of new epochs.
        epochs=initial_epoch + epochs,
        initial_epoch=initial_epoch,
        validation_data=validation_generator,
        callbacks=None
    )
    # NOTE(review): the score is computed on the training generator, not on the
    # validation data — confirm that this is intended.
    score = model.evaluate_generator(train_generator, verbose=1)
    probs = model.predict_generator(test_generator, verbose=1)
    return model, score, probs, history

In [0]:
# Bind the returned model to `model` (it is the same object that was passed in)
# instead of overwriting the name of the `fit_model` training function.
model, score, probs, history = fit_model_FT2(model, batch_size=32, epochs=10)

# Persist the fine-tuned model to Drive so it outlives the Colab instance.
model.save(datapath + '/model3_30.h5')



In [0]:
# Predict on the test generator with the current model and store the resulting
# array as a text file next to the dataset on Drive.
probs = model.predict_generator(test_generator, verbose=1)
np.savetxt(datapath + '/probs30.txt', probs)

After another 10 fine tuning epochs (total = 30 epochs) we have 66, 84, 89 % top 1, 3, 5 hit rate. At this point it may be more helpful to refine the probability based on the location and time of year that the picture is taken. 

In [0]:
# Count top-1 / top-3 / top-5 hits of `probs` against the integer labels of the
# test generator and show every 100th sample (offset 26) with its picture.
correct_prediction = 0
correct_top3 = 0
correct_top5 = 0

# Generator class index -> folder name, built once instead of searching the
# class_indices values for every lookup.
index_to_folder = {index: folder for folder, index in test_generator.class_indices.items()}

for ii in range(len(probs)):
    P_this = np.argmax(probs[ii])
    GT_this = test_generator.labels[ii]
    if P_this == GT_this:
        correct_prediction += 1
    correct_top3 += top3(probs[ii],GT_this)
    correct_top5 += top5(probs[ii],GT_this)
    if ii % 100 == 26:
        t3, p3 = top3_idx(probs[ii])
        # Folder names are cast to int and used as keys into Birds.
        names = [Birds[int(index_to_folder[idx])] for idx in t3]
        print("Prediction: {}, {}, or {} ({} ({:.1f} %), {} ({:.1f} %), or {} ({:.1f} %))".format(
            t3[0], t3[1], t3[2],
            names[0], p3[0] * 100,
            names[1], p3[1] * 100,
            names[2], p3[2] * 100))
        print("Actual: {} ({})".format(GT_this, Birds[int(index_to_folder[GT_this])]))

        plt.figure()
        # Not named `image`: that would shadow the keras.preprocessing `image`
        # module imported at the top and break a re-run of the generator cell.
        sample_picture = plt.imread('data/test/'+test_generator.filenames[ii])
        plt.imshow(sample_picture)
        plt.show()

# Hit count and hit rate for top-1, top-3 and top-5, then the number of predictions.
print(correct_prediction, correct_prediction/len(probs))
print(correct_top3, correct_top3/len(probs))
print(correct_top5, correct_top5/len(probs))
print(len(probs))

Using my photos to test.

In [0]:
# Directory iterator over the personal photos, using the same preprocessing,
# image size and batch size as the regular test set. shuffle=False keeps the
# prediction order aligned with .labels / .filenames, which the evaluation
# cell indexes by position.
Darren_test_generator = test_datagen.flow_from_directory(
 #directory=datapath + '/data/test/',
 directory='/content/Darren_test/',
 shuffle=False,
 #classes=list(Bird_list),
 target_size=(224, 224),
 batch_size=32,
 class_mode='categorical')

In [0]:
# Model predictions for every photo served by Darren_test_generator.
Darren_probs = model.predict_generator(Darren_test_generator, verbose=1)

In [0]:
# Count top-1 / top-3 / top-5 hits on the personal photo set and show every
# photo together with its three best predictions.
correct_prediction = 0
correct_top3 = 0
correct_top5 = 0

# Index -> folder-name tables for both generators, built once. The personal
# set has its own class indices, so its labels are translated to the model's
# indices through the shared folder names.
index_to_folder = {index: folder for folder, index in test_generator.class_indices.items()}
darren_index_to_folder = {index: folder for folder, index in Darren_test_generator.class_indices.items()}

for ii in range(len(Darren_probs)):
    P_this = np.argmax(Darren_probs[ii])
    GT_this = test_generator.class_indices[darren_index_to_folder[Darren_test_generator.labels[ii]]]
    if P_this == GT_this:
        correct_prediction += 1
    correct_top3 += top3(Darren_probs[ii],GT_this)
    correct_top5 += top5(Darren_probs[ii],GT_this)

    t3, p3 = top3_idx(Darren_probs[ii])
    # Folder names are cast to int and used as keys into Birds.
    names = [Birds[int(index_to_folder[idx])] for idx in t3]
    print("Prediction: {}, {}, or {} ({} ({:.1f} %), {} ({:.1f} %), or {} ({:.1f} %))".format(
        t3[0], t3[1], t3[2],
        names[0], p3[0] * 100,
        names[1], p3[1] * 100,
        names[2], p3[2] * 100))
    print("Actual: {} ({})".format(GT_this, Birds[int(index_to_folder[GT_this])]))

    plt.figure()
    # Not named `image`: that would shadow the keras.preprocessing `image`
    # module imported at the top and break a re-run of the generator cell.
    sample_picture = plt.imread('Darren_test/'+Darren_test_generator.filenames[ii])
    plt.imshow(sample_picture)
    plt.show()

# Hit count and hit rate for top-1, top-3 and top-5, then the number of photos.
print(correct_prediction, correct_prediction/len(Darren_probs))
print(correct_top3, correct_top3/len(Darren_probs))
print(correct_top5, correct_top5/len(Darren_probs))
print(len(Darren_probs))

Below is scratch code.

In [0]:
# Inverse of the generator's mapping: class index -> folder name.
class_indices_inv_map = {v: k for k, v in test_generator.class_indices.items()}

import pickle

# Write both lookup tables to Drive. The `with` blocks close each file even if
# pickling raises, so no handle is left open on a partially written file.
with open(datapath+'/class_indices_inv_map.pkl', 'wb') as output:
    pickle.dump(class_indices_inv_map, output)
with open(datapath+'/Birds.pkl', 'wb') as output:
    pickle.dump(Birds, output)