Using Keras Sequential Model
Using Decision Tree Classifier
import numpy as np
# Seed NumPy's global random number generator with a fixed value (123) so
# that results drawn from it can be reproduced from run to run.
np.random.seed(123)
import pandas as p
import matplotlib.pyplot as plt
# Read the train and test CSV files and keep them as plain NumPy arrays.
# `DataFrame.as_matrix()` was deprecated and later removed from pandas;
# `.values` yields the same ndarray and is available on old and new versions.
trainFile = 'fashion-mnist_train.csv'
trainData = p.read_csv(trainFile).values
testFile = 'fashion-mnist_test.csv'
testData = p.read_csv(testFile).values
def makedataset(npArray):
    """Split a 2-D table into its target column and its feature columns.

    Parameters
    ----------
    npArray : array-like, shape (n_rows, n_columns)
        Table whose first column holds the target value of each row and
        whose remaining columns hold that row's features.

    Returns
    -------
    dict
        ``{"target": <first column>, "data": <all remaining columns>}``.
    """
    # asarray leaves an ndarray untouched and lets nested lists work as well.
    table = np.asarray(npArray)
    return {
        "target": table[:, 0],
        "data": table[:, 1:],
    }
# Separate targets from features for both splits.
trainDataSet, testDataSet = makedataset(trainData), makedataset(testData)

# Copy each part into its own stand-alone array.
X_train, y_train = (np.array(trainDataSet[part]) for part in ("data", "target"))
X_test, y_test = (np.array(testDataSet[part]) for part in ("data", "target"))

# Report the dimensions of every array as a quick sanity check.
for _caption, _array in (
    ("Train Data Set Shape :\t", X_train),
    ("Train Label Set Shape :\t", y_train),
    ("Test Data Set Shape :\t", X_test),
    ("Test Label Set Shape :\t", y_test),
):
    print(_caption, str(_array.shape))
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.figure import Figure
# Display name for each of the ten class ids.
labels = {
    0: "T Shirt/Top", 1: "Trouser", 2: "PullOver", 3: "Dress", 4: "Coat",
    5: "Sandal", 6: "Shirt", 7: "Sneaker", 8: "Bag",
    9: "Ankle Boot",  # spelling fixed; this text is shown as a figure title
}

# Show the first 20 training images in a 4x5 grid, each titled with its class.
fig = plt.figure(figsize=(15, 15))
for i in range(20):
    ax = fig.add_subplot(4, 5, i + 1)
    # Draw through the axes object rather than pyplot's implicit
    # "current axes", so title and image always land on the same subplot.
    ax.set_title(labels[y_train[i]])
    ax.imshow(X_train[i].reshape(28, 28), cmap="binary")
plt.show()
from keras.models import Sequential
# --- Sequential_model.add(): outline of the workflow coded further below ---
#
# This sketch is kept as commentary only. Executed at this point it fails:
# `Sequential_model`, `number_of_elements`, `height` and `width` are never
# defined, and the layer classes are imported only later in the file. It
# would also divide X_train by 255 a second time, on top of the
# normalisation performed further down.
#
#   X_train.reshape(number_of_elements, height, width, depth=1)
#   X_train /= 255
#   model = Sequential()
#   model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(28,28,1)))
#   model.add(Convolution2D(32, 3, 3, activation='relu'))
#   model.add(MaxPooling2D(pool_size=(2,2)))
#   model.add(Dropout(0.25))
#   model.add(Flatten())
#   model.add(Dense(128, activation='relu'))
#   model.add(Dropout(0.5))
#   model.add(Dense(10, activation='softmax'))
from keras.models import Sequential
from keras.layers import Dense,Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
# Reshape for the TensorFlow backend, which requires
# (number_of_elements, width, height, depth = 1).
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
print("Train Data Set Shape :\t", format(X_train.shape))
print("Test Data Set Shape :\t", format(X_test.shape))

# Normalise the data so that values range over [0, 1].
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# One-hot encode the class ids: each label becomes a 10-element vector.
# Every label array must go through to_categorical exactly once; encoding an
# already one-hot array again yields targets whose shape no longer matches
# the 10-unit softmax output of the model.
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
print(y_train.shape)
print(y_test.shape)
# Small CNN: two 3x3 convolutions, max-pooling and dropout, then a dense
# layer and a 10-way softmax output.
#
# The kernel size is passed as a single (3, 3) tuple. In the Keras 2
# signature Conv2D(filters, kernel_size, strides, ...) a third positional
# argument means the stride, so the Keras 1 spelling
# `Convolution2D(32, 3, 3)` only means "3x3 kernel" where a legacy-argument
# conversion is in place.
model = Sequential()
model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(Convolution2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
# --- Sequential_model.compile() ---
# (Section heading only: `Sequential_model` is not defined anywhere, so
# running it as a statement raises NameError.)

# Configure training with categorical cross-entropy loss, the Adadelta
# optimizer and accuracy as the reported metric.
model.compile(loss='categorical_crossentropy',
              optimizer='Adadelta',
              metrics=['accuracy'])

# Train for 12 epochs in batches of 32, holding out 10% of the training
# samples for validation.
model.fit(X_train, y_train,
          batch_size=32, epochs=12, verbose=1, validation_split=0.1)
from keras.models import load_model
# Write the trained network to disk, then read it back from the same file.
model_path = 'my_model.h5'
model.save(model_path)
model = load_model(model_path)  # loading model
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import Normalizer
# Decision-tree baseline, trained on the flat feature rows of each split.
Xdc_train, ydc_train = trainDataSet["data"], trainDataSet["target"]
Xdc_test, ydc_test = testDataSet["data"], testDataSet["target"]

# Fit the Normalizer on the training rows, then transform both splits with it.
scaller = Normalizer()
scaller.fit(Xdc_train)
Xdc_train_norm, Xdc_test_norm = (
    scaller.transform(split) for split in (Xdc_train, Xdc_test)
)

# Train the tree on the normalised training rows.
clf = DecisionTreeClassifier()
clf.fit(Xdc_train_norm, ydc_train)
# Predicted class id for every test image. `Sequential.predict_classes` has
# been removed from newer Keras/TensorFlow releases; taking the argmax of
# the per-class output of predict() gives the same ids and works on both old
# and new versions.
seq_prediction = np.argmax(
    model.predict(X_test, batch_size=32, verbose=1), axis=-1)
# Evaluate on the test set against the one-hot encoded labels.
seq_accuracy = model.evaluate(X_test, y_test)
Accuracy: 92.38%
from sklearn.metrics import accuracy_score
# Predict on the *normalised* test rows: the tree was fitted on
# Xdc_train_norm, so the test data must be transformed the same way.
dt_prediction = clf.predict(Xdc_test_norm)
dt_accuracy = accuracy_score(ydc_test, dt_prediction)
Accuracy: 80.14%
# Sequential Model
# Predicted class id for every test image. `Sequential.predict_classes` has
# been removed from newer Keras/TensorFlow releases; the argmax of the
# per-class output of predict() gives the same ids on old and new versions.
seq_prediction = np.argmax(
    model.predict(X_test, batch_size=32, verbose=1), axis=-1)
seq_accuracy = model.evaluate(X_test, y_test)
# Index 1 of the evaluate() result is reported as the accuracy.
print("Accuracy Keras Model: %.2f percent" % (seq_accuracy[1] * 100))
#DecisionTree Classifier
from sklearn.metrics import accuracy_score,confusion_matrix,precision_score
from sklearn.model_selection import cross_val_score
# Score the decision tree on the normalised test rows.
dt_prediction = clf.predict(Xdc_test_norm)
dt_accuracy = accuracy_score(ydc_test, dt_prediction)
print("Accuracy Decision Tree: %.2f percent" % (100 * dt_accuracy,))

# 5-fold cross-validation of the same classifier on the normalised
# training rows.
scores = cross_val_score(
    clf,
    Xdc_train_norm,
    ydc_train,
    cv=5,
)
print(scores)
Get precision
# Precision of each model against the integer test labels. average=None
# requests the scores un-averaged. Each score is computed once; a second,
# identical pair of calls only repeated the work.
seq_precision = precision_score(ydc_test, seq_prediction, average=None)
dt_precision = precision_score(ydc_test, dt_prediction, average=None)
# Class names in class-id order, used as x tick labels.
# NOTE: this rebinds the module-level name `labels` to a list.
labels = ["T Shirt/Top", "Trouser", "PullOver", "Dress", "Coat", "Sandal",
          "Shirt", "Sneaker", "Bag", "Ankle Boot"]

# Line chart comparing the precision of both models, in percent.
fig = plt.figure(figsize=(12, 6))
plt.title("Precision Score for Both Models")
plt.ylabel("Percentage /%")
plt.xlabel("Classes")
# One x position per class, derived from the label list instead of a
# hand-written 0..9 sequence.
x_coordinate = list(range(len(labels)))
plt.xticks(x_coordinate, labels)
blueLine = plt.plot(x_coordinate, seq_precision * 100, 'b',
                    label="Keras Sequential Model")
greenLine = plt.plot(x_coordinate, dt_precision * 100, 'g',
                     label="Decision Tree Model")
# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()
From the graph, Shirt recorded the lowest precision for both models, while Trouser, Sandal, Sneaker, Bag and Ankle Boot recorded high precision. However, T Shirt, PullOver, Dress and Coat have low precision. This means that a prediction of one of these classes carries a confidence level of only around 80% for the Keras model and around 70% for the decision tree model.
# Confusion matrix of each model's predictions against the integer test
# labels (Keras model first, decision tree second).
seq_confMat, dt_confMat = (
    confusion_matrix(ydc_test, predicted)
    for predicted in (seq_prediction, dt_prediction)
)
def plotConfMatrix(conf_mat, title):
    """Draw a confusion matrix as a heat map annotated with its raw counts.

    The colour of each cell shows its count as a fraction of the row total;
    the text written in each cell is the raw count itself.

    Parameters
    ----------
    conf_mat : array-like, shape (n_rows, n_cols)
        Matrix of counts to display.
    title : str
        Title shown above the plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    counts = np.asarray(conf_mat)

    # Row-normalise the counts. A row whose total is zero is left at zero
    # instead of dividing by zero.
    row_totals = counts.sum(axis=1, keepdims=True).astype(float)
    norm_conf = np.divide(
        counts,
        row_totals,
        out=np.zeros(counts.shape, dtype=float),
        where=row_totals != 0,
    )

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    ax.set_aspect(1)
    ax.imshow(norm_conf, cmap=plt.cm.jet, interpolation='nearest')

    # Write the raw count into every cell; xy is (column, row).
    n_rows, n_cols = counts.shape
    for row in range(n_rows):
        for col in range(n_cols):
            ax.annotate(str(counts[row][col]), xy=(col, row),
                        horizontalalignment='center',
                        verticalalignment='center')

    labels = ["T Shirt/Top", "Trouser", "PullOver", "Dress", "Coat",
              "Sandal", "Shirt", "Sneaker", "Bag", "Ankle Boot"]
    plt.title(title)
    # The x axis runs over columns and the y axis over rows.
    plt.xticks(range(n_cols), labels[:n_cols])
    plt.yticks(range(n_rows), labels[:n_rows])
    plt.show()
# Plot the confusion matrix of each model. The second title previously read
# "Classfier"; the spelling is fixed because it is shown on the figure.
plotConfMatrix(seq_confMat, "Keras Sequential Model")
plotConfMatrix(dt_confMat, "Decision Tree Classifier")
Most of the confusion (incorrect results) comes from T-Shirt and Shirt. Pullover, Coat and Bag also add confusion to the predictions. Their similar shape/design may contribute to the confusion.
According to the confusion matrix of the Keras Sequential Model, over 143 T-Shirts were mistaken for Shirts, 96 Pullovers were mistaken for Coats, and 45 Dresses were mistaken for T-Shirts or Shirts. The other mistakes are the same confusions in the opposite direction (vice versa).
Trouser, Sneaker, Sandal, Dress, Ankle Boot and Bag show little confusion with one another. This may be because of their distinct shapes.
*Keras Sequential Model.*
This model did well. It took 26 minutes to train and 1 minute to predict the test data. The more epochs you use, the more training time it takes. While the accuracy improved slightly with the number of epochs, it reaches a plateau at around 93% accuracy. The model achieved **92.38%** accuracy in predicting the correct labels of the test data. According to the precision diagram for this model, its reliability in predicting labels such as Trouser, Sneaker, Sandal, Dress, Ankle Boot and Bag is very high. Hence this model should differentiate these labels very precisely.
However, this model has a hard time predicting T-Shirt and Shirt. The precision for both of these labels is just over **80%**, the lowest among all classes.
**Ways To Improve this model**
This model requires more training on the T-Shirt and Shirt labels. One way to improve the model on these particular labels is to give it more sample images to train on. However, the amount of data already given is huge, so this may not increase the accuracy of the model by much.
Another point: T-Shirt and Shirt share at least 80% of their shape. However, Shirts have collars while T-Shirts don't. The question is, can the model take this small detail into account? More research can be done on this part.
From a neural network perspective, adding more hidden layers and neurons can help the model train more precisely and accurately. This is well demonstrated in TensorFlow Playground. There are more parameters that can be tested and modified in order to increase the efficiency of the model's training. Validation splits and data preprocessing can also be tested.
*Decision Tree Classifier.*
This model did well overall, with 80% accuracy, but did badly compared to the Keras Sequential classifier. It took 15 minutes to train and 0.5 minutes to predict the test data.
This model performs reasonably, but worse than the Keras Sequential Model. It also faces the same problems as the Keras Sequential Model.
**Ways To Improve this model**
This model requires more training on the T-Shirt and Shirt labels especially, and on all other labels too. One way to improve the model on these particular labels is to give it more sample images to train on. However, the amount of data already given is huge, so this may not increase the accuracy of the model by much.
A decision tree works on a feature-by-feature basis, and improving it might not be as straightforward as it is for the Keras Sequential Model. That is why continuing to train the neural network model is recommended. In addition, preprocessing the data only increased accuracy by 0.5%, which may reflect its effectiveness.
1. I would like to explore more on optimizers for Keras Sequential Model.
2. The extra white background may have contributed to the similarities, and I would like to crop it off before training the classifier.
3. I would like to transform the data into black and white pixels only, i.e. each pixel is either 0 or 255. This is because what is important is the shape of the images, not the pattern on top of it. It didn't give me the results I wanted, but I believe this can work.
4. I would like to train the model using KFold.
5. I would like to add more layers to Keras Sequential Model and add more nodes to train it.
Created By | Class | DataSet |
---|---|---|
Dheenodara Rao | Data Mining | Fashion MNIST |