Commit b06fd449 authored by Pietro Morerio's avatar Pietro Morerio
Browse files

Merge branch 'master' of gitlab.iit.it:pmorerio/siamese-tf

parents 28972ac1 77405cab
%% Cell type:code id: tags:
``` python
%matplotlib inline
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.contrib.slim as slim
import pandas as pd
from dataset import get_mnist
from model import *
from scipy.spatial.distance import cdist
from matplotlib import gridspec
```
%% Cell type:code id: tags:
``` python
# Load MNIST through the project's get_mnist helper, record the split sizes,
# and reshape every image to 28x28 with a single trailing channel axis.
mnist = get_mnist()
len_train = len(mnist.train.images)
len_test = len(mnist.test.images)
train_images = np.array([np.reshape(flat, (28, 28, 1)) for flat in mnist.train.images])
test_images = np.array([np.reshape(flat, (28, 28, 1)) for flat in mnist.test.images])
```
%% Output
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
%% Cell type:code id: tags:
``` python
def show_image(idxs, data):
    """Plot one or more images from `data` side by side in a single row.

    idxs: a single integer index, or any list/tuple/array of integer
        indices into the first axis of `data`.
    data: array indexed as data[i, :, :, 0]; channel 0 of each selected
        image is drawn with a gray colormap and the axes hidden.

    Returns None. Does nothing when `idxs` is empty.
    """
    # np.atleast_1d treats scalars, lists, tuples and arrays uniformly. A
    # plain `type(idxs) != np.ndarray` check would wrap a list of indices in
    # an extra dimension and treat the whole list as one (invalid) image.
    idxs = np.atleast_1d(idxs).ravel()
    if idxs.size == 0:
        # Nothing to draw; avoid building a grid with zero columns.
        return
    fig = plt.figure()
    gs = gridspec.GridSpec(1, len(idxs))
    for col, idx in enumerate(idxs):
        ax = fig.add_subplot(gs[0, col])
        ax.imshow(data[idx, :, :, 0], cmap='gray')
        ax.axis('off')
    plt.show()
```
%% Cell type:markdown id: tags:
## Create the siamese net feature extraction model
%% Cell type:code id: tags:
``` python
# Input placeholder for batches of 28x28 single-channel float32 images; the
# leading None leaves the batch size open.
img_placeholder = tf.placeholder(tf.float32, [None, 28, 28, 1], name='img')
# Build the network on the placeholder; `net` is the tensor evaluated by the
# later cells to obtain features. `siamese` presumably comes from the
# star-import of `model` -- confirm there what `reuse=False` controls.
net = siamese(img_placeholder, reuse=False)
```
%% Cell type:markdown id: tags:
The whole training set cannot be forwarded through the network in a single pass, so only the first `offset` training images are used.
%% Cell type:code id: tags:
``` python
# Number of leading training images forwarded through the network in one
# sess.run call. Capped at 30000 and never larger than the number of images
# available, so arrays sized with `offset` match `train_images[:offset]`.
offset = min(30000, len(train_images))
```
%% Cell type:markdown id: tags:
## Restore from checkpoint and compute the features of the train (first `offset` images) and test data
%% Cell type:code id: tags:
``` python
#train
# Restore the weights from model/model.ckpt and compute the features of the
# first `offset` training images in a single forward pass.
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "model/model.ckpt")
    train_feat = sess.run(net, feed_dict={img_placeholder: train_images[:offset]})
```
%% Output
INFO:tensorflow:Restoring parameters from model/model.ckpt
%% Cell type:code id: tags:
``` python
#test
# Restore the weights from model/model.ckpt and compute the features of all
# test images in a single forward pass.
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "model/model.ckpt")
    test_feat = sess.run(net, feed_dict={img_placeholder: test_images})
```
%% Output
INFO:tensorflow:Restoring parameters from model/model.ckpt
%% Cell type:markdown id: tags:
## Restore from checkpoint and compute the features of the corrupted train (first `offset` images) and test data
%% Cell type:code id: tags:
``` python
from syn_dataset import add_defect
#train
# Corrupt each of the first `offset` training images with add_defect, then
# compute the features of the corrupted images with the restored network.
clean_subset = train_images[:offset]
# Size the buffer from the slice actually taken, so no row is left
# uninitialised if `offset` exceeds the number of training images.
train_images_noisy = np.empty((len(clean_subset), 28, 28, 1))
for i, img in enumerate(clean_subset):
    train_images_noisy[i] = add_defect(img)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "model/model.ckpt")
    train_feat_noisy = sess.run(net, feed_dict={img_placeholder: train_images_noisy})
```
%% Output
INFO:tensorflow:Restoring parameters from model/model.ckpt
%% Cell type:code id: tags:
``` python
#test
# Corrupt every test image with add_defect, then compute the features of the
# corrupted images with the restored network.
# Size the buffer from test_images itself rather than a separate counter.
test_images_noisy = np.empty((len(test_images), 28, 28, 1))
for i, img in enumerate(test_images):
    test_images_noisy[i] = add_defect(img)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "model/model.ckpt")
    test_feat_noisy = sess.run(net, feed_dict={img_placeholder: test_images_noisy})
```
%% Output
INFO:tensorflow:Restoring parameters from model/model.ckpt
%% Cell type:markdown id: tags:
## Add noisy images and features to training and test sets
%% Cell type:code id: tags:
``` python
#add noisy images & features to training set
# Clean rows come first, corrupted rows second, for both features and images.
train_feat = np.vstack([train_feat, train_feat_noisy])
train_images = np.vstack([train_images[:offset], train_images_noisy])
assert len(train_images) == len(train_feat)
#add noisy images & features to test set
test_feat = np.vstack([test_feat, test_feat_noisy])
test_images = np.vstack([test_images, test_images_noisy])
assert len(test_images) == len(test_feat)
```
%% Cell type:code id: tags:
``` python
# Binary labels: 0 for the first half of each set, 1 for the second half.
# This matches sets built by stacking clean images first and corrupted second.
train_labels = np.concatenate([np.zeros(offset, dtype=int), np.ones(offset, dtype=int)])
test_labels = np.concatenate([np.zeros(len_test, dtype=int), np.ones(len_test, dtype=int)])
# Check both splits; the train labels must line up with the train images too.
assert len(train_labels) == len(train_images)
assert len(test_labels) == len(test_images)
```
%% Cell type:markdown id: tags:
## Searching the training set for images similar to a test image, based on siamese features
%% Cell type:code id: tags:
``` python
#generate new random test image
idx = np.random.randint(0, len_test+offset)
# 2050 -> 8
# 20 -> 9
# 8644 -> 6
# idx=20
im = test_images[idx]
#show the test image
show_image(idx, test_images)
print "This is image from id:", idx
```
%% Output
This is image from id: 4584
%% Cell type:code id: tags:
``` python
#calculate the cosine similarity and sort
dist = cdist(train_feat, np.expand_dims(test_feat[idx], axis=0), 'cosine')
rank = np.argsort(dist.ravel())
#show the top n similar image from train data
n = 10
show_image(rank[:n], train_images)
print "retrieved ids:", rank[:n]
```
%% Output
retrieved ids: [ 8797 9493 21609 15267 14938 50267 11193 25783 35165 31263]
retrieved ids: [52867 21609 27366 10395 20404 35361 49240 36499 22755 52635]
%% Cell type:markdown id: tags:
## Calculate test accuracy
%% Cell type:code id: tags:
``` python
correct_predictions=0
failures_test = []
failures_train = []
i=0
k=3 #nearest neighbour
for feat, label in zip(test_feat, test_labels):
print str(i)+'\r',
dist = cdist(train_feat, np.expand_dims(feat, axis=0), 'cosine')
rank = np.argsort(dist.ravel())
if label == int( np.round(np.mean(train_labels[rank[:k]])) ):
correct_predictions += 1
else:
failures_test.append(i)
failures_train.append(rank[0])
i+=1
print 'Accuracy = ' + str(float(correct_predictions)/float(len(test_labels) ))
```
%% Output
Accuracy = 0.9551
%% Cell type:code id: tags:
``` python
from numpy.random import choice
show=choice(range(len(failures_test)), n)
print str(n)+' failure cases are below '
print 'Test'
show_image(np.asarray(failures_test)[show], test_images)
print 'Closest neighbour in training set'
show_image(np.asarray(failures_train)[show], train_images)
```
%% Output
10 failure cases are below
Test
Closest neighbour in training set
%% Cell type:markdown id: tags:
## Visualizing Test Clusters with T-SNE
%% Cell type:code id: tags:
``` python
from sklearn.manifold import TSNE
# Embed the test features in two dimensions and colour each point by label.
model = TSNE(n_components=2, random_state=0)
TSNE_test = model.fit_transform(test_feat)
x_coords, y_coords = TSNE_test[:, 0], TSNE_test[:, 1]
plt.scatter(x_coords, y_coords, c=test_labels, s=10)
```
%% Output
<matplotlib.collections.PathCollection at 0x7f0f0c6aa2d0>
%% Cell type:markdown id: tags:
## Visualizing Train Clusters with T-SNE
%% Cell type:code id: tags:
``` python
model = TSNE(n_components=2, random_state=0)
# `size` rows are taken from the start and another `size` rows starting at
# `offset`, so 2*size points are embedded in total.
size = 10000
subset_feat = np.vstack([train_feat[:size], train_feat[offset:offset+size]])
subset_labels = np.concatenate([train_labels[:size], train_labels[offset:offset+size]])
TSNE_train = model.fit_transform(subset_feat)
plt.scatter(TSNE_train[:, 0], TSNE_train[:, 1], c=subset_labels, s=10)
```
%% Output
<matplotlib.collections.PathCollection at 0x7f0f79098510>
%% Cell type:code id: tags:
``` python
```
......
rm img/*
#rm img/*
rm -rf train.log/
python train.py
\ No newline at end of file
python train.py
......@@ -23,7 +23,7 @@ def show_batch(batch):
def add_defect(image, seed=1234):
    """Return `image` corrupted by one seeded pass of speckle noise.

    image: the image passed to random_noise.
    seed: seed forwarded to random_noise; the default 1234 keeps the
        previously hard-coded value, so existing callers are unaffected.

    NOTE(review): random_noise is presumably skimage.util.random_noise --
    confirm against this module's imports.
    """
    # Only the seeded call is kept: running an unseeded speckle pass first
    # would apply the noise twice and make the result non-reproducible.
    # A pepper-noise variant (mode='pepper') was used previously.
    return random_noise(image, mode='speckle', mean=-0.01, var=0.001, seed=seed)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment