# blob: 4aa8f428dc51a8a34cea3dd53199d1f3fa21b676 [file] [log] [blame]
# Copyright 2016 The Vanadium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
"""Runs the k-means clustering algorithms on the encoded images."""
import matplotlib.pyplot as plt
import numpy as np
import os
from keras.models import model_from_json
from sklearn.cluster import KMeans
from theano import theano
def encode_images(images):
    """Encodes images using the model saved to disk earlier.

    Rebuilds the model from 'model.json', loads its weights from 'model.h5',
    evaluates the output of the model's layer 3 for `images` and writes the
    result to 'encoded_imgs.npy' in the current working directory.

    Args:
      images: Batch of inputs fed to the model's first layer. Presumably an
        array shaped like that layer's input -- TODO confirm against the
        script that trained and saved the model.

    Returns:
      None. The activations are only written to 'encoded_imgs.npy'.
    """
    # The context manager closes the file even if read() raises.
    with open('model.json', 'r') as json_file:
        loaded_model_json = json_file.read()

    model = model_from_json(loaded_model_json)
    model.load_weights('model.h5')
    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    # Compile a function mapping the first layer's input straight to the
    # output of layer 3, so only that part of the network is evaluated.
    get_activations = theano.function([model.layers[0].input],
                                      model.layers[3].output,
                                      allow_input_downcast=True)
    data = get_activations(images)
    np.save('encoded_imgs.npy', data)
def rms_centroid_distance(data, centroids, labels):
    """Returns the root-mean-square distance from samples to their centroids.

    Args:
      data: 2-D array with one sample per row.
      centroids: 2-D array with one cluster centre per row.
      labels: Integer array; labels[i] is the row of `centroids` that
        data[i] is assigned to.

    Returns:
      sqrt(SSE / number of samples), where SSE is the sum of the squared
      Euclidean distances between each sample and its assigned centroid.
    """
    data = np.asarray(data)
    centroids = np.asarray(centroids)
    # Indexing the centroids with the label array lines up one centroid per
    # sample, so the whole sum is one vectorized expression rather than a
    # Python-level loop over the samples.
    residuals = data - centroids[labels]
    # Accumulate in float64 so integer or float32 inputs neither truncate
    # nor lose precision in the sum and the division below.
    sse = np.sum(residuals ** 2, dtype=np.float64)  # Sum of squared errors.
    mse = sse / data.shape[0]  # Mean squared error.
    return np.sqrt(mse)


def main():
    """Clusters the saved encodings with k-means for k = 32, 64, ..., 1024.

    Loads 'encoded_imgs.npy', fits one KMeans model per k (stopping at the
    first k that exceeds the number of samples), prints
    '<k>: <inertia>: <rms distance to centroid>' for each fit and saves the
    centroids and labels under the 'clusters' directory.
    """
    data = np.load('encoded_imgs.npy')
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(data.shape)

    k_values = [2**exp for exp in range(5, 11)]
    if not os.path.exists('clusters'):
        os.makedirs('clusters')

    for k in k_values:
        # k_values is ascending, so once k exceeds the number of samples
        # every remaining value does too.
        if k > data.shape[0]:
            break
        # NOTE(review): n_jobs is believed to be deprecated in scikit-learn
        # 0.23 and removed in 1.0 -- confirm and drop it when upgrading.
        model = KMeans(init='k-means++', n_clusters=k, max_iter=10000,
                       verbose=0, tol=0.000000001, n_jobs=4)
        model.fit(data)

        centroids = model.cluster_centers_
        labels = model.labels_
        mse_sqrt = rms_centroid_distance(data, centroids, labels)
        print(str(k) + ': ' + str(model.inertia_) + ': ' + str(mse_sqrt))

        np.save('clusters/centroids_' + str(k) + '.npy', centroids)
        np.save('clusters/labels_' + str(k) + '.npy', labels)


if __name__ == '__main__':
    main()