I have done K-means clustering over a dataset of images, after which I have 5 clusters. Now I want to extract the images from each cluster and save them separately. I have no idea how to do that. I have tried doing this, but I am not able to access the images.
Here is my code:
import matplotlib.pyplot
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.externals import joblib
import numpy as np
import cv2
import sys
import pickle
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import os
from skimage.feature import local_binary_pattern
# To calculate a normalized histogram
from scipy.stats import itemfreq
from sklearn.preprocessing import normalize
import cvutils
import csv
import numpy
from matplotlib.pyplot import imshow
from PIL import Image
import time
from sklearn.cluster import KMeans
# Wall-clock start of the whole run; the total is printed at the end of the script.
start_time = time.time()
############################################################################################
# Every data location hangs off one project root, so the root is spelled once.
BASE_DIR = '/home/irum/Desktop/Face-Recognition/thakarrecog'
dir_unknown = 'UntitledFolder'
# Folder with the images to cluster. The original literal read
# "thakarrecog /UntitledFolder/..." with a stray space that the two sibling
# paths below do not have; building the path from BASE_DIR removes it.
trainingSet = os.path.join(BASE_DIR, 'UntitledFolder', 'UntitledFolder1')
# Space-separated "<image path> <label>" file.
imageLabels = os.path.join(BASE_DIR, 'class_train')
# Output folder for the clustering results.
path = os.path.join(BASE_DIR, 'Clusters')
#Create CSV File
images_names = []
SEPARATOR = " "
# Single-argument call form prints the same text on Python 2 and Python 3.
print("start")
# Disabled draft: everything between the triple quotes is a bare string
# literal, so none of it runs. The text walks dir_unknown, appends
# "<path><SEPARATOR><label>" entries to images_names and writes a CSV file
# named 'class_train1'.
# NOTE(review): `label` is never initialised anywhere in the visible code, so
# the draft would need that fixed before being re-enabled.
'''
for (dirname, dirnames, filenames) in os.walk(dir_unknown):
for subdirname in dirnames:
subject_path = os.path.join(dirname, subdirname)
for filename in os.listdir(subject_path):
abs_path = "%s/%s" % (subject_path, filename)
#csv_path = "%s%s%d" % (abs_path, SEPARATOR, label)
#print "%s%s%d" % (abs_path, SEPARATOR, label)
images_names.append("%s%s%d" % (abs_path, SEPARATOR, label))
#print images_names
with open('class_train1', 'w') as myfile:
wr = csv.writer(myfile,delimiter=' ', doublequote=False , quotechar=None, lineterminator='\r\n', skipinitialspace=True)
wr.writerow(imageLabels)
label = label + 1
'''
# Store the path of training images in train_images
train_images = cvutils.imlist(trainingSet)
print("Total Images %s" % len(train_images))
# Dictionary containing image paths as keys and corresponding label as value
train_dic = {}
# The label file location comes from the imageLabels setting instead of a
# second hard-coded copy of the same path, so the two cannot drift apart.
# 'rb' is the mode the Python 2 csv module expects.
with open(imageLabels, 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ')
    for row in reader:
        # Blank or truncated lines produce fewer than two fields; indexing
        # them would raise IndexError, so they are skipped.
        if len(row) < 2:
            continue
        train_dic[row[0]] = row[1]
# Lists for the LBP histograms (X_test) and the matching image paths (X_name).
# Both are appended to together, so X_name[i] is always the file behind
# X_test[i]. y_test stays empty because the label lookup is disabled here.
X_test = []
X_name = []
y_test = []
print("Calculating LBP Histograms")
h1 = time.time()
# The LBP settings are identical for every image, so they are set once.
radius = 3
# Number of points to be considered as neighbours
no_points = 8 * radius
# Uniform LBP yields codes 0 .. no_points + 1, i.e. no_points + 2 distinct
# values. Fixed bin edges give every image a histogram of the same length.
# itemfreq only reported the codes that actually occurred, so an image missing
# one code produced a shorter vector and the feature matrix became ragged.
lbp_bin_edges = np.arange(0, no_points + 3)
# For each image in the training set calculate the LBP histogram
# and update X_test and X_name
for train_image in train_images:
    # Read the image; cv2.imread returns None instead of raising when the
    # file cannot be decoded, so such files are reported and skipped.
    im = cv2.imread(train_image)
    if im is None:
        print("Skipping unreadable image %s" % train_image)
        continue
    # Convert to grayscale as LBP works on grayscale image
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # Uniform LBP is used
    lbp = local_binary_pattern(im_gray, no_points, radius, method='uniform')
    # Calculate the histogram over the fixed set of bins
    counts, _ = np.histogram(lbp.ravel(), bins=lbp_bin_edges)
    # Normalize the histogram (float conversion avoids integer division)
    hist = counts.astype(np.float64) / counts.sum()
    # Append image path in X_name
    X_name.append(train_image)
    # Append histogram to X_test
    X_test.append(hist)
h2 = time.time()
t = (h2 - h1)
print("Time taken by LBPH %s" % t)
# Dump the data
joblib.dump((X_name, X_test), "lbp.pkl", compress=3)
p1 = time.time()
print("Applying PCA on LBP Histograms")
X_test = np.array(X_test)
# Fail early with a readable message when no histograms were collected or
# when they do not stack into a rectangular (n_images, n_bins) matrix.
if X_test.ndim != 2 or X_test.shape[0] == 0:
    raise ValueError(
        "Expected a non-empty 2-D array of LBP histograms, got shape %s"
        % (X_test.shape,))
# PCA cannot return more components than min(n_samples, n_features); asking
# for 26 on a smaller data set raises ValueError, so the request is clamped.
n_components = min(26, X_test.shape[0], X_test.shape[1])
pca = PCA(n_components=n_components)
pca.fit(X_test)
pca_activations = pca.transform(X_test)
p2 = time.time()
t = (p2 - p1)
print("Time taken by PCA %s" % t)
t1 = time.time()
print("Applying t-SNE on PCA")
# Feed the PCA-projected activations to t-SNE to obtain the final 2-D embedding.
X = np.array(pca_activations)
tsne_model = TSNE(
    n_components=2,
    learning_rate=500,
    perplexity=50,
    verbose=2,
    angle=0.2,
    early_exaggeration=7.0,
)
tsne = tsne_model.fit_transform(X)
# Diagnostics: type and contents of the embedding.
print("%s %s" % ("t-SNE Type", type(tsne)))
print("%s %s" % ("tsne", tsne))
t2 = time.time()
t = t2 - t1
print("%s %s" % ("Time taken by t-SNE", t))
def _rescale_to_unit(values):
    """Shift and scale *values* so its minimum maps to 0 and its maximum to 1."""
    return (values - np.min(values)) / (np.max(values) - np.min(values))


n1 = time.time()
print("normalize t-sne points to {0,1}")
# Each embedding axis is rescaled independently.
tx = _rescale_to_unit(tsne[:, 0])
ty = _rescale_to_unit(tsne[:, 1])
n2 = time.time()
t = n2 - n1
print("%s %s" % ("Normalization completed in time", t))
# Canvas size of the mosaic and the target edge length of one thumbnail.
width = 5000
height = 5000
max_dim = 100
print("displaying")
full_image = Image.new('RGB', (width, height))
# Paste a shrunken copy of every image at its normalised t-SNE position.
for img, x, y in zip(X_name, tx, ty):
    tile = Image.open(img)
    # Integer shrink factor. `//` is explicit floor division, so the value
    # stays an int on Python 3 as well as Python 2.
    rs = max(1, tile.width // max_dim, tile.height // max_dim)
    # A very elongated image would otherwise have one side rounded down to 0
    # pixels; every tile keeps at least one pixel per side.
    tile_size = (max(1, tile.width // rs), max(1, tile.height // rs))
    tile = tile.resize(tile_size, Image.ANTIALIAS)
    # (width - max_dim) leaves room so tiles placed at x == 1 or y == 1 are
    # not pushed entirely off the canvas.
    full_image.paste(tile, (int((width - max_dim) * x), int((height - max_dim) * y)))
full_image.save("myTSNE.png")
import shutil  # only this section needs it, so it is imported here

print("K-Means clustering")
# K-Means runs on the 2-D t-SNE embedding. Row i of `images` is therefore a
# coordinate pair, NOT pixel data; the picture behind it is the file X_name[i].
images = np.asarray(tsne, np.float32)
N = len(images)
images = images.reshape(N, -1)
if len(X_name) != N:
    raise ValueError(
        "X_name has %d entries but the embedding has %d rows"
        % (len(X_name), N))
#using kmeans clustering having 5 clusters
kmeans = KMeans(n_clusters=5)
#passing the embedded points to kmeans
kmeans.fit(images)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_
colors = 10*['r.','g.','b.','c.','k.','y.','m.']
# One sub-folder per cluster (5 clusters means 5 folders) under `path`.
cluster_dirs = []
for cluster_id in range(len(centroids)):
    cluster_dir = os.path.join(path, 'cluster_%d' % cluster_id)
    if not os.path.isdir(cluster_dir):
        os.makedirs(cluster_dir)
    cluster_dirs.append(cluster_dir)
# Plot every point in its cluster colour and copy the ORIGINAL image file into
# the folder of its cluster. Writing the coordinate pair with cv2.imwrite, as
# the earlier version did, only produced tiny meaningless images.
# NOTE(review): files keep their base name; this assumes base names are unique
# within the training folder -- confirm against cvutils.imlist.
for src, point, cluster_id in zip(X_name, images, labels):
    print("coordinate: %s label: %s" % (point, cluster_id))
    plt.plot(point[0], point[1], colors[cluster_id], markersize=10)
    shutil.copy2(src, os.path.join(cluster_dirs[cluster_id], os.path.basename(src)))
plt.scatter(centroids[:, 0], centroids[:, 1], marker="x", s=150, linewidths=5, zorder=10)
plt.show()
end_time = time.time()
total_time = t = (end_time - start_time)
print("Total execution time in seconds %s" % total_time)
I am trying to extract the clusters here, but failing. I need the images in each cluster separately as output so that I can manipulate them further.
`#I want to Move each cluster to seperate folder (5 clusters means 5 folders)
For i in range(len(images)):
print("coordinate:",images[i], "label:", labels[i])
plt.plot(images[i][0], images[i][1], colors[labels[i]], markersize = 10)
img = cv2.convertScaleAbs(images[i])
print "Images Type", img.dtype
I want images in red cluster separate, in blue cluster separate and so on, in separate folders actually. 5 clusters 5 folders.
I have accessed images like this:
# Second attempt: tries to write out the members of cluster 1.
# NOTE(review): `images` is the float32 array built from the t-SNE output, so
# each `i` here is a coordinate row -- neither a picture nor an index.
for i,j in zip(images, labels):
    # NOTE(review): `j` is already the label paired with row `i`; `labels[j]`
    # looks up a different element of `labels`.
    if labels[j] == 1:
        #print "Images Type", images.dtype
        img = images[i]
        # Next free numeric file name: the highest "<number>.<ext>" among the
        # non-hidden entries of `path` (0 if there are none), plus one.
        pin=sorted([int(n[:n.find('.')]) for n in os.listdir(path)
                    if n[0]!='.' ]+[0])[-1] + 1
        cv2.imwrite('%s/%s.png' % (path, pin), img)
But I am getting deformed images at a very small size. I get output like this:
From your code, it seems that you have your images in the variable `images`, and that the variable `labels` is an array with the same dimension, containing the class labels.
If you want to get all the images for a class called `myclass`, then simply do:
# Keep only the images whose label equals the wanted class. A comprehension
# filters with `if`; `where` is not Python syntax.
images_in_myclass = [i for i,j in zip(images, labels) if j=='myclass']
`zip` allows you to iterate over the two arrays element-wise, and you are only returning the images for which the label condition is satisfied.
In your code, `images` does not contain the pictures. It's an array of coordinates:
images = np.asarray(tsne, np.float32)
Writing an array of coordinates to an image file of course yields such a small, glitchy picture. If you want the original images, copy the original image files.
Note that tSNE is a visualization technique. It is probably not a good idea to use this visualization for clustering; as Van der Maaten and Hinton note: "it is unclear how t-SNE performs on the more general dimensionality reduction tasks". For clustering, it may be sensible to use the original data (and a better algorithm than k-means); and use tSNE only for visualizing and validating the result.
The technical posts on this site are licensed under CC BY-SA 4.0. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.