-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocessImages.py
More file actions
77 lines (66 loc) · 2.33 KB
/
processImages.py
File metadata and controls
77 lines (66 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy
from PIL import Image
import os
import pandas as pd
import pylab as pl
from sklearn.decomposition import RandomizedPCA
from sklearn.neighbors import KNeighborsClassifier
# Common size every image is resized to in img_to_matrix() before its pixels
# are read, so that all images yield the same number of pixels.
# PIL's Image.resize() interprets the tuple as (width, height) in pixels.
SIZE = (300, 150)
def img_to_matrix(filename):
    """Load an image file and return its pixels as a numpy array.

    The image is opened with PIL, resized to ``SIZE`` and its pixel data is
    read out as a flat sequence; each pixel is then converted to a list so
    that the result has one row per pixel.

    Parameters
    ----------
    filename : str
        Path of the image file to open.

    Returns
    -------
    numpy.ndarray or int
        The array built from the per-pixel lists, or the integer ``-1`` when
        the pixels cannot be converted to lists.  Callers detect the failure
        with ``isinstance(result, int)``, so the sentinel is kept as is.
    """
    img = Image.open(filename)
    img = img.resize(SIZE)
    pixels = list(img.getdata())
    try:
        # Build the rows eagerly.  On Python 3 ``map`` is lazy, so a
        # conversion error would never surface inside this ``try`` and
        # numpy would wrap the map object instead of the pixel rows.
        rows = [list(pixel) for pixel in pixels]
    except TypeError:
        # A pixel was not iterable (presumably a single-band image whose
        # pixels come back as plain numbers -- the original author observed
        # this on images that "seem to be corrupt").  Only TypeError is
        # caught so that unrelated errors and Ctrl-C are not swallowed.
        return -1
    return numpy.array(rows)
def flatten_image(img):
    """Flatten a 2-D numpy array into a 1-D array of all its values.

    ``img`` is expected to have shape (m, n); the returned array has
    m * n elements, in the order produced by ``reshape``.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    # Reshape into a single row of m * n values and hand back that row.
    return img.reshape(1, n_rows * n_cols)[0]
# ---------------------------------------------------------------------------
# Script body: load every image found under img_dir/<class>/, flatten it,
# project the whole set onto two principal components and scatter-plot the
# result with one colour per class.
# ---------------------------------------------------------------------------
img_dir = "images/"

# One sub-directory per class.  Listings are sorted so the processing order
# (and therefore the colour given to each class) is the same on every run,
# and stray non-directory entries are ignored instead of crashing listdir.
images_dir = [
    img_dir + name
    for name in sorted(os.listdir(img_dir))
    if os.path.isdir(img_dir + name)
]

data = []
labels = []
processed = 0
skipped = 0

for class_dir in images_dir:
    # The class label of an image is the name of the directory it lives in.
    # This keeps labels aligned with the data even when images are skipped,
    # unlike a fixed list whose length and order must match by luck.
    class_label = os.path.basename(class_dir)
    for image_name in sorted(os.listdir(class_dir)):
        image = class_dir + "/" + image_name
        img = img_to_matrix(image)
        # img_to_matrix reports an unusable image by returning an int
        if isinstance(img, int):
            skipped += 1
            continue
        processed += 1
        data.append(flatten_image(img))
        labels.append(class_label)

if not data:
    # Without this guard the size check below fails with a bare IndexError.
    raise SystemExit("No usable images found in " + img_dir)

# Keep only the vectors whose length matches the first one; numpy needs
# rows of equal length to build a 2-D matrix.  Labels are filtered together
# with the data so both stay in step.
same_size = len(data[0])
kept = [(vec, lab) for vec, lab in zip(data, labels) if len(vec) == same_size]
labels = [lab for _, lab in kept]
data = numpy.array([vec for vec, _ in kept])

# Single-argument print calls behave the same on Python 2 and Python 3.
print(len(data))
print(str(processed) + " images processed and " + str(skipped) + " images skipped.")

# NOTE(review): RandomizedPCA was deprecated in scikit-learn 0.18 and removed
# in 0.20; on newer versions the import has to become
# PCA(n_components=2, svd_solver="randomized").
pca = RandomizedPCA(n_components=2)
X = pca.fit_transform(data)

df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1]})
df["label"] = labels
print(df)

# Only as many classes as there are colours get plotted (zip truncates).
colors = ["red", "yellow"]
for label, color in zip(df['label'].unique(), colors):
    mask = df['label'] == label
    pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
pl.legend()
pl.show()