-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimage_processing.py
More file actions
366 lines (257 loc) · 12.7 KB
/
image_processing.py
File metadata and controls
366 lines (257 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# This is the image processing module, which I used to preprocess my images, augment my dataset, and
# organize them into a structure suitable for input to a machine learning model
from __future__ import print_function
import os
import numpy as np
from IPython.display import Image
from PIL import Image
from scipy import ndimage
from six.moves import cPickle as pickle
from six.moves import range
# names of the training and testing folders; the functions below treat every
# sub-folder of these as one class of images
train_folder = "Train"
test_folder = "Test"
# standard dimensions (passed to PIL's resize) to which all images will be rescaled
dimensions = (50, 50)
# maximum angle by which the image can be rotated during data augmentation
# (passed to PIL's rotate, so presumably degrees)
max_angle = 15
# function to rotate an image by a given angle and fill in the corners exposed
# by the rotation with a specified color
def rotate_img(image, angle, color, filter = Image.NEAREST):
    """Return a new image: `image` rotated by `angle` on a `color` background.

    image  -- PIL image to rotate (left unmodified)
    angle  -- rotation angle, handed straight to PIL's ``rotate``
    color  -- fill value for the background, in the pixel format of `image`
    filter -- PIL resampling filter used for both the image and its matte
    """
    # Palette images and nearest-neighbour rotations get a 1-bit mask; any
    # other filter gets an 8-bit "true" matte.
    wants_bit_mask = image.mode == "P" or filter == Image.NEAREST
    matte_mode, matte_fill = ("1", 1) if wants_bit_mask else ("L", 255)
    matte = Image.new(matte_mode, image.size, matte_fill)

    rotated_image = image.rotate(angle, filter)
    # The matte is rotated the same way, so it marks exactly the pixels that
    # the rotated image covers.
    rotated_matte = matte.rotate(angle, filter)

    background = Image.new(image.mode, image.size, color)
    background.paste(rotated_image, rotated_matte)
    return background
# function to turn grey-colored backgrounds to white. r, b and g (note the
# argument order) specify the exact shade of grey to eliminate.
# Source: stackoverflow.
def make_greyscale_white_bg(im, r, b, g):
    """Replace one exact RGB shade with white and return a greyscale image.

    im      -- PIL image; it is converted to RGBA before matching
    r, b, g -- red, blue and green values of the shade to replace
    Returns a new single-channel ('L') PIL image.
    """
    pixels = np.array(im.convert('RGBA'))  # height x width x 4 array
    red_band = pixels[..., 0]
    green_band = pixels[..., 1]
    blue_band = pixels[..., 2]
    # Only pixels matching the requested shade on all three channels count
    matches_shade = (red_band == r) & (blue_band == b) & (green_band == g)
    # Overwrite the colour channels only; the alpha channel is left alone
    pixels[..., :-1][matches_shade] = (255, 255, 255)
    return Image.fromarray(pixels).convert('L')  # convert to greyscale image
# Make a specified number of copies of the given image by rotating the original image by
# some random angle, and save the images according to the naming scheme followed by the original images
def random_rotate(img, copies, curr_filename, path):
    """Save `copies` randomly rotated variants of `img` into `path`.

    img           -- PIL image to rotate (left unmodified)
    copies        -- number of rotated variants to write
    curr_filename -- integer used to number the first variant; later variants
                     use the following integers
    path          -- folder that receives the files, named "bcc" + the
                     zero-padded 6-digit number + ".bmp"
    """
    # The top-left pixel is used as the fill color for the rotated corners
    c_color = img.getpixel((0, 0))
    for offset in range(copies):
        # np.random.randint excludes its upper bound, so add 1 to make the
        # range [-max_angle, max_angle] inclusive on both ends
        angle = np.random.randint(-max_angle, max_angle + 1)
        new_im = rotate_img(img, angle, c_color)
        # save new image to file
        new_name = "bcc" + str(curr_filename + offset).zfill(6) + ".bmp"
        new_im.save(os.path.join(path, new_name))
# augment the dataset by adding random rotations. The count of the original images is needed
# for naming the new images in a sequential order
def augment_by_rotations(folder, prev_cnt):
    """Walk every class sub-folder of `folder` and open each image in it.

    folder   -- folder whose sub-folders each hold the images of one class
    prev_cnt -- number from which the new file names would start

    NOTE: the rotation step itself is commented out, so at present this only
    opens the images and prints a line per sub-folder.
    """
    for entry in sorted(os.listdir(folder)):
        path_to_folder = os.path.join(folder, entry)
        if not os.path.isdir(path_to_folder):
            continue  # only sub-folders hold images
        images = [os.path.join(path_to_folder, name)
                  for name in sorted(os.listdir(path_to_folder))
                  if name != '.DS_Store']
        filename = prev_cnt
        for image in images:
            im = Image.open(image)
            # make 2 copies of each image, with random rotations added in
            #random_rotate(im, 2, filename, path_to_folder)
            #filename = filename + 2
        print("Finished augmenting " + path_to_folder)
# function to invert colors (black -> white and white -> black). Since most of the image consists
# of white areas, specified by (255, 255, 255) in RGB, inverting the colors means more zeros, making
# future operations less computationally expensive
def invert_colors(im):
    """Swap pure black and pure white pixels and return a greyscale image.

    im -- PIL image; it is converted to RGBA before matching
    Returns a new single-channel ('L') PIL image.

    Only exactly-black and exactly-white pixels are swapped. Pure red
    (255, 0, 0) pixels also come out white; every other color keeps its RGB
    value. The alpha channel is never modified.
    """
    pixels = np.array(im.convert('RGBA'))  # height x width x 4 array
    rgb = pixels[..., :-1]  # view on the color channels (alpha excluded)

    # Classify every pixel from the untouched input before writing anything
    was_black = (rgb == 0).all(axis=-1)
    was_white = (rgb == 255).all(axis=-1)
    was_pure_red = (rgb[..., 0] == 255) & (rgb[..., 1] == 0) & (rgb[..., 2] == 0)

    rgb[was_white] = (0, 0, 0)
    rgb[was_black | was_pure_red] = (255, 255, 255)

    return Image.fromarray(pixels).convert('L')  # convert to greyscale image
# function to test the other functions on a specified image
# this is not needed once the other functions are confirmed to be working
def test_rotations(image_path="Train/172/bcc000002.bmp", angle=10):
    """Run one image through the whole pipeline and display the result.

    image_path -- image to load; defaults to the sample used originally
    angle      -- rotation angle handed to rotate_img; defaults to 10

    The image is resized to `dimensions`, its (127, 127, 127) grey background
    is whitened, its colors are inverted, and it is rotated with the top-left
    pixel as fill color. The result is shown with PIL's viewer; nothing is
    saved or returned.
    """
    img = Image.open(image_path)
    img = img.resize(dimensions)
    rot = make_greyscale_white_bg(img, 127, 127, 127)
    rot = invert_colors(rot)
    c_color = rot.getpixel((0, 0))
    rot = rotate_img(rot, angle, c_color)
    rot.show()
# function to process images (resizing, removal of grey backgrounds if any, greyscale conversion)
def process_images(folder):
    """Preprocess, in place, every image in every class sub-folder of `folder`.

    Each image is resized to `dimensions`, has its (127, 127, 127) grey
    background turned white, is converted to a single-channel greyscale image,
    and is then written back over the original file. Progress is printed
    every 1000 images and the total is printed at the end.
    """
    img_cnt = 0
    for entry in sorted(os.listdir(folder)):
        class_x = os.path.join(folder, entry)
        if not os.path.isdir(class_x):
            continue  # only sub-folders hold images
        for name in sorted(os.listdir(class_x)):
            if name == '.DS_Store':
                continue
            image = os.path.join(class_x, name)
            img_cnt += 1
            if img_cnt % 1000 == 0:  # show progress
                print("Processed %s images" % str(img_cnt))
            im = Image.open(image)
            im = im.resize(dimensions)  # resize image according to dimensions set
            # turn grey background (if any) to white, and convert into
            # greyscale image with 1 channel
            im = make_greyscale_white_bg(im, 127, 127, 127)
            #im = invert_colors(im)
            im.save(image)  # overwrite previous image file with new image
    print("Finished processing images, images found = ")
    print(img_cnt)
# Preprocess every image under the test and training folders.
# NOTE: these calls run as soon as the module is executed or imported, and
# process_images overwrites the image files on disk.
process_images(test_folder)
process_images(train_folder)
# rotation-based augmentation of the training set is currently disabled
#augment_by_rotations(train_folder, 1349)
# The code below organizes the processed images into structures suitable for use with ML models
# A lot of the code is obtained from the assignments in the Google Deep Learning course in Udacity
# NOTE: load_letter rejects any image whose shape is not (image_size, image_size),
# so this value has to agree with `dimensions` used when resizing.
image_size = 50 # Pixel width and height.
pixel_depth = 255.0 # Number of levels per pixel.
# function to load all images from given folder, then convert the dataset into a 3D array (image index, x, y)
# of floating point values, normalized to have approximately zero mean and
# standard deviation ~0.5 to make training easier.
def _read_image(image_file):
    """Read an image file into a numpy array.

    Stand-in for ``scipy.ndimage.imread``, which newer SciPy releases no
    longer provide. The mode handling mirrors what that helper did: palette
    images are expanded to RGB/RGBA and 1-bit images to 8-bit greyscale.
    Raises IOError when the file cannot be opened as an image.
    """
    with Image.open(image_file) as im:
        if im.mode == 'P':
            im = im.convert('RGBA' if 'transparency' in im.info else 'RGB')
        elif im.mode == '1':
            im = im.convert('L')
        # Materialize the pixels before the file is closed
        return np.array(im)


def load_letter(folder, min_num_images):
    """Load the data for a single letter label.

    folder         -- folder holding the images of one class
    min_num_images -- minimum number of images that must load successfully

    Returns a float32 array of shape (num_loaded, image_size, image_size)
    whose values are (pixel - pixel_depth / 2) / pixel_depth.

    Files that cannot be read are reported and skipped. Raises Exception when
    an image does not have shape (image_size, image_size) or when fewer than
    `min_num_images` images were loaded.
    """
    image_files = os.listdir(folder)
    # Sized for the best case; trimmed below to the images actually loaded
    dataset = np.ndarray(shape=(len(image_files), image_size, image_size),
                         dtype=np.float32)
    print(folder)
    num_images = 0
    for image in image_files:
        image_file = os.path.join(folder, image)
        try:
            image_data = (_read_image(image_file).astype(float) -  # normalize data
                          pixel_depth / 2) / pixel_depth
        except IOError as e:
            # skip unreadable files
            print('Could not read:', image_file, ':', e, '- it\'s ok, skipping.')
            continue
        if image_data.shape != (image_size, image_size):
            raise Exception('Unexpected image shape: %s' % str(image_data.shape))
        dataset[num_images, :, :] = image_data
        num_images = num_images + 1

    dataset = dataset[0:num_images, :, :]
    if num_images < min_num_images:  # check that the given min. no. of images has been loaded
        raise Exception('Many fewer images than expected: %d < %d' %
                        (num_images, min_num_images))

    print('Full dataset tensor:', dataset.shape)
    print('Mean:', np.mean(dataset))
    print('Standard deviation:', np.std(dataset))
    return dataset
# function to store the normalized tensors obtained from the load_letter function in
# .pickle files for later use
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
    """Pickle one dataset per class sub-folder and return the pickle paths.

    data_folders             -- folder whose sub-folders each hold one class
    min_num_images_per_class -- forwarded to load_letter as its minimum count
    force                    -- when True, re-create pickles that already exist

    Returns the list of "<sub-folder>.pickle" paths, one per sub-folder, in
    sorted sub-folder order. A failure to write a pickle is reported but does
    not stop the run, and its path is still included in the returned list.
    """
    dataset_names = []
    # os.listdir returns entries in arbitrary order. Sorting keeps the result
    # stable, so two folders with the same class names (e.g. train and test)
    # always yield their classes in the same order.
    for folder in sorted(os.listdir(data_folders)):
        curr_folder_path = os.path.join(data_folders, folder)
        if not os.path.isdir(curr_folder_path):
            continue
        set_filename = curr_folder_path + '.pickle'
        dataset_names.append(set_filename)
        if os.path.exists(set_filename) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % set_filename)
            continue
        print('Pickling %s.' % set_filename)
        # load and normalize the data
        dataset = load_letter(curr_folder_path, min_num_images_per_class)
        try:
            with open(set_filename, 'wb') as f:
                pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
        except Exception as e:
            print('Unable to save data to', set_filename, ':', e)
    return dataset_names
# load, normalize and pickle the train and test datasets. The second argument
# is the minimum number of images required per class; the third (force=True)
# re-creates the pickles even when they already exist.
train_datasets = maybe_pickle(train_folder, 1050, True) # list of per-class pickle paths for the training set
test_datasets = maybe_pickle(test_folder, 200, True) # list of per-class pickle paths for the test set
# function to make two empty arrays, one for the input data and one for the labels
def make_arrays(nb_rows, img_size):
    """Allocate uninitialized storage for `nb_rows` images and their labels.

    Returns (dataset, labels): a float32 array of shape
    (nb_rows, img_size, img_size) and an int32 array of shape (nb_rows,).
    Returns (None, None) when `nb_rows` is zero or otherwise falsy.
    """
    if not nb_rows:
        return None, None
    # np.ndarray only reserves memory; the caller must fill every row
    dataset = np.ndarray((nb_rows, img_size, img_size), dtype=np.float32)
    labels = np.ndarray(nb_rows, dtype=np.int32)
    return dataset, labels
# function to merge all the images in the given pickle files. Part of the training dataset is used to
# create a validation dataset for hyperparameter tuning.
def merge_datasets(pickle_files, train_size, valid_size=0):
    """Combine per-class pickles into one training and one validation set.

    pickle_files -- per-class pickle paths; the position of a file in this
                    list becomes the integer label of its class
    train_size   -- total number of training images requested
    valid_size   -- total number of validation images requested (0 = none)

    Every class contributes valid_size // num_classes validation images and
    train_size // num_classes training images, drawn after shuffling the
    class. When a size is not divisible by the number of classes, the
    returned arrays hold only the rows that were actually filled.

    Returns (valid_dataset, valid_labels, train_dataset, train_labels); the
    validation pair is (None, None) when `valid_size` is 0. Any error while
    processing a pickle is reported and re-raised.
    """
    num_classes = len(pickle_files)
    valid_dataset, valid_labels = make_arrays(valid_size, image_size)
    train_dataset, train_labels = make_arrays(train_size, image_size)
    vsize_per_class = valid_size // num_classes
    tsize_per_class = train_size // num_classes

    start_v, start_t = 0, 0
    end_l = vsize_per_class + tsize_per_class
    for label, pickle_file in enumerate(pickle_files):
        try:
            # NOTE: unpickling can execute arbitrary code; only feed this
            # function pickle files that this pipeline produced itself.
            with open(pickle_file, 'rb') as f:
                letter_set = pickle.load(f)
            # let's shuffle the letters to have random validation and training set
            np.random.shuffle(letter_set)
            if valid_dataset is not None:
                end_v = start_v + vsize_per_class
                valid_dataset[start_v:end_v, :, :] = letter_set[:vsize_per_class, :, :]
                valid_labels[start_v:end_v] = label
                start_v = end_v

            # training images come right after the ones taken for validation
            end_t = start_t + tsize_per_class
            train_dataset[start_t:end_t, :, :] = letter_set[vsize_per_class:end_l, :, :]
            train_labels[start_t:end_t] = label
            start_t = end_t
        except Exception as e:
            print('Unable to process data from', pickle_file, ':', e)
            raise

    # The buffers come from np.ndarray and are uninitialized. Rows past the
    # last filled one (present when a size is not divisible by the number of
    # classes) hold garbage, so drop them rather than return them as data.
    if valid_dataset is not None:
        valid_dataset = valid_dataset[:start_v]
        valid_labels = valid_labels[:start_v]
    if train_dataset is not None:
        train_dataset = train_dataset[:start_t]
        train_labels = train_labels[:start_t]

    return valid_dataset, valid_labels, train_dataset, train_labels
# set the no. of images to be used in each dataset; merge_datasets splits each
# total across the classes using integer division
train_size = 50000
valid_size = 7400
test_size = 7400
# the validation set is carved out of the training pickles
valid_dataset, valid_labels, train_dataset, train_labels = merge_datasets(
train_datasets, train_size, valid_size)
# no validation split is requested for the test pickles, so the first two
# return values are None and are discarded
_, _, test_dataset, test_labels = merge_datasets(test_datasets, test_size)
# report the shapes of the merged datasets and label arrays
print('Training:', train_dataset.shape, train_labels.shape)
print('Validation:', valid_dataset.shape, valid_labels.shape)
print('Testing:', test_dataset.shape, test_labels.shape)
# shuffle the images in each dataset randomly, and their corresponding labels
def randomize(dataset, labels):
    """Return `dataset` and `labels` shuffled with one shared permutation.

    dataset -- array whose first axis indexes the samples (any rank)
    labels  -- array with one label per sample

    Returns (shuffled_dataset, shuffled_labels) as new arrays; the inputs are
    not modified. Row i of the shuffled dataset still corresponds to label i.
    Raises ValueError when the two arrays disagree on the number of samples.
    """
    if dataset.shape[0] != labels.shape[0]:
        # Without this check a longer dataset would be silently truncated
        raise ValueError('dataset and labels differ in length: %d != %d' %
                         (dataset.shape[0], labels.shape[0]))
    permutation = np.random.permutation(labels.shape[0])
    # Indexing the first axis only keeps this independent of the sample rank
    shuffled_dataset = dataset[permutation]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels
# merge_datasets fills the arrays one class after another, so shuffle every
# split to mix the classes; images and labels are permuted together
train_dataset, train_labels = randomize(train_dataset, train_labels)
test_dataset, test_labels = randomize(test_dataset, test_labels)
valid_dataset, valid_labels = randomize(valid_dataset, valid_labels)
# file name for saving all the processed datasets and their corresponding label tensors
pickle_file = 'bengaliOCR.pickle'
# save all the processed datasets into one large pickle file for later usage
save = {
    'train_dataset': train_dataset,
    'train_labels': train_labels,
    'valid_dataset': valid_dataset,
    'valid_labels': valid_labels,
    'test_dataset': test_dataset,
    'test_labels': test_labels,
}
try:
    # the with-block closes the file even when pickling fails part-way
    with open(pickle_file, 'wb') as f:
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise