# preprocess.py
# CIFAR-10 preprocessing utilities: unpickling the dataset batches and
# extracting/normalizing a two-class subset for binary classification.
# (Web-scrape page chrome and the rendered line-number gutter removed;
# they were extraction artifacts, not part of the source file.)
import pickle
import numpy as np
import tensorflow as tf
import os
def unpickle(file):
    """
    CIFAR data contains the files data_batch_1, data_batch_2, ...,
    as well as test_batch. We have combined all train batches into one
    batch for you. Each of these files is a Python "pickled"
    object produced with cPickle. The code below will open up a
    "pickled" object (each file) and return a dictionary.
    NOTE: DO NOT EDIT
    :param file: the file to unpickle
    :return: dictionary of unpickled data
    """
    # SECURITY NOTE(review): pickle.load executes arbitrary code when fed
    # untrusted data — only use this on trusted CIFAR data files.
    with open(file, 'rb') as fo:
        # Renamed from `dict` to avoid shadowing the builtin type.
        unpickled = pickle.load(fo, encoding='bytes')
    return unpickled
def get_data(file_path, first_class, second_class):
    """
    Given a file path and two target classes, returns an array of
    normalized inputs (images) and an array of labels.
    You will want to first extract only the data that matches the
    corresponding classes we want (there are 10 classes and we only want 2).
    You should make sure to normalize all inputs and also turn the labels
    into one hot vectors using tf.one_hot().
    Note that because you are using tf.one_hot() for your labels, your
    labels will be a Tensor, while your inputs will be a NumPy array. This
    is fine because TensorFlow works with NumPy arrays.
    :param file_path: file path for inputs and labels, something
    like 'CIFAR_data_compressed/train'
    :param first_class: an integer (0-9) representing the first target
    class in the CIFAR10 dataset, for a cat, this would be a 3
    :param second_class: an integer (0-9) representing the second target
    class in the CIFAR10 dataset, for a dog, this would be a 5
    :return: normalized NumPy array of inputs and tensor of labels, where
    inputs are of type np.float32 and has size (num_inputs, width, height, num_channels) and labels
    has size (num_examples, num_classes)
    """
    unpickled_file = unpickle(file_path)
    inputs = unpickled_file[b'data']
    labels = unpickled_file[b'labels']
    # Keep only examples of the two target classes; binary label is
    # 0 for first_class and 1 for second_class.
    indices = []
    binary_labels = []
    for i, label in enumerate(labels):
        if label == first_class:
            indices.append(i)
            binary_labels.append(0)
        elif label == second_class:
            indices.append(i)
            binary_labels.append(1)
    # Select, normalize to [0, 1], and reshape in NumPy directly so the
    # returned inputs are a genuine np.float32 ndarray (the original
    # round-tripped through TF tensors and relied on np.transpose to
    # convert back).
    inputs = np.asarray(inputs)[indices].astype(np.float32) / 255.0
    # CIFAR stores each image flat in channel-first order (3, 32, 32);
    # reshape, then move channels last -> (num_examples, 32, 32, 3).
    inputs = inputs.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    # Removed dead `labels = tf.gather(labels, filtered_l)` — its result
    # was immediately overwritten by tf.one_hot on the binary labels.
    labels = tf.one_hot(binary_labels, depth=2)
    return inputs, labels