From cc0bbe2d80221fc4d95aeb12a3dfe520343ba2d7 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Mon, 9 Jul 2018 18:24:17 -0700 Subject: [PATCH 01/16] First attempt Need to modify image saving --- image_vae.py | 114 +++++++++++++++++++++++++++++++++++++++------------ main.py | 3 ++ 2 files changed, 90 insertions(+), 27 deletions(-) diff --git a/image_vae.py b/image_vae.py index 960972f..24c1885 100644 --- a/image_vae.py +++ b/image_vae.py @@ -14,7 +14,9 @@ from keras.preprocessing.image import ImageDataGenerator from keras import backend as K from keras.callbacks import TerminateOnNaN, CSVLogger, ModelCheckpoint, Callback +from keras.utils import Sequence +os.environ['HDF5_USE_FILE_LOCKING']='FALSE' class ImgSave(Callback): """ this callback saves sample input images, their reconstructions, and a @@ -33,8 +35,12 @@ def __init__(self, model): self.data_dir = model.data_dir self.save_dir = model.save_dir self.vae = model.vae - self.decoder = model.decoder + self.decoder = model.decoder + #modified + self.is_numpy = model.is_numpy + self.channels_to_save = model.channels_to_save + #end of modification def save_input_images(self): """ save input images @@ -149,7 +155,12 @@ def __init__(self, args): self.phase = args.phase self.steps_per_epoch = args.steps_per_epoch - + + #modified + self.is_numpy = args.is_numpy + self.channels_to_save = args.channels_to_save + #end of modification + self.data_size = len(os.listdir(os.path.join(self.data_dir, 'train'))) if self.steps_per_epoch == 0: @@ -301,18 +312,21 @@ def train(self): """ train VAE model """ - train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), - horizontal_flip = True, - vertical_flip = True) - - train_generator = train_datagen.flow_from_directory( - self.data_dir, - target_size = (self.image_size, self.image_size), - batch_size = self.batch_size, - class_mode = 'input') - - # instantiate callbacks - + #modified + if(self.is_numpy): + train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=True) + else: + train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), + horizontal_flip = True, + vertical_flip = True) + + train_generator = train_datagen.flow_from_directory( + self.data_dir, + target_size = (self.image_size, self.image_size), + batch_size = self.batch_size, + class_mode = 'input') + #end of modification + term_nan = TerminateOnNaN() csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'), @@ -338,21 +352,26 @@ def train(self): self.vae.save_weights(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5')) self.encode() - - + def encode(self): """ encode data with trained model """ - test_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1)) - - test_generator = test_datagen.flow_from_directory( - self.data_dir, - target_size = (self.image_size, self.image_size), - batch_size = self.batch_size, - shuffle = False, - class_mode = 'input') - + #modified + if(self.is_numpy): + test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=False) + else: + test_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1)) + + test_generator = test_datagen.flow_from_directory( + self.data_dir, + target_size = (self.image_size, self.image_size), + batch_size = self.batch_size, + shuffle = False, + class_mode = 'input') + #end of modification + + print('encoding training data...') x_test_encoded = self.encoder.predict_generator(test_generator, steps = self.data_size // self.batch_size) @@ -361,5 +380,46 @@ def encode(self): with outFile: writer = csv.writer(outFile) writer.writerows(x_test_encoded) - - + +#modified +class DataGenerator(Sequence): + def __init__(self, data_dir, batch_size, image_size, image_channel, shuffle): + self.image_size = image_size + self.batch_size = batch_size + self.list_IDs = glob.glob(os.path.join(self.data_dir, 'train', '*')) + self.image_channel = image_channel + self.shuffle = shuffle + self.on_epoch_end() + + def __len__(self): + return int(np.floor(len(self.list_IDs) / self.batch_size)) + + def __getitem__(self, index): + # Generate indexes of the batch + indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size] + + # Find list of IDs + list_IDs_temp = [self.list_IDs[k] for k in indexes] + + # Generate data + X = self.__data_generation(list_IDs_temp) + + return X + + def on_epoch_end(self): + self.indexes = np.arange(len(self.list_IDs)) + if self.shuffle == True: + np.random.shuffle(self.indexes) + + def __data_generation(self, list_IDs_temp): + # X : (n_samples, *dim, n_channels) + # Initialization + X = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) + + # Generate data + for i, ID in enumerate(list_IDs_temp): + # Store sample + X[i,] = np.load(ID) + + return X +#end of modification \ No newline at end of file diff --git a/main.py b/main.py index 2c9694a..fd89034 100644 --- a/main.py +++ b/main.py @@ -33,6 +33,9 @@ parser.add_argument('--steps_per_epoch', type=int, default=0, help='steps per epoch') +parser.add_argument('--is_numpy', type=bool, default=False, help='images are represented as numpy arrays') +parser.add_argument('--channels_to_save', type=tuple, default=(0,1,2), help='specify channels to save if image contains more than 3') + args = parser.parse_args() From e5df3946bd08f29b0e46d75aeab2aedc6a6a1bed Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Tue, 10 Jul 2018 18:15:22 -0700 Subject: [PATCH 02/16] Add files via upload --- image_vae.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/image_vae.py b/image_vae.py index 24c1885..5062dc6 100644 --- a/image_vae.py +++ b/image_vae.py @@ -17,7 +17,7 @@ from keras.utils import Sequence os.environ['HDF5_USE_FILE_LOCKING']='FALSE' - +''' class ImgSave(Callback): """ this callback saves sample input images, their reconstructions, and a latent space walk at the end of each epoch @@ -123,7 +123,7 @@ def on_epoch_end(self, epoch, logs={}): def on_train_begin(self, logs={}): self.save_input_images() - +''' class ImageVAE(): """ 2-dimensional variational autoencoder for latent phenotype capture @@ -338,7 +338,7 @@ def train(self): save_weights_only=True) # custom image saving callback - img_saver = ImgSave(self) + #img_saver = ImgSave(self) self.history = self.vae.fit_generator(train_generator, epochs = self.epochs, @@ -346,7 +346,8 @@ def train(self): callbacks = [term_nan, csv_logger, checkpointer, - img_saver], + #img_saver + ], steps_per_epoch = self.steps_per_epoch) self.vae.save_weights(os.path.join(self.save_dir, @@ -386,7 +387,7 @@ class DataGenerator(Sequence): def __init__(self, data_dir, batch_size, image_size, image_channel, shuffle): self.image_size = image_size self.batch_size = batch_size - self.list_IDs = glob.glob(os.path.join(self.data_dir, 'train', '*')) + self.list_IDs = glob.glob(os.path.join(data_dir, 'train', '*')) self.image_channel = image_channel self.shuffle = shuffle self.on_epoch_end() @@ -404,7 +405,7 @@ def __getitem__(self, index): # Generate data X = self.__data_generation(list_IDs_temp) - return X + return X,X def on_epoch_end(self): self.indexes = np.arange(len(self.list_IDs)) @@ -414,7 +415,7 @@ def on_epoch_end(self): def __data_generation(self, list_IDs_temp): # X : (n_samples, *dim, n_channels) # Initialization - X = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) + X = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel,)) # Generate data for i, ID in enumerate(list_IDs_temp): From c191639f6226dff133744f2d267ffe7e49b70b41 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 11 Jul 2018 10:28:53 -0700 Subject: [PATCH 03/16] Add files via upload --- image_vae.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/image_vae.py b/image_vae.py index 5062dc6..7d88cc5 100644 --- a/image_vae.py +++ b/image_vae.py @@ -347,7 +347,7 @@ def train(self): csv_logger, checkpointer, #img_saver - ], + ], steps_per_epoch = self.steps_per_epoch) self.vae.save_weights(os.path.join(self.save_dir, @@ -403,9 +403,9 @@ def __getitem__(self, index): list_IDs_temp = [self.list_IDs[k] for k in indexes] # Generate data - X = self.__data_generation(list_IDs_temp) + X,Y = self.__data_generation(list_IDs_temp) - return X,X + return X,Y def on_epoch_end(self): self.indexes = np.arange(len(self.list_IDs)) @@ -415,12 +415,15 @@ def on_epoch_end(self): def __data_generation(self, list_IDs_temp): # X : (n_samples, *dim, n_channels) # Initialization - X = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel,)) + X = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) + Y = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) + # Generate data for i, ID in enumerate(list_IDs_temp): # Store sample - X[i,] = np.load(ID) + X[i,] = np.transpose(np.load(ID), (1,2,0)) + Y[i,] = np.transpose(np.load(ID), (1,2,0)) - return X -#end of modification \ No newline at end of file + return X,Y +#end of modification From de5ec360c3851c95ba4731bbd2e1bbc8903ad3cc Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 11 Jul 2018 12:13:54 -0700 Subject: [PATCH 04/16] Add files via upload --- image_vae.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/image_vae.py b/image_vae.py index 7d88cc5..c7e9ef1 100644 --- a/image_vae.py +++ b/image_vae.py @@ -314,6 +314,7 @@ def train(self): #modified if(self.is_numpy): + print("HELLlloooooo") train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=True) else: train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), @@ -328,10 +329,10 @@ def train(self): #end of modification term_nan = TerminateOnNaN() - + csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'), separator='\t') - + checkpointer = ModelCheckpoint(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5'), verbose=1, @@ -339,7 +340,7 @@ def train(self): # custom image saving callback #img_saver = ImgSave(self) - + self.history = self.vae.fit_generator(train_generator, epochs = self.epochs, verbose = self.verbose, @@ -352,6 +353,7 @@ def train(self): self.vae.save_weights(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5')) + self.encode() def encode(self): From fc88056e083ed84ed741591546a657f83c54e1c7 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 11 Jul 2018 12:15:47 -0700 Subject: [PATCH 05/16] Add files via upload --- image_vae.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/image_vae.py b/image_vae.py index c7e9ef1..8f951e0 100644 --- a/image_vae.py +++ b/image_vae.py @@ -156,10 +156,8 @@ def __init__(self, args): self.steps_per_epoch = args.steps_per_epoch - #modified self.is_numpy = args.is_numpy self.channels_to_save = args.channels_to_save - #end of modification self.data_size = len(os.listdir(os.path.join(self.data_dir, 'train'))) @@ -312,7 +310,6 @@ def train(self): """ train VAE model """ - #modified if(self.is_numpy): print("HELLlloooooo") train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=True) @@ -326,7 +323,6 @@ def train(self): target_size = (self.image_size, self.image_size), batch_size = self.batch_size, class_mode = 'input') - #end of modification term_nan = TerminateOnNaN() @@ -360,7 +356,6 @@ def encode(self): """ encode data with trained model """ - #modified if(self.is_numpy): test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=False) else: @@ -372,7 +367,6 @@ def encode(self): batch_size = self.batch_size, shuffle = False, class_mode = 'input') - #end of modification print('encoding training data...') @@ -384,7 +378,6 @@ def encode(self): writer = csv.writer(outFile) writer.writerows(x_test_encoded) -#modified class DataGenerator(Sequence): def __init__(self, data_dir, batch_size, image_size, image_channel, shuffle): self.image_size = image_size @@ -427,5 +420,4 @@ def __data_generation(self, list_IDs_temp): X[i,] = np.transpose(np.load(ID), (1,2,0)) Y[i,] = np.transpose(np.load(ID), (1,2,0)) - return X,Y -#end of modification + return X,Y \ No newline at end of file From 970adb0ff3b5978c2938c373a51e58a7557b7f26 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 11 Jul 2018 12:20:56 -0700 Subject: [PATCH 06/16] Update image_vae.py --- image_vae.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/image_vae.py b/image_vae.py index 8f951e0..7166763 100644 --- a/image_vae.py +++ b/image_vae.py @@ -311,7 +311,6 @@ def train(self): """ if(self.is_numpy): - print("HELLlloooooo") train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=True) else: train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), @@ -413,11 +412,10 @@ def __data_generation(self, list_IDs_temp): X = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) Y = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) - # Generate data for i, ID in enumerate(list_IDs_temp): # Store sample X[i,] = np.transpose(np.load(ID), (1,2,0)) Y[i,] = np.transpose(np.load(ID), (1,2,0)) - return X,Y \ No newline at end of file + return X,Y From 1d0da54a9fcd421a616a34d3b50d88acf10d4e8c Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 11 Jul 2018 16:51:48 -0700 Subject: [PATCH 07/16] Add files via upload --- image_vae.py | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/image_vae.py b/image_vae.py index 7166763..ed79eab 100644 --- a/image_vae.py +++ b/image_vae.py @@ -17,12 +17,12 @@ from keras.utils import Sequence os.environ['HDF5_USE_FILE_LOCKING']='FALSE' + ''' class ImgSave(Callback): """ this callback saves sample input images, their reconstructions, and a latent space walk at the end of each epoch - """ - + """ def __init__(self, model): self.latent_dim = model.latent_dim @@ -156,8 +156,10 @@ def __init__(self, args): self.steps_per_epoch = args.steps_per_epoch + #modified self.is_numpy = args.is_numpy self.channels_to_save = args.channels_to_save + #end of modification self.data_size = len(os.listdir(os.path.join(self.data_dir, 'train'))) @@ -310,8 +312,11 @@ def train(self): """ train VAE model """ + #modified if(self.is_numpy): + print("HELLlloooooo") train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=True) + # print(train_generator.__getitem__(1)) else: train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), horizontal_flip = True, @@ -322,12 +327,13 @@ def train(self): target_size = (self.image_size, self.image_size), batch_size = self.batch_size, class_mode = 'input') - + #end of modification + print("1") term_nan = TerminateOnNaN() - + print("2") csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'), separator='\t') - + print("3") checkpointer = ModelCheckpoint(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5'), verbose=1, @@ -335,26 +341,28 @@ def train(self): # custom image saving callback #img_saver = ImgSave(self) - + print("4") self.history = self.vae.fit_generator(train_generator, epochs = self.epochs, verbose = self.verbose, - callbacks = [term_nan, + callbacks = [ + #term_nan, csv_logger, - checkpointer, + checkpointer #img_saver ], steps_per_epoch = self.steps_per_epoch) - + print("4.1") self.vae.save_weights(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5')) - + print("4.2") self.encode() - + print("5") def encode(self): """ encode data with trained model """ + #modified if(self.is_numpy): test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=False) else: @@ -366,6 +374,7 @@ def encode(self): batch_size = self.batch_size, shuffle = False, class_mode = 'input') + #end of modification print('encoding training data...') @@ -409,13 +418,13 @@ def on_epoch_end(self): def __data_generation(self, list_IDs_temp): # X : (n_samples, *dim, n_channels) # Initialization - X = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) - Y = np.empty((self.batch_size, self.image_size, self.image_size, self.image_channel)) + X = np.zeros((self.batch_size, self.image_size, self.image_size, self.image_channel)) + Y = np.zeros((self.batch_size, self.image_size, self.image_size, self.image_channel)) + # Generate data for i, ID in enumerate(list_IDs_temp): # Store sample - X[i,] = np.transpose(np.load(ID), (1,2,0)) - Y[i,] = np.transpose(np.load(ID), (1,2,0)) - - return X,Y + X[i,] = np.transpose(np.load(ID), (1,2,0))/(2**16 - 1) + Y[i,] = np.transpose(np.load(ID), (1,2,0))/(2**16 - 1) + return X,Y \ No newline at end of file From 48d9c21f2f216ccacf0958de5c579ca4a80404ea Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 11 Jul 2018 17:15:01 -0700 Subject: [PATCH 08/16] Add files via upload --- image_vae.py | 48 ++++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/image_vae.py b/image_vae.py index ed79eab..9ffd9f5 100644 --- a/image_vae.py +++ b/image_vae.py @@ -156,10 +156,8 @@ def __init__(self, args): self.steps_per_epoch = args.steps_per_epoch - #modified self.is_numpy = args.is_numpy self.channels_to_save = args.channels_to_save - #end of modification self.data_size = len(os.listdir(os.path.join(self.data_dir, 'train'))) @@ -311,12 +309,9 @@ def vae_loss(x, x_decoded_mean_squash): def train(self): """ train VAE model """ - - #modified + if(self.is_numpy): - print("HELLlloooooo") - train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=True) - # print(train_generator.__getitem__(1)) + train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, self.image_res, shuffle=True) else: train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), horizontal_flip = True, @@ -327,13 +322,12 @@ def train(self): target_size = (self.image_size, self.image_size), batch_size = self.batch_size, class_mode = 'input') - #end of modification - print("1") + term_nan = TerminateOnNaN() - print("2") + csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'), separator='\t') - print("3") + checkpointer = ModelCheckpoint(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5'), verbose=1, @@ -341,7 +335,7 @@ def train(self): # custom image saving callback #img_saver = ImgSave(self) - print("4") + self.history = self.vae.fit_generator(train_generator, epochs = self.epochs, verbose = self.verbose, @@ -350,21 +344,20 @@ def train(self): csv_logger, checkpointer #img_saver - ], + ], steps_per_epoch = self.steps_per_epoch) - print("4.1") + self.vae.save_weights(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5')) - print("4.2") + self.encode() - print("5") + def encode(self): """ encode data with trained model """ - - #modified + if(self.is_numpy): - test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, shuffle=False) + test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, self.image_res, shuffle=False) else: test_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1)) @@ -374,8 +367,6 @@ def encode(self): batch_size = self.batch_size, shuffle = False, class_mode = 'input') - #end of modification - print('encoding training data...') x_test_encoded = self.encoder.predict_generator(test_generator, @@ -387,11 +378,12 @@ def encode(self): writer.writerows(x_test_encoded) class DataGenerator(Sequence): - def __init__(self, data_dir, batch_size, image_size, image_channel, shuffle): + def __init__(self, data_dir, batch_size, image_size, image_channel, image_res, shuffle): self.image_size = image_size self.batch_size = batch_size self.list_IDs = glob.glob(os.path.join(data_dir, 'train', '*')) self.image_channel = image_channel + self.image_res = image_res self.shuffle = shuffle self.on_epoch_end() @@ -406,9 +398,9 @@ def __getitem__(self, index): list_IDs_temp = [self.list_IDs[k] for k in indexes] # Generate data - X,Y = self.__data_generation(list_IDs_temp) + X = self.__data_generation(list_IDs_temp) - return X,Y + return X,X def on_epoch_end(self): self.indexes = np.arange(len(self.list_IDs)) @@ -416,15 +408,11 @@ def on_epoch_end(self): np.random.shuffle(self.indexes) def __data_generation(self, list_IDs_temp): - # X : (n_samples, *dim, n_channels) # Initialization X = np.zeros((self.batch_size, self.image_size, self.image_size, self.image_channel)) - Y = np.zeros((self.batch_size, self.image_size, self.image_size, self.image_channel)) - # Generate data for i, ID in enumerate(list_IDs_temp): # Store sample - X[i,] = np.transpose(np.load(ID), (1,2,0))/(2**16 - 1) - Y[i,] = np.transpose(np.load(ID), (1,2,0))/(2**16 - 1) - return X,Y \ No newline at end of file + X[i,] = np.transpose(np.load(ID), (1,2,0))/(2**self.image_res - 1) + return X \ No newline at end of file From f1da1306ae40e529ac47e7f3fcfa828524e9f252 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Sun, 29 Jul 2018 15:52:30 -0700 Subject: [PATCH 09/16] Updates that hopefully work If numpy, saves reconstruction as tif at the end of each epoch if regular images, saves all images that it originally saved --- image_vae.py | 177 ++++++++++++++++++++++++++++++++++----------------- main.py | 14 ++-- 2 files changed, 127 insertions(+), 64 deletions(-) diff --git a/image_vae.py b/image_vae.py index 9ffd9f5..b9c8025 100644 --- a/image_vae.py +++ b/image_vae.py @@ -7,6 +7,12 @@ import numpy as np from scipy.stats import norm +#UNCOMMENT BELOW IF MATPLOTLIB IS GIVING YOU PROBLEMS +# import matplotlib +# matplotlib.use('TkAgg') + +from matplotlib import pyplot as plt + from keras.layers import Input, Conv2D, Flatten, Dense, Reshape, Lambda, Conv2DTranspose from keras import optimizers from keras import metrics @@ -18,7 +24,7 @@ os.environ['HDF5_USE_FILE_LOCKING']='FALSE' -''' + class ImgSave(Callback): """ this callback saves sample input images, their reconstructions, and a latent space walk at the end of each epoch @@ -36,30 +42,30 @@ def __init__(self, model): self.save_dir = model.save_dir self.vae = model.vae self.decoder = model.decoder - - #modified + self.is_numpy = model.is_numpy - self.channels_to_save = model.channels_to_save - #end of modification - + self.channels_to_use = model.channels_to_use + def save_input_images(self): """ save input images """ - input_figure = np.zeros((self.image_size * self.num_save, - self.image_size * self.num_save, + input_figure = np.zeros((self.image_size * self.num_save, + self.image_size * self.num_save, self.image_channel)) - + to_load = glob.glob(os.path.join(self.data_dir, 'train', '*'))[:(self.num_save * self.num_save)] - + input_images = np.array([np.array(Image.open(fname)) for fname in to_load]) - + if self.image_channel == 1: + input_images = input_images[..., None] # add extra index dimension + idx = 0 for i in range(self.num_save): for j in range(self.num_save): input_figure[i * self.image_size : (i+1) * self.image_size, j * self.image_size : (j+1) * self.image_size, :] = input_images[idx,:,:,:] idx += 1 - + imageio.imwrite(os.path.join(self.save_dir, 'input_images.png'), input_figure.astype(np.uint8)) @@ -67,63 +73,98 @@ def save_input_images(self): def save_input_reconstruction(self, epoch): """ save grid of both input and reconstructed images side by side """ - - recon_figure = np.zeros((self.image_size * self.num_save, - self.image_size * self.num_save, - self.image_channel)) - - to_load = glob.glob(os.path.join(self.data_dir, 'train', '*'))[:(self.num_save * self.num_save)] - - input_images = np.array([np.array(Image.open(fname)) for fname in to_load]) - scaled_input = input_images / float((2**self.image_res - 1)) - - recon_images = self.vae.predict(scaled_input, batch_size = self.batch_size) - scaled_recon = recon_images * float((2**self.image_res - 1)) - - idx = 0 - for i in range(self.num_save): - for j in range(self.num_save): - recon_figure[i * self.image_size : (i+1) * self.image_size, - j * self.image_size : (j+1) * self.image_size, :] = scaled_recon[idx,:,:,:] - idx += 1 + if (self.is_numpy): + to_load = np.array(glob.glob(os.path.join(self.data_dir, 'train', '*'))[:(self.num_save)]) + + input_numpys = np.zeros((to_load.size, self.image_size, self.image_size, self.image_channel)) + for i, fname in enumerate(to_load): + temp = np.transpose(np.load(fname), (1, 2, 0)) + channels = np.array(self.channels_to_use.split(',')).astype(int) + temp = temp[:, :, channels] + input_numpys[i,] = temp + scaled_input = input_numpys / float((2**self.image_res - 1)) + + recon_images = self.vae.predict(scaled_input, batch_size = self.batch_size) + scaled_recon = recon_images * float((2**self.image_res - 1)) + + fig, axs = plt.subplots(self.image_channel, self.num_save * 2, + figsize=(self.num_save * 4, self.image_channel * 2)) + for k, j in enumerate(range(0, self.num_save * 2, 2)): + for i in range(0, self.image_channel): + axs[i, j].imshow(input_numpys[k, :, :, i], cmap='gray', vmax=20000) + axs[i, j].set_xticks([]) + axs[i, j].set_yticks([]) + axs[i, j + 1].imshow(scaled_recon[k, :, :, i], cmap='gray', vmax=20000) + axs[i, j + 1].set_xticks([]) + axs[i, j + 1].set_yticks([]) + if (j == 0): + axs[i, j].set_ylabel('Channel ' + str(i + 1)) + if (i == 0): + axs[i, j].set_title(to_load[k][-33:-17], fontsize=12) + axs[i, j + 1].set_title(to_load[k][-33:-17], fontsize=12) + fig.tight_layout() + plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.tif'), dpi=300) + else: + recon_figure = np.zeros((self.image_size * self.num_save, + self.image_size * self.num_save, + self.image_channel)) + + to_load = glob.glob(os.path.join(self.data_dir, 'train', '*'))[:(self.num_save * self.num_save)] + + input_images = np.array([np.array(Image.open(fname)) for fname in to_load]) + scaled_input = input_images / float((2 ** self.image_res - 1)) + scaled_input = scaled_input[..., None] + + recon_images = self.vae.predict(scaled_input, batch_size=self.batch_size) + scaled_recon = recon_images * float((2 ** self.image_res - 1)) + if self.image_channel == 1: + scaled_recon = scaled_recon[..., None] + + idx = 0 + for i in range(self.num_save): + for j in range(self.num_save): + recon_figure[i * self.image_size: (i + 1) * self.image_size, + j * self.image_size: (j + 1) * self.image_size, :] = scaled_recon[idx, :, :, :] + idx += 1 + + imageio.imwrite(os.path.join(self.save_dir, + 'reconstructed', + 'recon_images_epoch_{0:03d}.png'.format(epoch)), + recon_figure.astype(np.uint8)) - imageio.imwrite(os.path.join(self.save_dir, - 'reconstructed', - 'recon_images_epoch_{0:03d}.png'.format(epoch)), - recon_figure.astype(np.uint8)) - - def latent_walk(self, epoch): """ latent space walking """ - + figure = np.zeros((self.image_size * self.latent_dim, self.image_size * self.latent_samp, self.image_channel)) grid_x = norm.ppf(np.linspace(0.05, 0.95, self.latent_samp)) - + for i in range(self.latent_dim): for j, xi in enumerate(grid_x): z_sample = np.zeros(self.latent_dim) z_sample[i] = xi - + z_sample = np.tile(z_sample, self.batch_size).reshape(self.batch_size, self.latent_dim) x_decoded = self.decoder.predict(z_sample, batch_size=self.batch_size) x_decoded = x_decoded * float((2**self.image_res - 1)) - + sample = x_decoded[0].reshape(self.image_size, self.image_size, self.image_channel) - + figure[i * self.image_size: (i + 1) * self.image_size, j * self.image_size: (j + 1) * self.image_size, :] = sample - - imageio.imwrite(os.path.join(self.save_dir, 'latent_walk', 'latent_walk_epoch_{0:03d}.png'.format(epoch)), + + imageio.imwrite(os.path.join(self.save_dir, 'latent_walk', 'latent_walk_epoch_{0:03d}.png'.format(epoch)), figure.astype(np.uint8)) def on_epoch_end(self, epoch, logs={}): self.save_input_reconstruction(epoch) - self.latent_walk(epoch) + if (not self.is_numpy): + self.latent_walk(epoch) def on_train_begin(self, logs={}): - self.save_input_images() -''' + if (not self.is_numpy): + self.save_input_images() + class ImageVAE(): """ 2-dimensional variational autoencoder for latent phenotype capture @@ -135,7 +176,7 @@ def __init__(self, args): self.data_dir = args.data_dir self.save_dir = args.save_dir - + self.image_size = args.image_size self.image_channel = args.image_channel self.image_res = args.image_res @@ -157,7 +198,7 @@ def __init__(self, args): self.steps_per_epoch = args.steps_per_epoch self.is_numpy = args.is_numpy - self.channels_to_save = args.channels_to_save + self.channels_to_use = args.channels_to_use self.data_size = len(os.listdir(os.path.join(self.data_dir, 'train'))) @@ -311,7 +352,8 @@ def train(self): """ if(self.is_numpy): - train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, self.image_res, shuffle=True) + train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, + self.image_res, self.channels_to_use, shuffle=True, ) else: train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), horizontal_flip = True, @@ -334,17 +376,17 @@ def train(self): save_weights_only=True) # custom image saving callback - #img_saver = ImgSave(self) + img_saver = ImgSave(self) self.history = self.vae.fit_generator(train_generator, epochs = self.epochs, verbose = self.verbose, callbacks = [ - #term_nan, + term_nan, csv_logger, - checkpointer - #img_saver - ], + checkpointer, + img_saver, + ], steps_per_epoch = self.steps_per_epoch) self.vae.save_weights(os.path.join(self.save_dir, @@ -357,7 +399,8 @@ def encode(self): """ if(self.is_numpy): - test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, self.image_res, shuffle=False) + test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, + self.image_res, self.channels_to_use, shuffle=False) else: test_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1)) @@ -370,15 +413,25 @@ def encode(self): print('encoding training data...') x_test_encoded = self.encoder.predict_generator(test_generator, - steps = self.data_size // self.batch_size) - + steps = self.data_size // self.batch_size, verbose = 1) + + list_IDs = np.array(glob.glob(os.path.join(self.data_dir, 'train', '*'))) + labeled_encodings = np.array(x_test_encoded, dtype=object) + labeled_encodings = np.insert(labeled_encodings, 0, + list_IDs[:((self.data_size // self.batch_size)*self.batch_size)], axis=1) + outFile = open(os.path.join(self.save_dir, 'encodings.csv'), 'w') with outFile: writer = csv.writer(outFile) + writer.writerows(labeled_encodings) + + outFile2 = open(os.path.join(self.save_dir, 'encodings2.csv'), 'w') + with outFile2: + writer = csv.writer(outFile2) writer.writerows(x_test_encoded) class DataGenerator(Sequence): - def __init__(self, data_dir, batch_size, image_size, image_channel, image_res, shuffle): + def __init__(self, data_dir, batch_size, image_size, image_channel, image_res, channels_to_use, shuffle): self.image_size = image_size self.batch_size = batch_size self.list_IDs = glob.glob(os.path.join(data_dir, 'train', '*')) @@ -386,6 +439,7 @@ def __init__(self, data_dir, batch_size, image_size, image_channel, image_res, s self.image_res = image_res self.shuffle = shuffle self.on_epoch_end() + self.channels_to_use = channels_to_use def __len__(self): return int(np.floor(len(self.list_IDs) / self.batch_size)) @@ -414,5 +468,8 @@ def __data_generation(self, list_IDs_temp): # Generate data for i, ID in enumerate(list_IDs_temp): # Store sample - X[i,] = np.transpose(np.load(ID), (1,2,0))/(2**self.image_res - 1) + temp = np.transpose(np.load(ID), (1,2,0))/(2**self.image_res - 1) + channels = np.array(self.channels_to_use.split(',')).astype(int) + temp = temp[:,:, channels] + X[i,] = temp return X \ No newline at end of file diff --git a/main.py b/main.py index fd89034..14fea45 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,7 @@ import sys import os import argparse - +import numpy as np from image_vae import ImageVAE parser = argparse.ArgumentParser(description='') @@ -34,7 +34,7 @@ parser.add_argument('--steps_per_epoch', type=int, default=0, help='steps per epoch') parser.add_argument('--is_numpy', type=bool, default=False, help='images are represented as numpy arrays') -parser.add_argument('--channels_to_save', type=tuple, default=(0,1,2), help='specify channels to save if image contains more than 3') +parser.add_argument('--channels_to_use', type=str, default='all', help='specify channels to use if complex images are used') args = parser.parse_args() @@ -46,7 +46,14 @@ def main(): os.makedirs(os.path.join(args.save_dir, 'latent_walk'), exist_ok=True) os.makedirs(os.path.join(args.save_dir, 'input'), exist_ok=True) os.makedirs(os.path.join(args.save_dir, 'reconstructed'), exist_ok=True) - + + if args.channels_to_use != 'all': + channels = np.array(args.channels_to_use.split(',')).astype(int) + if len(channels) != args.image_channel: + sys.exit('Number of specified channels does not image_channel argument!') + else: + args.channels_to_use = ','.join(str(i) for i in list(range(args.image_channel))) + if args.phase == 'train': model = ImageVAE(args) model.train() @@ -58,7 +65,6 @@ def main(): model = ImageVAE(args) model.vae.load_weights(args.checkpoint) model.train() - if __name__ == '__main__': main() From 6c29366e6e3c895338486ebe812aa4010524f932 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Sun, 29 Jul 2018 16:01:30 -0700 Subject: [PATCH 10/16] Updates that hopefully work 2 If numpy, saves reconstruction as tif at the end of each epoch if regular images, saves all images that it originally saved changed .tif to .png for consistency --- image_vae.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/image_vae.py b/image_vae.py index b9c8025..987b974 100644 --- a/image_vae.py +++ b/image_vae.py @@ -103,7 +103,7 @@ def save_input_reconstruction(self, epoch): axs[i, j].set_title(to_load[k][-33:-17], fontsize=12) axs[i, j + 1].set_title(to_load[k][-33:-17], fontsize=12) fig.tight_layout() - plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.tif'), dpi=300) + plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.png'), dpi=300) else: recon_figure = np.zeros((self.image_size * self.num_save, self.image_size * self.num_save, @@ -472,4 +472,4 @@ def __data_generation(self, list_IDs_temp): channels = np.array(self.channels_to_use.split(',')).astype(int) temp = temp[:,:, channels] X[i,] = temp - return X \ No newline at end of file + return X From 7d63bcfe9981d17d128f4af25fb93859abad302a Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Tue, 31 Jul 2018 16:52:31 -0700 Subject: [PATCH 11/16] fixed 1 channel numpy saving --- image_vae.py | 60 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/image_vae.py b/image_vae.py index 987b974..87d6b8d 100644 --- a/image_vae.py +++ b/image_vae.py @@ -8,8 +8,8 @@ from scipy.stats import norm #UNCOMMENT BELOW IF MATPLOTLIB IS GIVING YOU PROBLEMS -# import matplotlib -# matplotlib.use('TkAgg') +import matplotlib +matplotlib.use('TkAgg') from matplotlib import pyplot as plt @@ -56,8 +56,7 @@ def save_input_images(self): to_load = glob.glob(os.path.join(self.data_dir, 'train', '*'))[:(self.num_save * self.num_save)] input_images = np.array([np.array(Image.open(fname)) for fname in to_load]) - if self.image_channel == 1: - input_images = input_images[..., None] # add extra index dimension + input_images = input_images[..., None] # add extra index dimension idx = 0 for i in range(self.num_save): @@ -87,23 +86,40 @@ def save_input_reconstruction(self, epoch): recon_images = self.vae.predict(scaled_input, batch_size = self.batch_size) scaled_recon = recon_images * float((2**self.image_res - 1)) - fig, axs = plt.subplots(self.image_channel, self.num_save * 2, - figsize=(self.num_save * 4, self.image_channel * 2)) - for k, j in enumerate(range(0, self.num_save * 2, 2)): - for i in range(0, self.image_channel): - axs[i, j].imshow(input_numpys[k, :, :, i], cmap='gray', vmax=20000) - axs[i, j].set_xticks([]) - axs[i, j].set_yticks([]) - axs[i, j + 1].imshow(scaled_recon[k, :, :, i], cmap='gray', vmax=20000) - axs[i, j + 1].set_xticks([]) - axs[i, j + 1].set_yticks([]) + if self.image_channel == 1: + fig, axs = plt.subplots(1, self.num_save * 2, + figsize=(self.num_save * 4, self.image_channel * 2)) + for k, j in enumerate(range(0, self.num_save * 2, 2)): + axs[j].imshow(input_numpys[k, :, :, 0], cmap='gray', vmax=20000) + axs[j].set_xticks([]) + axs[j].set_yticks([]) + axs[j + 1].imshow(scaled_recon[k, :, :, 0], cmap='gray', vmax=20000) + axs[j + 1].set_xticks([]) + axs[j + 1].set_yticks([]) if (j == 0): - axs[i, j].set_ylabel('Channel ' + str(i + 1)) - if (i == 0): - axs[i, j].set_title(to_load[k][-33:-17], fontsize=12) - axs[i, j + 1].set_title(to_load[k][-33:-17], fontsize=12) - fig.tight_layout() - plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.png'), dpi=300) + axs[j].set_ylabel('Channel 1') + axs[j].set_title(to_load[k][-33:-17], fontsize=12) + axs[j + 1].set_title(to_load[k][-33:-17], fontsize=12) + fig.tight_layout() + plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.png')) + else: + fig, axs = plt.subplots(self.image_channel, self.num_save * 2, + figsize=(self.num_save * 4, self.image_channel * 2)) + for k, j in enumerate(range(0, self.num_save * 2, 2)): + for i in range(0, self.image_channel): + axs[i, j].imshow(input_numpys[k, :, :, i], cmap='gray', vmax=20000) + axs[i, j].set_xticks([]) + axs[i, j].set_yticks([]) + axs[i, j + 1].imshow(scaled_recon[k, :, :, i], cmap='gray', vmax=20000) + axs[i, j + 1].set_xticks([]) + axs[i, j + 1].set_yticks([]) + if (j == 0): + axs[i, j].set_ylabel('Channel ' + str(i + 1)) + if (i == 0): + axs[i, j].set_title(to_load[k][-33:-17], fontsize=12) + axs[i, j + 1].set_title(to_load[k][-33:-17], fontsize=12) + fig.tight_layout() + plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.png')) else: recon_figure = np.zeros((self.image_size * self.num_save, self.image_size * self.num_save, @@ -117,8 +133,6 @@ def save_input_reconstruction(self, epoch): recon_images = self.vae.predict(scaled_input, batch_size=self.batch_size) scaled_recon = recon_images * float((2 ** self.image_res - 1)) - if self.image_channel == 1: - scaled_recon = scaled_recon[..., None] idx = 0 for i in range(self.num_save): @@ -472,4 +486,4 @@ def __data_generation(self, list_IDs_temp): channels = np.array(self.channels_to_use.split(',')).astype(int) temp = temp[:,:, channels] X[i,] = temp - return X + return X \ No newline at end of file From 095fe883b2c1753e72a4d8615ab397ba8e6f6007 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 1 Aug 2018 16:11:33 -0700 Subject: [PATCH 12/16] Update image_vae.py --- image_vae.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/image_vae.py b/image_vae.py index 87d6b8d..6d27edc 100644 --- a/image_vae.py +++ b/image_vae.py @@ -9,7 +9,7 @@ #UNCOMMENT BELOW IF MATPLOTLIB IS GIVING YOU PROBLEMS import matplotlib -matplotlib.use('TkAgg') +matplotlib.use('Agg') from matplotlib import pyplot as plt @@ -486,4 +486,4 @@ def __data_generation(self, list_IDs_temp): channels = np.array(self.channels_to_use.split(',')).astype(int) temp = temp[:,:, channels] X[i,] = temp - return X \ No newline at end of file + return X From 41c907d0d43a6b93798fb16d0340cc81256e75bd Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Fri, 3 Aug 2018 10:59:10 -0700 Subject: [PATCH 13/16] Update image_vae.py --- image_vae.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_vae.py b/image_vae.py index 6d27edc..8eb93f1 100644 --- a/image_vae.py +++ b/image_vae.py @@ -427,7 +427,7 @@ def encode(self): print('encoding training data...') x_test_encoded = self.encoder.predict_generator(test_generator, - steps = self.data_size // self.batch_size, verbose = 1) + steps = self.data_size // self.batch_size) list_IDs = np.array(glob.glob(os.path.join(self.data_dir, 'train', '*'))) labeled_encodings = np.array(x_test_encoded, dtype=object) From d5f98bc10705fef89249409522248fb72fbcffca Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Wed, 15 Aug 2018 21:49:57 -0400 Subject: [PATCH 14/16] almost finalized --- image_vae.py | 535 +++++++++++++++++++++++++++++++-------------------- main.py | 94 +++++---- 2 files changed, 390 insertions(+), 239 deletions(-) diff --git a/image_vae.py b/image_vae.py index 8eb93f1..45cb194 100644 --- a/image_vae.py +++ b/image_vae.py @@ -1,13 +1,13 @@ import os import csv -import glob +import glob import imageio from PIL import Image import numpy as np from scipy.stats import norm -#UNCOMMENT BELOW IF MATPLOTLIB IS GIVING YOU PROBLEMS +# UNCOMMENT BELOW IF MATPLOTLIB IS GIVING YOU PROBLEMS import matplotlib matplotlib.use('Agg') @@ -19,107 +19,118 @@ from keras.models import Model from keras.preprocessing.image import ImageDataGenerator from keras import backend as K -from keras.callbacks import TerminateOnNaN, CSVLogger, ModelCheckpoint, Callback +from keras.callbacks import TerminateOnNaN, CSVLogger, ModelCheckpoint, Callback, EarlyStopping from keras.utils import Sequence -os.environ['HDF5_USE_FILE_LOCKING']='FALSE' - +os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE' class ImgSave(Callback): - """ this callback saves sample input images, their reconstructions, and a + """ this callback saves sample input images, their reconstructions, and a latent space walk at the end of each epoch - """ + """ + def __init__(self, model): - - self.latent_dim = model.latent_dim - self.latent_samp = model.latent_samp - self.batch_size = model.batch_size - self.image_size = model.image_size - self.num_save = model.num_save - self.image_channel = model.image_channel - self.image_res = model.image_res - self.data_dir = model.data_dir - self.save_dir = model.save_dir - self.vae = model.vae - self.decoder = model.decoder - - self.is_numpy = model.is_numpy - self.channels_to_use = model.channels_to_use + + self.latent_dim = model.latent_dim + self.latent_samp = model.latent_samp + self.batch_size = model.batch_size + self.image_size = model.image_size + self.num_save = model.num_save + self.image_channel = model.image_channel + self.image_res = model.image_res + self.data_dir = model.data_dir + self.save_dir = model.save_dir + self.vae = model.vae + self.decoder = model.decoder + + self.is_numpy = model.is_numpy + self.channels_to_use = model.channels_to_use + self.channel_labels = model.channel_labels + self.channel_first = model.channel_first def save_input_images(self): """ save input images """ input_figure = np.zeros((self.image_size * self.num_save, - self.image_size * self.num_save, - self.image_channel)) + self.image_size * self.num_save, + self.image_channel)) to_load = glob.glob(os.path.join(self.data_dir, 'train', '*'))[:(self.num_save * self.num_save)] input_images = np.array([np.array(Image.open(fname)) for fname in to_load]) - input_images = input_images[..., None] # add extra index dimension + if self.image_channel == 1: + input_images = input_images[..., None] # add extra index dimension idx = 0 for i in range(self.num_save): for j in range(self.num_save): - input_figure[i * self.image_size : (i+1) * self.image_size, - j * self.image_size : (j+1) * self.image_size, :] = input_images[idx,:,:,:] + input_figure[i * self.image_size: (i + 1) * self.image_size, + j * self.image_size: (j + 1) * self.image_size, :] = input_images[idx, :, :, :] idx += 1 imageio.imwrite(os.path.join(self.save_dir, 'input_images.png'), input_figure.astype(np.uint8)) - - + def save_input_reconstruction(self, epoch): """ save grid of both input and reconstructed images side by side """ + if (self.is_numpy): - to_load = np.array(glob.glob(os.path.join(self.data_dir, 'train', '*'))[:(self.num_save)]) + to_load = np.array(glob.glob(os.path.join(self.data_dir, 'train', '*'))[:self.num_save]) input_numpys = np.zeros((to_load.size, self.image_size, self.image_size, self.image_channel)) for i, fname in enumerate(to_load): - temp = np.transpose(np.load(fname), (1, 2, 0)) + if self.channel_first: + temp = np.transpose(np.load(fname), (1, 2, 0)) + else: + temp = np.load(fname) channels = np.array(self.channels_to_use.split(',')).astype(int) temp = temp[:, :, channels] input_numpys[i,] = temp - scaled_input = input_numpys / float((2**self.image_res - 1)) + scaled_input = input_numpys / float((2 ** self.image_res - 1)) - recon_images = self.vae.predict(scaled_input, batch_size = self.batch_size) - scaled_recon = recon_images * float((2**self.image_res - 1)) + recon_images = self.vae.predict(scaled_input, batch_size=self.batch_size) + scaled_recon = recon_images * float((2 ** self.image_res - 1)) + + ch_labels = np.array(self.channel_labels.split(',')) + fnames = (os.listdir(os.path.join(self.data_dir, 'train')))[:self.num_save] if self.image_channel == 1: fig, axs = plt.subplots(1, self.num_save * 2, figsize=(self.num_save * 4, self.image_channel * 2)) for k, j in enumerate(range(0, self.num_save * 2, 2)): - axs[j].imshow(input_numpys[k, :, :, 0], cmap='gray', vmax=20000) + axs[j].imshow(input_numpys[k, :, :, 0], cmap='gray', vmax=(2 ** self.image_res - 1)) axs[j].set_xticks([]) axs[j].set_yticks([]) - axs[j + 1].imshow(scaled_recon[k, :, :, 0], cmap='gray', vmax=20000) + axs[j + 1].imshow(scaled_recon[k, :, :, 0], cmap='gray', vmax=(2 ** self.image_res - 1)) axs[j + 1].set_xticks([]) axs[j + 1].set_yticks([]) if (j == 0): - axs[j].set_ylabel('Channel 1') - axs[j].set_title(to_load[k][-33:-17], fontsize=12) - axs[j + 1].set_title(to_load[k][-33:-17], fontsize=12) + axs[j].set_ylabel(ch_labels[0]) + axs[j].set_title(fnames[k][:-4], fontsize=12) + axs[j + 1].set_title(fnames[k][:-4], fontsize=12) fig.tight_layout() - plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.png')) + plt.savefig(os.path.join(self.save_dir, 'reconstructed', 'epoch ' + str(epoch) + '.png')) + plt.close("all") else: fig, axs = plt.subplots(self.image_channel, self.num_save * 2, figsize=(self.num_save * 4, self.image_channel * 2)) for k, j in enumerate(range(0, self.num_save * 2, 2)): for i in range(0, self.image_channel): - axs[i, j].imshow(input_numpys[k, :, :, i], cmap='gray', vmax=20000) + axs[i, j].imshow(input_numpys[k, :, :, i], cmap='gray', vmax=(2 ** self.image_res - 1)) axs[i, j].set_xticks([]) axs[i, j].set_yticks([]) - axs[i, j + 1].imshow(scaled_recon[k, :, :, i], cmap='gray', vmax=20000) + axs[i, j + 1].imshow(scaled_recon[k, :, :, i], cmap='gray', vmax=(2 ** self.image_res - 1)) axs[i, j + 1].set_xticks([]) axs[i, j + 1].set_yticks([]) if (j == 0): - axs[i, j].set_ylabel('Channel ' + str(i + 1)) + axs[i, j].set_ylabel(ch_labels[i]) if (i == 0): - axs[i, j].set_title(to_load[k][-33:-17], fontsize=12) - axs[i, j + 1].set_title(to_load[k][-33:-17], fontsize=12) + axs[i, j].set_title(fnames[:-4], fontsize=12) + axs[i, j + 1].set_title(fnames[:-4], fontsize=12) fig.tight_layout() - plt.savefig(os.path.join(self.save_dir,'reconstructed','epoch '+str(epoch)+'.png')) + plt.savefig(os.path.join(self.save_dir, 'reconstructed', 'epoch ' + str(epoch) + '.png')) + plt.close("all") else: recon_figure = np.zeros((self.image_size * self.num_save, self.image_size * self.num_save, @@ -129,7 +140,8 @@ def save_input_reconstruction(self, epoch): input_images = np.array([np.array(Image.open(fname)) for fname in to_load]) scaled_input = input_images / float((2 ** self.image_res - 1)) - scaled_input = scaled_input[..., None] + if self.image_channel == 1: + scaled_input = scaled_input[..., None] recon_images = self.vae.predict(scaled_input, batch_size=self.batch_size) scaled_recon = recon_images * float((2 ** self.image_res - 1)) @@ -160,16 +172,16 @@ def latent_walk(self, epoch): z_sample = np.tile(z_sample, self.batch_size).reshape(self.batch_size, self.latent_dim) x_decoded = self.decoder.predict(z_sample, batch_size=self.batch_size) - x_decoded = x_decoded * float((2**self.image_res - 1)) + x_decoded = x_decoded * float((2 ** self.image_res - 1)) sample = x_decoded[0].reshape(self.image_size, self.image_size, self.image_channel) figure[i * self.image_size: (i + 1) * self.image_size, - j * self.image_size: (j + 1) * self.image_size, :] = sample + j * self.image_size: (j + 1) * self.image_size, :] = sample imageio.imwrite(os.path.join(self.save_dir, 'latent_walk', 'latent_walk_epoch_{0:03d}.png'.format(epoch)), figure.astype(np.uint8)) - + def on_epoch_end(self, epoch, logs={}): self.save_input_reconstruction(epoch) if (not self.is_numpy): @@ -183,269 +195,377 @@ def on_train_begin(self, logs={}): class ImageVAE(): """ 2-dimensional variational autoencoder for latent phenotype capture """ - + def __init__(self, args): """ initialize model with argument parameters and build """ - self.data_dir = args.data_dir - self.save_dir = args.save_dir - - self.image_size = args.image_size - self.image_channel = args.image_channel - self.image_res = args.image_res - - self.latent_dim = args.latent_dim - self.inter_dim = args.inter_dim - self.num_conv = args.num_conv - self.batch_size = args.batch_size - self.epochs = args.epochs - self.nfilters = args.nfilters - self.learn_rate = args.learn_rate - self.epsilon_std = args.epsilon_std - self.latent_samp = args.latent_samp - self.num_save = args.num_save - self.verbose = args.verbose - - self.phase = args.phase - + self.data_dir = args.data_dir + self.save_dir = args.save_dir + + self.image_size = args.image_size + self.image_channel = args.image_channel + self.image_res = args.image_res + + self.latent_dim = args.latent_dim + self.inter_dim = args.inter_dim + self.num_conv = args.num_conv + self.batch_size = args.batch_size + self.epochs = args.epochs + self.nfilters = args.nfilters + self.learn_rate = args.learn_rate + self.epsilon_std = args.epsilon_std + self.latent_samp = args.latent_samp + self.num_save = args.num_save + self.verbose = args.verbose + + self.phase = args.phase + self.steps_per_epoch = args.steps_per_epoch - self.is_numpy = args.is_numpy - self.channels_to_use = args.channels_to_use + self.is_numpy = args.is_numpy + self.channel_first = args.channel_first + self.channels_to_use = args.channels_to_use + self.save_individual = args.save_individual + self.channel_labels = args.channel_labels self.data_size = len(os.listdir(os.path.join(self.data_dir, 'train'))) - + if self.steps_per_epoch == 0: self.steps_per_epoch = self.data_size // self.batch_size - - self.build_model() + self.build_model() def sampling(self, sample_args): """ sample latent layer from normal prior """ - + z_mean, z_log_var = sample_args - + epsilon = K.random_normal(shape=(K.shape(z_mean)[0], self.latent_dim), mean=0, stddev=self.epsilon_std) - + return z_mean + K.exp(z_log_var) * epsilon - - + def build_model(self): """ build VAE model """ - + input_dim = (self.image_size, self.image_size, self.image_channel) - + # encoder architecture - + x = Input(shape=input_dim) - + conv_1 = Conv2D(self.image_channel, kernel_size=self.num_conv, padding='same', activation='relu')(x) - + conv_2 = Conv2D(self.nfilters, kernel_size=self.num_conv, padding='same', activation='relu', strides=2)(conv_1) - + conv_3 = Conv2D(self.nfilters, kernel_size=self.num_conv, padding='same', activation='relu', strides=1)(conv_2) - + conv_4 = Conv2D(self.nfilters, kernel_size=self.num_conv, padding='same', activation='relu', strides=1)(conv_3) - + flat = Flatten()(conv_4) hidden = Dense(self.inter_dim, activation='relu')(flat) - + # reparameterization trick - - z_mean = Dense(self.latent_dim)(hidden) - z_log_var = Dense(self.latent_dim)(hidden) - - z = Lambda(self.sampling)([z_mean, z_log_var]) - - + + z_mean = Dense(self.latent_dim)(hidden) + z_log_var = Dense(self.latent_dim)(hidden) + + z = Lambda(self.sampling)([z_mean, z_log_var]) + # decoder architecture - output_dim = (self.batch_size, - self.image_size//2, - self.image_size//2, + output_dim = (self.batch_size, + self.image_size // 2, + self.image_size // 2, self.nfilters) - + # instantiate rather than pass through for later resuse - - decoder_hid = Dense(self.inter_dim, + + decoder_hid = Dense(self.inter_dim, activation='relu') - - decoder_upsample = Dense(self.nfilters * - self.image_size//2 * - self.image_size//2, + + decoder_upsample = Dense(self.nfilters * + self.image_size // 2 * + self.image_size // 2, activation='relu') decoder_reshape = Reshape(output_dim[1:]) - + decoder_deconv_1 = Conv2DTranspose(self.nfilters, kernel_size=self.num_conv, padding='same', strides=1, activation='relu') - + decoder_deconv_2 = Conv2DTranspose(self.nfilters, kernel_size=self.num_conv, padding='same', strides=1, activation='relu') - + decoder_deconv_3_upsamp = Conv2DTranspose(self.nfilters, - kernel_size = self.num_conv, - strides = 2, - padding = 'valid', - activation = 'relu') - + kernel_size=self.num_conv, + strides=2, + padding='valid', + activation='relu') + decoder_mean_squash = Conv2D(self.image_channel, - kernel_size = self.num_conv - 1, - padding = 'valid', - activation = 'sigmoid') - - hid_decoded = decoder_hid(z) - up_decoded = decoder_upsample(hid_decoded) - reshape_decoded = decoder_reshape(up_decoded) - deconv_1_decoded = decoder_deconv_1(reshape_decoded) - deconv_2_decoded = decoder_deconv_2(deconv_1_decoded) - x_decoded_relu = decoder_deconv_3_upsamp(deconv_2_decoded) - x_decoded_mean_squash = decoder_mean_squash(x_decoded_relu) + kernel_size=self.num_conv - 1, + padding='valid', + activation='sigmoid') + + hid_decoded = decoder_hid(z) + up_decoded = decoder_upsample(hid_decoded) + reshape_decoded = decoder_reshape(up_decoded) + deconv_1_decoded = decoder_deconv_1(reshape_decoded) + deconv_2_decoded = decoder_deconv_2(deconv_1_decoded) + x_decoded_relu = decoder_deconv_3_upsamp(deconv_2_decoded) + x_decoded_mean_squash = decoder_mean_squash(x_decoded_relu) # need to keep generator model separate so new inputs can be used - - decoder_input = Input(shape=(self.latent_dim,)) - _hid_decoded = decoder_hid(decoder_input) - _up_decoded = decoder_upsample(_hid_decoded) - _reshape_decoded = decoder_reshape(_up_decoded) - _deconv_1_decoded = decoder_deconv_1(_reshape_decoded) - _deconv_2_decoded = decoder_deconv_2(_deconv_1_decoded) - _x_decoded_relu = decoder_deconv_3_upsamp(_deconv_2_decoded) - _x_decoded_mean_squash = decoder_mean_squash(_x_decoded_relu) - + + decoder_input = Input(shape=(self.latent_dim,)) + _hid_decoded = decoder_hid(decoder_input) + _up_decoded = decoder_upsample(_hid_decoded) + _reshape_decoded = decoder_reshape(_up_decoded) + _deconv_1_decoded = decoder_deconv_1(_reshape_decoded) + _deconv_2_decoded = decoder_deconv_2(_deconv_1_decoded) + _x_decoded_relu = decoder_deconv_3_upsamp(_deconv_2_decoded) + _x_decoded_mean_squash = decoder_mean_squash(_x_decoded_relu) + # instantiate VAE models - - self.vae = Model(x, x_decoded_mean_squash) - self.encoder = Model(x, z_mean) - self.decoder = Model(decoder_input, _x_decoded_mean_squash) - + + self.vae = Model(x, x_decoded_mean_squash) + self.encoder = Model(x, z_mean) + self.decoder = Model(decoder_input, _x_decoded_mean_squash) + # VAE loss terms w/ KL divergence - + def vae_loss(x, x_decoded_mean_squash): xent_loss = self.image_size * self.image_size * metrics.binary_crossentropy(K.flatten(x), K.flatten(x_decoded_mean_squash)) kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) vae_loss = K.mean(xent_loss + kl_loss) return vae_loss - - - adam = optimizers.adam(lr = self.learn_rate) - - self.vae.compile(optimizer = adam, - loss = vae_loss) - + + adam = optimizers.adam(lr=self.learn_rate) + + self.vae.compile(optimizer=adam, + loss=vae_loss) + self.vae.summary() - - + def train(self): """ train VAE model """ - if(self.is_numpy): + if (self.is_numpy): train_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, - self.image_res, self.channels_to_use, shuffle=True, ) + self.image_res, self.channels_to_use, self.channel_first, shuffle=True) else: - train_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1), - horizontal_flip = True, - vertical_flip = True) - - train_generator = train_datagen.flow_from_directory( + train_datagen = ImageDataGenerator(rescale=1. / (2 ** self.image_res - 1), + horizontal_flip=True, + vertical_flip=True) + if self.image_channel == 1: + train_generator = train_datagen.flow_from_directory( + self.data_dir, + target_size=(self.image_size, self.image_size), + batch_size=self.batch_size, + color_mode='grayscale', + class_mode='input') + + else: + train_generator = train_datagen.flow_from_directory( self.data_dir, - target_size = (self.image_size, self.image_size), - batch_size = self.batch_size, - class_mode = 'input') + target_size=(self.image_size, self.image_size), + batch_size=self.batch_size, + color_mode='rgb', + class_mode='input') term_nan = TerminateOnNaN() - csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'), + csv_logger = CSVLogger(os.path.join(self.save_dir, 'training.log'), separator='\t') - checkpointer = ModelCheckpoint(os.path.join(self.save_dir, - 'checkpoints/vae_weights.hdf5'), - verbose=1, - save_weights_only=True) - + checkpointer = ModelCheckpoint(os.path.join(self.save_dir, + 'checkpoints/vae_weights.hdf5'), + verbose=1, + save_weights_only=True, + monitor='loss', + save_best_only=True) + # custom image saving callback img_saver = ImgSave(self) self.history = self.vae.fit_generator(train_generator, - epochs = self.epochs, - verbose = self.verbose, - callbacks = [ - term_nan, - csv_logger, - checkpointer, - img_saver, - ], - steps_per_epoch = self.steps_per_epoch) - - self.vae.save_weights(os.path.join(self.save_dir, + epochs=self.epochs, + verbose=self.verbose, + callbacks=[ + term_nan, + csv_logger, + checkpointer, + img_saver, + ], + steps_per_epoch=self.steps_per_epoch) + + self.vae.save_weights(os.path.join(self.save_dir, 'checkpoints/vae_weights.hdf5')) - self.encode() + self.encode() def encode(self): """ encode data with trained model """ - if(self.is_numpy): + if (self.is_numpy): test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, - self.image_res, self.channels_to_use, shuffle=False) + self.image_res, self.channels_to_use, self.channel_first, shuffle=False) else: - test_datagen = ImageDataGenerator(rescale = 1./(2**self.image_res - 1)) - - test_generator = test_datagen.flow_from_directory( + test_datagen = ImageDataGenerator(rescale=1. / (2 ** self.image_res - 1)) + + if self.image_channel == 1: + test_generator = test_datagen.flow_from_directory( self.data_dir, - target_size = (self.image_size, self.image_size), - batch_size = self.batch_size, - shuffle = False, - class_mode = 'input') + target_size=(self.image_size, self.image_size), + batch_size=self.batch_size, + color_mode='grayscale', + shuffle=False, + class_mode='input') + + else: + test_generator = test_datagen.flow_from_directory( + self.data_dir, + target_size=(self.image_size, self.image_size), + batch_size=self.batch_size, + color_mode='rgb', + shuffle=False, + class_mode='input') print('encoding training data...') x_test_encoded = self.encoder.predict_generator(test_generator, - steps = self.data_size // self.batch_size) + steps=self.data_size // self.batch_size) - list_IDs = np.array(glob.glob(os.path.join(self.data_dir, 'train', '*'))) + list_IDs = (os.listdir(os.path.join(self.data_dir, 'train'))) labeled_encodings = np.array(x_test_encoded, dtype=object) labeled_encodings = np.insert(labeled_encodings, 0, - list_IDs[:((self.data_size // self.batch_size)*self.batch_size)], axis=1) + list_IDs[:((self.data_size // self.batch_size) * self.batch_size)], axis=1) - outFile = open(os.path.join(self.save_dir, 'encodings.csv'), 'w') - with outFile: - writer = csv.writer(outFile) + lab_enc = open(os.path.join(self.save_dir, 'labeled_encodings.csv'), 'w') + with lab_enc: + writer = csv.writer(lab_enc) writer.writerows(labeled_encodings) - outFile2 = open(os.path.join(self.save_dir, 'encodings2.csv'), 'w') - with outFile2: - writer = csv.writer(outFile2) + enc = open(os.path.join(self.save_dir, 'encodings.csv'), 'w') + with enc: + writer = csv.writer(enc) writer.writerows(x_test_encoded) + self.dice() + + def dice(self): + """calculates the dice coefficient and area for every image + results are saved as separate .csv files + """ + + if (self.is_numpy): + test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, + self.image_res, self.channels_to_use, self.channel_first, shuffle=False) + else: + test_datagen = ImageDataGenerator(rescale=1. / (2 ** self.image_res - 1)) + + if self.image_channel == 1: + test_generator = test_datagen.flow_from_directory( + self.data_dir, + target_size=(self.image_size, self.image_size), + batch_size=self.batch_size, + color_mode='grayscale', + shuffle=False, + class_mode='input') + + else: + test_generator = test_datagen.flow_from_directory( + self.data_dir, + target_size=(self.image_size, self.image_size), + batch_size=self.batch_size, + color_mode='rgb', + shuffle=False, + class_mode='input') + + dice_vals = np.zeros((len(test_generator) * self.batch_size, self.image_channel)) + coverage = np.zeros((len(test_generator) * self.batch_size, self.image_channel)) + + channels = np.array(self.channels_to_use.split(',')).astype(int) + ch_labels = np.array(self.channel_labels.split(',')) + + fnames = (os.listdir(os.path.join(self.data_dir, 'train'))) + fnames_counter = 0 + + # print('calculating dice coefficients...') + print('generating reconstructions...') + for gen_batch in range(len(test_generator)): + input_batch = test_generator[gen_batch][0] + recon_batch = self.vae.predict(input_batch, batch_size=self.batch_size) + for cell in range(self.batch_size): + for ch in range(len(channels)): + if self.save_individual: + fig, axs = plt.subplots(1, 2, figsize=(4, 2)) + axs[0].imshow(input_batch[cell, :, :, ch] * float((2 ** self.image_res - 1)), + cmap='gray', vmax=(2 ** self.image_res - 1)) + axs[0].set_xticks([]) + axs[0].set_yticks([]) + axs[1].imshow(recon_batch[cell, :, :, ch] * float((2 ** self.image_res - 1)), + cmap='gray', vmax=(2 ** self.image_res - 1)) + axs[1].set_xticks([]) + axs[1].set_yticks([]) + axs[0].set_ylabel(ch_labels[ch]) + fig.tight_layout() + plt.savefig(os.path.join(self.save_dir, 'individual_cells', + fnames[fnames_counter][:-4] + '_' + ch_labels[ch] + '.png')) + fnames_counter += 1 + plt.close("all") + + coverage[(gen_batch * self.batch_size) + cell, ch] = np.count_nonzero(input_batch[cell, :, :, ch]) + input_bool = input_batch[cell, :, :, ch].astype(bool) + recon_bool = recon_batch[cell, :, :, ch].astype(bool) + intersection = np.logical_and(input_bool, recon_bool) + im_sum = input_bool.sum() + recon_bool.sum() + if im_sum == 0: + dice_vals[(gen_batch * self.batch_size) + cell, ch] = 1 + else: + dice_coef = 2. * intersection.sum() / (input_bool.sum() + recon_bool.sum()) + dice_vals[(gen_batch * self.batch_size) + cell, ch] = dice_coef + + dv = open(os.path.join(self.save_dir, 'dice_vals.csv'), 'w') + with dv: + writer = csv.writer(dv) + writer.writerows(dice_vals) + + cov = open(os.path.join(self.save_dir, 'coverage.csv'), 'w') + with cov: + writer = csv.writer(cov) + writer.writerows(coverage) + print('calculated dice coefficients and coverage!') + + class DataGenerator(Sequence): - def __init__(self, data_dir, batch_size, image_size, image_channel, image_res, channels_to_use, shuffle): + def __init__(self, data_dir, batch_size, image_size, image_channel, + image_res, channels_to_use, channel_first, shuffle): self.image_size = image_size self.batch_size = batch_size self.list_IDs = glob.glob(os.path.join(data_dir, 'train', '*')) @@ -454,13 +574,17 @@ def __init__(self, data_dir, batch_size, image_size, image_channel, image_res, c self.shuffle = shuffle self.on_epoch_end() self.channels_to_use = channels_to_use + self.channel_first = channel_first def __len__(self): return int(np.floor(len(self.list_IDs) / self.batch_size)) def __getitem__(self, index): + if index >= self.__len__(): + raise IndexError() + # Generate indexes of the batch - indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size] + indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size] # Find list of IDs list_IDs_temp = [self.list_IDs[k] for k in indexes] @@ -468,7 +592,7 @@ def __getitem__(self, index): # Generate data X = self.__data_generation(list_IDs_temp) - return X,X + return X, X def on_epoch_end(self): self.indexes = np.arange(len(self.list_IDs)) @@ -477,13 +601,16 @@ def on_epoch_end(self): def __data_generation(self, list_IDs_temp): # Initialization - X = np.zeros((self.batch_size, self.image_size, self.image_size, self.image_channel)) + X = np.zeros((self.batch_size, self.image_size, self.image_size, self.image_channel), dtype=np.float32) # Generate data for i, ID in enumerate(list_IDs_temp): # Store sample - temp = np.transpose(np.load(ID), (1,2,0))/(2**self.image_res - 1) + if self.channel_first: + temp = np.transpose(np.load(ID), (1, 2, 0)) / (2 ** self.image_res - 1) + else: + temp = np.load(ID) / (2 ** self.image_res - 1) channels = np.array(self.channels_to_use.split(',')).astype(int) - temp = temp[:,:, channels] + temp = temp[:, :, channels] X[i,] = temp return X diff --git a/main.py b/main.py index 14fea45..4b52c45 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,4 @@ -""" +""" Image Variational Autoencoding """ @@ -10,49 +10,64 @@ parser = argparse.ArgumentParser(description='') -parser.add_argument('--data_dir', type=str, default='data', help='input data directory (in train subfolder)') -parser.add_argument('--save_dir', type=str, default='save', help='save directory') -parser.add_argument('--phase', type=str, default='train', help='train or load') -parser.add_argument('--checkpoint', type=str, default='NA', help='checkpoint weight file') - -parser.add_argument('--image_size', type=int, default=64, help='image size') -parser.add_argument('--image_channel', type=int, default=3, help='image channels') -parser.add_argument('--image_res', type=int, default=8, help='image resolution (8 or 16)') +parser.add_argument('--data_dir', type=str, default='data', help='input data directory (in train subfolder)') +parser.add_argument('--save_dir', type=str, default='save', help='save directory') +parser.add_argument('--phase', type=str, default='train', help='train, load, or dice') +parser.add_argument('--checkpoint', type=str, default='NA', help='checkpoint weight file') -parser.add_argument('--latent_dim', type=int, default=2, help='latent dimension') -parser.add_argument('--inter_dim', type=int, default=64, help='intermediate dimension') -parser.add_argument('--num_conv', type=int, default=3, help='number of convolutions') -parser.add_argument('--batch_size', type=int, default=32, help='batch size') -parser.add_argument('--epochs', type=int, default=2, help='training epochs') -parser.add_argument('--nfilters', type=int, default=64, help='num convolution filters') -parser.add_argument('--learn_rate', type=float, default=0.001, help='learning rate') -parser.add_argument('--epsilon_std', type=float, default=1.0, help='epsilon width') -parser.add_argument('--latent_samp', type=int, default=10, help='number of latent samples') -parser.add_argument('--num_save', type=int, default=8, help='number of reconstructed images to save') -parser.add_argument('--verbose', type=int, default=2, help='1=verbose, 2=quiet') +parser.add_argument('--image_size', type=int, default=64, help='image size') +parser.add_argument('--image_channel', type=int, default=3, help='image channels') +parser.add_argument('--image_res', type=int, default=8, help='image resolution (8 or 16)') -parser.add_argument('--steps_per_epoch', type=int, default=0, help='steps per epoch') - -parser.add_argument('--is_numpy', type=bool, default=False, help='images are represented as numpy arrays') -parser.add_argument('--channels_to_use', type=str, default='all', help='specify channels to use if complex images are used') +parser.add_argument('--latent_dim', type=int, default=2, help='latent dimension') +parser.add_argument('--inter_dim', type=int, default=64, help='intermediate dimension') +parser.add_argument('--num_conv', type=int, default=3, help='number of convolutions') +parser.add_argument('--batch_size', type=int, default=32, help='batch size') +parser.add_argument('--epochs', type=int, default=2, help='training epochs') +parser.add_argument('--nfilters', type=int, default=64, help='num convolution filters') +parser.add_argument('--learn_rate', type=float, default=0.001, help='learning rate') +parser.add_argument('--epsilon_std', type=float, default=1.0, help='epsilon width') +parser.add_argument('--latent_samp', type=int, default=10, help='number of latent samples') +parser.add_argument('--num_save', type=int, default=8, help='number of reconstructed images to save') +parser.add_argument('--verbose', type=int, default=2, help='1=verbose, 2=quiet') +parser.add_argument('--steps_per_epoch', type=int, default=0, help='steps per epoch') +parser.add_argument('--is_numpy', default=False, action='store_true', + help='images are represented as numpy arrays') +parser.add_argument('--channel_first', default=False, action='store_true', + help='specify if images are channel first') +parser.add_argument('--channels_to_use', type=str, default='all', + help='specify channels to use if numpy arrays are used') +parser.add_argument('--save_individual', default=False, action='store_true', + help='if numpy, specify to save individual image reconstruction') +parser.add_argument('--channel_labels', type=str, default='NA', + help='specify channel labels if numpy arrays are used') args = parser.parse_args() def main(): - os.makedirs(args.save_dir, exist_ok=True) os.makedirs(os.path.join(args.save_dir, 'checkpoints'), exist_ok=True) - os.makedirs(os.path.join(args.save_dir, 'latent_walk'), exist_ok=True) - os.makedirs(os.path.join(args.save_dir, 'input'), exist_ok=True) + if not args.is_numpy: + os.makedirs(os.path.join(args.save_dir, 'latent_walk'), exist_ok=True) + os.makedirs(os.path.join(args.save_dir, 'input'), exist_ok=True) os.makedirs(os.path.join(args.save_dir, 'reconstructed'), exist_ok=True) + if args.save_individual: + os.makedirs(os.path.join(args.save_dir, 'individual_cells'), exist_ok=True) - if args.channels_to_use != 'all': - channels = np.array(args.channels_to_use.split(',')).astype(int) - if len(channels) != args.image_channel: - sys.exit('Number of specified channels does not image_channel argument!') - else: - args.channels_to_use = ','.join(str(i) for i in list(range(args.image_channel))) + if args.is_numpy: + if args.channels_to_use != 'all': + channels = np.array(args.channels_to_use.split(',')).astype(int) + if len(channels) != args.image_channel: + sys.exit('Number of specified channels does not image_channel argument!') + else: + args.channels_to_use = ','.join(str(i) for i in list(range(args.image_channel))) + if args.channel_labels != 'NA': + ch_names = np.array(args.channels_to_use.split(',')).astype(int) + if len(ch_names) != args.image_channel: + sys.exit('Number of channel names does not image_channel argument!') + else: + args.channel_labels = ','.join(str(i) for i in list(range(args.image_channel))) if args.phase == 'train': model = ImageVAE(args) @@ -61,10 +76,19 @@ def main(): if args.phase == 'load': if args.checkpoint == 'NA': sys.exit('No checkpoint file provided') - + model = ImageVAE(args) model.vae.load_weights(args.checkpoint) model.train() - + + if args.phase == 'dice': + if args.checkpoint == 'NA': + sys.exit('No checkpoint file provided') + + model = ImageVAE(args) + model.vae.load_weights(args.checkpoint) + model.dice() + + if __name__ == '__main__': main() From 60624a9ffd6b0d663475daadd2e847d50e45dda9 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Thu, 16 Aug 2018 16:38:00 -0400 Subject: [PATCH 15/16] limited dice for only numpy images --- image_vae.py | 131 ++++++++++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 75 deletions(-) diff --git a/image_vae.py b/image_vae.py index 45cb194..2ed7846 100644 --- a/image_vae.py +++ b/image_vae.py @@ -476,91 +476,72 @@ def encode(self): writer = csv.writer(enc) writer.writerows(x_test_encoded) - self.dice() - def dice(self): """calculates the dice coefficient and area for every image results are saved as separate .csv files """ - if (self.is_numpy): + if self.is_numpy: test_generator = DataGenerator(self.data_dir, self.batch_size, self.image_size, self.image_channel, self.image_res, self.channels_to_use, self.channel_first, shuffle=False) - else: - test_datagen = ImageDataGenerator(rescale=1. / (2 ** self.image_res - 1)) - if self.image_channel == 1: - test_generator = test_datagen.flow_from_directory( - self.data_dir, - target_size=(self.image_size, self.image_size), - batch_size=self.batch_size, - color_mode='grayscale', - shuffle=False, - class_mode='input') + dice_vals = np.zeros((len(test_generator) * self.batch_size, self.image_channel)) + coverage = np.zeros((len(test_generator) * self.batch_size, self.image_channel)) - else: - test_generator = test_datagen.flow_from_directory( - self.data_dir, - target_size=(self.image_size, self.image_size), - batch_size=self.batch_size, - color_mode='rgb', - shuffle=False, - class_mode='input') + channels = np.array(self.channels_to_use.split(',')).astype(int) + ch_labels = np.array(self.channel_labels.split(',')) + + fnames = (os.listdir(os.path.join(self.data_dir, 'train'))) + fnames_counter = 0 + + # print('calculating dice coefficients...') + print('generating reconstructions...') + for gen_batch in range(len(test_generator)): + input_batch = test_generator[gen_batch][0] + recon_batch = self.vae.predict(input_batch, batch_size=self.batch_size) + for cell in range(self.batch_size): + for ch in range(len(channels)): + if self.save_individual: + fig, axs = plt.subplots(1, 2, figsize=(4, 2)) + axs[0].imshow(input_batch[cell, :, :, ch] * float((2 ** self.image_res - 1)), + cmap='gray', vmax=(2 ** self.image_res - 1)) + axs[0].set_xticks([]) + axs[0].set_yticks([]) + axs[1].imshow(recon_batch[cell, :, :, ch] * float((2 ** self.image_res - 1)), + cmap='gray', vmax=(2 ** self.image_res - 1)) + axs[1].set_xticks([]) + axs[1].set_yticks([]) + axs[0].set_ylabel(ch_labels[ch]) + fig.tight_layout() + plt.savefig(os.path.join(self.save_dir, 'individual_cells', + fnames[fnames_counter][:-4] + '_' + ch_labels[ch] + '.png')) + fnames_counter += 1 + plt.close("all") + + coverage[(gen_batch * self.batch_size) + cell, ch] = np.count_nonzero(input_batch[cell, :, :, ch]) + input_bool = input_batch[cell, :, :, ch].astype(bool) + recon_bool = recon_batch[cell, :, :, ch].astype(bool) + intersection = np.logical_and(input_bool, recon_bool) + im_sum = input_bool.sum() + recon_bool.sum() + if im_sum == 0: + dice_vals[(gen_batch * self.batch_size) + cell, ch] = 1 + else: + dice_coef = 2. * intersection.sum() / (input_bool.sum() + recon_bool.sum()) + dice_vals[(gen_batch * self.batch_size) + cell, ch] = dice_coef + + dv = open(os.path.join(self.save_dir, 'dice_vals.csv'), 'w') + with dv: + writer = csv.writer(dv) + writer.writerows(dice_vals) + + cov = open(os.path.join(self.save_dir, 'coverage.csv'), 'w') + with cov: + writer = csv.writer(cov) + writer.writerows(coverage) + print('calculated dice coefficients and coverage!') - dice_vals = np.zeros((len(test_generator) * self.batch_size, self.image_channel)) - coverage = np.zeros((len(test_generator) * self.batch_size, self.image_channel)) - - channels = np.array(self.channels_to_use.split(',')).astype(int) - ch_labels = np.array(self.channel_labels.split(',')) - - fnames = (os.listdir(os.path.join(self.data_dir, 'train'))) - fnames_counter = 0 - - # print('calculating dice coefficients...') - print('generating reconstructions...') - for gen_batch in range(len(test_generator)): - input_batch = test_generator[gen_batch][0] - recon_batch = self.vae.predict(input_batch, batch_size=self.batch_size) - for cell in range(self.batch_size): - for ch in range(len(channels)): - if self.save_individual: - fig, axs = plt.subplots(1, 2, figsize=(4, 2)) - axs[0].imshow(input_batch[cell, :, :, ch] * float((2 ** self.image_res - 1)), - cmap='gray', vmax=(2 ** self.image_res - 1)) - axs[0].set_xticks([]) - axs[0].set_yticks([]) - axs[1].imshow(recon_batch[cell, :, :, ch] * float((2 ** self.image_res - 1)), - cmap='gray', vmax=(2 ** self.image_res - 1)) - axs[1].set_xticks([]) - axs[1].set_yticks([]) - axs[0].set_ylabel(ch_labels[ch]) - fig.tight_layout() - plt.savefig(os.path.join(self.save_dir, 'individual_cells', - fnames[fnames_counter][:-4] + '_' + ch_labels[ch] + '.png')) - fnames_counter += 1 - plt.close("all") - - coverage[(gen_batch * self.batch_size) + cell, ch] = np.count_nonzero(input_batch[cell, :, :, ch]) - input_bool = input_batch[cell, :, :, ch].astype(bool) - recon_bool = recon_batch[cell, :, :, ch].astype(bool) - intersection = np.logical_and(input_bool, recon_bool) - im_sum = input_bool.sum() + recon_bool.sum() - if im_sum == 0: - dice_vals[(gen_batch * self.batch_size) + cell, ch] = 1 - else: - dice_coef = 2. * intersection.sum() / (input_bool.sum() + recon_bool.sum()) - dice_vals[(gen_batch * self.batch_size) + cell, ch] = dice_coef - - dv = open(os.path.join(self.save_dir, 'dice_vals.csv'), 'w') - with dv: - writer = csv.writer(dv) - writer.writerows(dice_vals) - - cov = open(os.path.join(self.save_dir, 'coverage.csv'), 'w') - with cov: - writer = csv.writer(cov) - writer.writerows(coverage) - print('calculated dice coefficients and coverage!') + else: + print('Must use numpy arrays to calculate dice coefficients and coverage!') class DataGenerator(Sequence): From f472bf65d482508b72834d194b4d0b040d71d399 Mon Sep 17 00:00:00 2001 From: jah267 <31462867+jah267@users.noreply.github.com> Date: Thu, 16 Aug 2018 16:42:40 -0400 Subject: [PATCH 16/16] Add files via upload --- image_vae.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/image_vae.py b/image_vae.py index 2ed7846..db18792 100644 --- a/image_vae.py +++ b/image_vae.py @@ -494,8 +494,7 @@ def dice(self): fnames = (os.listdir(os.path.join(self.data_dir, 'train'))) fnames_counter = 0 - # print('calculating dice coefficients...') - print('generating reconstructions...') + print('calculating dice coefficients...') for gen_batch in range(len(test_generator)): input_batch = test_generator[gen_batch][0] recon_batch = self.vae.predict(input_batch, batch_size=self.batch_size)