From bdaaa5c09b68d29f44137da51f13e8b64bfdc92e Mon Sep 17 00:00:00 2001 From: dwarakarashmi Date: Thu, 12 Apr 2018 23:05:49 -0400 Subject: [PATCH] dropout feature addition --- .gitignore | 6 ++++++ src/cli.py | 5 +++-- src/network.py | 11 ++++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 7c3568d..10496d3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ *.pyc .DS_store +.eggs +build +deps +dist +fiddler.egg-info +models \ No newline at end of file diff --git a/src/cli.py b/src/cli.py index 5ad8b22..770ebd1 100644 --- a/src/cli.py +++ b/src/cli.py @@ -21,6 +21,7 @@ def main(): @click.option("--learning-rate", "-r", default=1e-3, type=click.FLOAT, help="Learning Rate") @click.option("--num-steps", "-n", type=click.INT, default=15, help="No. of time steps in RNN") @click.option("--cell-size", "-s", type=click.INT, default=100, help="Dimension of cell states") +@click.option("--dropout", "-d", type=click.FLOAT, help="Dropout probability for the output") @click.option("--epochs", "-e", type=click.INT, help="No. 
of epochs to run training for") @click.option("--cell", "-c", type=click.Choice(['lstm', 'gru']), @@ -28,12 +29,12 @@ def main(): @click.option("--test-seed", "-t", help="Seed input for printing predicted text after each training step") @click.option("--delim/--no-delim", default=True, help="Delimit tunes with start and end symbol") def train_rnn(file, batch_size, layers, learning_rate, - num_steps, cell_size, epochs, cell, test_seed, delim): + num_steps, cell_size, dropout, epochs, cell, test_seed, delim): """ Train neural network """ ds = Dataset(file, batch_size=batch_size, num_steps=num_steps, with_delim=delim) n = RNN(data=ds, cell=cell, num_layers=layers, - learning_rate=learning_rate, cell_size=cell_size, num_epochs=epochs) + learning_rate=learning_rate, cell_size=cell_size, dropout=dropout, num_epochs=epochs) n.train(test_output=True, test_seed=test_seed, with_delim=delim) diff --git a/src/network.py b/src/network.py index 7d13d34..eedffa2 100644 --- a/src/network.py +++ b/src/network.py @@ -6,7 +6,7 @@ class RNN(object): - def __init__(self, data, cell, cell_size, num_layers=2, num_epochs=50, learning_rate=1e-3): + def __init__(self, data, cell, cell_size, dropout=0.2, num_layers=2, num_epochs=50, learning_rate=1e-3): """ `data` is dataset.Dataset object `cell_size` is the Dimensions for each RNN cell's parameters (i.e. 
c and h) @@ -18,6 +18,7 @@ def __init__(self, data, cell, cell_size, num_layers=2, num_epochs=50, learning_ self.batch_size = self.data.batch_size self.cell = cell self.cell_size = cell_size + self.dropout = dropout self.num_layers = num_layers self.num_steps = self.data.num_steps self.num_classes = self.data.vocab_size @@ -58,12 +59,20 @@ def _build(self): for i in range(self.num_layers)]) single_cell = tf.nn.rnn_cell.LSTMCell( self.cell_size, forget_bias=1.0) + # self.dropout is a drop probability; DropoutWrapper wants a keep probability (TODO: skip at inference) + if self.dropout: + single_cell = tf.contrib.rnn.DropoutWrapper( + single_cell, output_keep_prob=1.0 - self.dropout) multi_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell for _ in xrange(self.num_layers)], state_is_tuple=True) else: rnn_states = tuple([state_per_layer[i] for i in range(self.num_layers)]) single_cell = tf.nn.rnn_cell.GRUCell(self.cell_size) + # self.dropout is a drop probability; DropoutWrapper wants a keep probability (TODO: skip at inference) + if self.dropout: + single_cell = tf.contrib.rnn.DropoutWrapper( + single_cell, output_keep_prob=1.0 - self.dropout) multi_cell = tf.nn.rnn_cell.MultiRNNCell( [single_cell] * self.num_layers)