Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 70 additions & 45 deletions deep_q_network.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,50 @@
#!/usr/bin/env python
from __future__ import print_function
from collections import deque
import numpy as np
import random
import game.wrapped_flappy_bird as game

import tensorflow as tf
import tensorflow.compat.v1 as tf
import cv2
import sys

tf.compat.v1.disable_eager_execution()


sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque

# Hyper-parameters for the DQN agent. Each name is defined exactly once.
GAME = "bird"  # the name of the game being played for log files
ACTIONS = 2  # number of valid actions
GAMMA = 0.99  # decay rate of past observations
OBSERVE = 100000.0  # timesteps to observe before training
EXPLORE = 2000000.0  # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001  # final value of epsilon
# NOTE(review): INITIAL_EPSILON equals FINAL_EPSILON, so the annealing step
# (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE is zero -- confirm that a fixed
# epsilon is intended before training from scratch.
INITIAL_EPSILON = 0.0001  # starting value of epsilon
REPLAY_MEMORY = 50000  # number of previous transitions to remember
BATCH = 32  # size of minibatch
# An action is chosen only on timesteps where t % FRAME_PER_ACTION == 0;
# on the remaining timesteps the agent does nothing.
FRAME_PER_ACTION = 1


def weight_variable(shape):
    """Return a ``tf.Variable`` of ``shape`` drawn from a truncated normal.

    The initial values are sampled with a standard deviation of 0.01.
    """
    initial = tf.random.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial)


def bias_variable(shape):
    """Return a ``tf.Variable`` of ``shape`` with every entry set to 0.01."""
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W, stride):
    """Convolve ``x`` with the filter ``W`` using "SAME" padding.

    ``stride`` is applied to the two middle axes of ``x``; the first and last
    axes use a stride of 1.
    """
    # NOTE(review): presumably x is laid out as (batch, height, width,
    # channels), matching the [None, 80, 80, 4] input placeholder -- confirm.
    return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding="SAME")


def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and "SAME" padding.

    The window and stride apply to the two middle axes of ``x``; the first and
    last axes are left unpooled (size and stride 1).
    """
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME"
    )


def createNetwork():
# network weights
Expand All @@ -53,19 +64,19 @@ def createNetwork():
b_fc2 = bias_variable([ACTIONS])

# input layer
s = tf.placeholder("float", [None, 80, 80, 4])
s = tf.compat.v1.placeholder("float", [None, 80, 80, 4])

# hidden layers
h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, 2) + b_conv2)
#h_pool2 = max_pool_2x2(h_conv2)
# h_pool2 = max_pool_2x2(h_conv2)

h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)
#h_pool3 = max_pool_2x2(h_conv3)
# h_pool3 = max_pool_2x2(h_conv3)

#h_pool3_flat = tf.reshape(h_pool3, [-1, 256])
# h_pool3_flat = tf.reshape(h_pool3, [-1, 256])
h_conv3_flat = tf.reshape(h_conv3, [-1, 1600])

h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)
Expand All @@ -75,11 +86,13 @@ def createNetwork():

return s, readout, h_fc1


def trainNetwork(s, readout, h_fc1, sess):
# define the cost function
a = tf.placeholder("float", [None, ACTIONS])
y = tf.placeholder("float", [None])
readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
a = tf.compat.v1.placeholder("float", [None, ACTIONS])
y = tf.compat.v1.placeholder("float", [None])
readout_action = tf.reduce_sum(
tf.multiply(readout, a), reduction_indices=1)
cost = tf.reduce_mean(tf.square(y - readout_action))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

Expand All @@ -90,15 +103,15 @@ def trainNetwork(s, readout, h_fc1, sess):
D = deque()

# printing
a_file = open("logs_" + GAME + "/readout.txt", 'w')
h_file = open("logs_" + GAME + "/hidden.txt", 'w')
a_file = open("logs_" + GAME + "/readout.txt", "w")
h_file = open("logs_" + GAME + "/hidden.txt", "w")

# get the first state by doing nothing and preprocess the image to 80x80x4
do_nothing = np.zeros(ACTIONS)
do_nothing[0] = 1
x_t, r_0, terminal = game_state.frame_step(do_nothing)
x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
ret, x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

# saving and loading networks
Expand All @@ -116,7 +129,7 @@ def trainNetwork(s, readout, h_fc1, sess):
t = 0
while "flappy bird" != "angry bird":
# choose an action epsilon greedily
readout_t = readout.eval(feed_dict={s : [s_t]})[0]
readout_t = readout.eval(feed_dict={s: [s_t]})[0]
a_t = np.zeros([ACTIONS])
action_index = 0
if t % FRAME_PER_ACTION == 0:
Expand All @@ -128,18 +141,19 @@ def trainNetwork(s, readout, h_fc1, sess):
action_index = np.argmax(readout_t)
a_t[action_index] = 1
else:
a_t[0] = 1 # do nothing
a_t[0] = 1 # do nothing

# scale down epsilon
if epsilon > FINAL_EPSILON and t > OBSERVE:
epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

# run the selected action and observe next state and reward
x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
x_t1 = cv2.cvtColor(cv2.resize(
x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
x_t1 = np.reshape(x_t1, (80, 80, 1))
#s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
# s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

# store the transition in D
Expand All @@ -159,29 +173,26 @@ def trainNetwork(s, readout, h_fc1, sess):
s_j1_batch = [d[3] for d in minibatch]

y_batch = []
readout_j1_batch = readout.eval(feed_dict = {s : s_j1_batch})
readout_j1_batch = readout.eval(feed_dict={s: s_j1_batch})
for i in range(0, len(minibatch)):
terminal = minibatch[i][4]
# if terminal, only equals reward
if terminal:
y_batch.append(r_batch[i])
else:
y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))
y_batch.append(r_batch[i] + GAMMA *
np.max(readout_j1_batch[i]))

# perform gradient step
train_step.run(feed_dict = {
y : y_batch,
a : a_batch,
s : s_j_batch}
)
train_step.run(feed_dict={y: y_batch, a: a_batch, s: s_j_batch})

# update the old values
s_t = s_t1
t += 1

# save progress every 10000 iterations
if t % 10000 == 0:
saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = t)
saver.save(sess, "saved_networks/" + GAME + "-dqn", global_step=t)

# print info
state = ""
Expand All @@ -192,24 +203,38 @@ def trainNetwork(s, readout, h_fc1, sess):
else:
state = "train"

print("TIMESTEP", t, "/ STATE", state, \
"/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
"/ Q_MAX %e" % np.max(readout_t))
print(
"TIMESTEP",
t,
"/ STATE",
state,
"/ EPSILON",
epsilon,
"/ ACTION",
action_index,
"/ REWARD",
r_t,
"/ Q_MAX %e" % np.max(readout_t),
)
# write info to files
'''
"""
if t % 10000 <= 100:
a_file.write(",".join([str(x) for x in readout_t]) + '\n')
h_file.write(",".join([str(x) for x in h_fc1.eval(feed_dict={s:[s_t]})[0]]) + '\n')
cv2.imwrite("logs_tetris/frame" + str(t) + ".png", x_t1)
'''
"""


def playGame():
    """Open an interactive TF1-style session, build the network and train it.

    Only one session is created. The call to ``trainNetwork`` contains the
    training loop and does not return under normal operation.
    """
    sess = tf.compat.v1.InteractiveSession()
    s, readout, h_fc1 = createNetwork()
    trainNetwork(s, readout, h_fc1, sess)


def main():
    """Script entry point: delegate to ``playGame``."""
    playGame()


# Run the agent only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Empty file added game/__init__.py
Empty file.
Loading