diff --git a/MEOT/DQN/DQL.py b/MEOT/DQN/DQL.py new file mode 100644 index 0000000..3e229bd --- /dev/null +++ b/MEOT/DQN/DQL.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Feb 16 10:50:17 2018 + +@author: Louis +""" + +import os + +os.environ['SDL_VIDEODRIVER'] = 'dummy' +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from FlappyAgent import FlappyPolicy +from testG import test_model_G + +import matplotlib.pyplot as plt +from skimage.color import rgb2gray +from skimage.transform import resize +from skimage.exposure import rescale_intensity + +from keras.models import Sequential, load_model +from keras.layers import Dense, Conv2D, Flatten +import graphviz + + +from collections import deque + +def process_screen(x): + + return (255 * resize(rgb2gray(x)[50:, :410], (84, 84))).astype("uint8") + + +#%% Network Definition +dqn = Sequential() +#1st layer +dqn.add(Conv2D(filters=16, kernel_size=(8,8), strides=4, activation="relu", input_shape=(84,84,4))) +#2nd layer +dqn.add(Conv2D(filters=32, kernel_size=(4,4), strides=2, activation="relu")) +dqn.add(Flatten()) +#3rd layer +dqn.add(Dense(units=256, activation="relu")) +#output layer +dqn.add(Dense(units=2, activation="linear")) + +dqn.compile(optimizer="rmsprop", loss="mean_squared_error") + +#%% Training Fonctions + +def epsilon(step): + ##Linear decay until step 200 000 then constant + if step<200000: + return 1-step*(0.09/200000) + return .01 + +def clip_reward(r): + ## Shaping Reward : -1, 0.1, 1 + if (r==0): + return 0.1 + if (r<0): + return -1 + return r + +def greedy_action(network, x): + Q = network.predict(np.array([x])) + return np.argmax(Q) + + +#%% Memory_buffer +# A class for the replay memory + + +class MemoryBuffer: + "An experience replay buffer using numpy arrays" + def __init__(self, length, screen_shape, action_shape): + self.length = length + self.screen_shape = screen_shape + self.action_shape = action_shape + shape = (length,) + screen_shape + self.screens_x = np.zeros(shape, dtype=np.uint8) # starting states + self.screens_y = np.zeros(shape, dtype=np.uint8) # resulting states + shape = (length,) + action_shape + self.actions = np.zeros(shape, dtype=np.uint8) # actions + self.rewards = np.zeros((length,1), dtype=np.int8) # rewards + self.terminals = np.zeros((length,1), dtype=np.bool) # true if resulting state is terminal + self.terminals[-1] = True + self.index = 0 # points one position past the last inserted element + self.size = 0 # current size of the buffer + + def append(self, screenx, a, r, screeny, d): + self.screens_x[self.index] = screenx + #plt.imshow(screenx) + #plt.show() + #plt.imshow(self.screens_x[self.index]) + #plt.show() + self.actions[self.index] = a + self.rewards[self.index] = r + self.screens_y[self.index] = screeny + self.terminals[self.index] = d + self.index = (self.index+1) % self.length + self.size = np.min([self.size+1,self.length]) + + def stacked_frames_x(self, index): + im_deque = deque(maxlen=4) + pos = index % self.length + for i in range(4): # todo + im = self.screens_x[pos] + im_deque.appendleft(im) + test_pos = (pos-1) % self.length + if self.terminals[test_pos] == False: + pos = test_pos + return np.stack(im_deque, axis=-1) + + def stacked_frames_y(self, index): + im_deque = deque(maxlen=4) + pos = index % self.length + for i in range(4): # todo + im = self.screens_y[pos] + im_deque.appendleft(im) + test_pos = (pos-1) % self.length + if self.terminals[test_pos] == False: + pos = test_pos + return np.stack(im_deque, axis=-1) + + 
def minibatch(self, size): + #return np.random.choice(self.data[:self.size], size=sz, replace=False) + indices = np.random.choice(self.size, size=size, replace=False) + x = np.zeros((size,)+self.screen_shape+(4,)) + y = np.zeros((size,)+self.screen_shape+(4,)) + + for i in range(size): + x[i] = self.stacked_frames_x(indices[i]) + y[i] = self.stacked_frames_y(indices[i]) + return x, self.actions[indices], self.rewards[indices], y, self.terminals[indices] + + +#%% Training Episode +# initialize state and replay memory +game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) +# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes. + +p.init() + +total_steps = 800000 +replay_memory_size = 100000 +intermediate_size = 50000 +interval_test = 25000 +mini_batch_size = 32 +gamma = 0.99 + +average_score = [] +max_score= [] + + +p.reset_game() +screen_x = process_screen(p.getScreenRGB()) +stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4) +x = np.stack(stacked_x, axis=-1) +replay_memory = MemoryBuffer(replay_memory_size, (84,84), (1,)) +# initial state for evaluation +evaluation_period = 30 +Xtest = np.array([x]) +nb_epochs = total_steps // evaluation_period +epoch=-1 +scoreQ = np.zeros((nb_epochs)) +scoreMC = np.zeros((nb_epochs)) +list_actions = [0,119] + + +# Deep Q-learning with experience replay +for step in range(total_steps): + + if (step%intermediate_size==0): + dqn.save('TrainG5_'+str(int(step/intermediate_size))+'.h5') + print('Saving model: step = ' + str(step)) + + if (step%interval_test==0): + avg_temp = 0 + max_temp = 0 + print('Eval Period : '+str(step)) + avg_temp, max_temp = test_model_G(evaluation_period, dqn) + average_score.append(avg_temp) + max_score.append(max_temp) + + # evaluation +# if(step%10 == 0): +# epoch = epoch+1 +# # evaluation of initial state +# scoreQ[epoch] = np.mean(dqn.predict(Xtest).max(1)) +# # roll-out evaluation +# scoreMC[epoch] = MCeval(network=dqn, trials=20, length=700, gamma=gamma) + # action selection + + if np.random.rand() < epsilon(step): + if np.random.randint(0,5)==1: + a = 0 + else : + a = 1 + else: + a = greedy_action(dqn, x) + # step + + r=p.act(list_actions[a]) + raw_screen_y = p.getScreenRGB() + + r = clip_reward(r) + d=p.game_over() + + screen_y = process_screen(raw_screen_y) + replay_memory.append(screen_x, a, r, screen_y, d) + + # train once the replay buffer holds at least one minibatch + if step > mini_batch_size: + X,A,R,Y,D = replay_memory.minibatch(mini_batch_size) + QY = dqn.predict(Y) + QYmax = QY.max(1).reshape((mini_batch_size,1)) + update = R + gamma * (1-D) * QYmax + QX = dqn.predict(X) + QX[np.arange(mini_batch_size), A.ravel()] = update.ravel() + dqn.train_on_batch(x=X, y=QX) + + # prepare next transition + if d==True: + # restart episode + p.reset_game() + screen_x = process_screen(p.getScreenRGB()) + stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4) + x = np.stack(stacked_x, axis=-1) + else: + + # keep going + screen_x = screen_y + stacked_x.append(screen_x) + x = np.stack(stacked_x, axis=-1) + + +dqn.save('TrainG5_max.h5') + +np.savetxt('average.txt',average_score, delimiter=',') +np.savetxt('max.txt',max_score, delimiter=',') \ No newline at end of file diff --git a/MEOT/DQN/FlappyAgent.py
b/MEOT/DQN/FlappyAgent.py new file mode 100644 index 0000000..f97021a --- /dev/null +++ b/MEOT/DQN/FlappyAgent.py @@ -0,0 +1,26 @@ +import numpy as np + +import matplotlib.pyplot as plt +from skimage.color import rgb2gray +from skimage.transform import resize +from skimage.exposure import rescale_intensity + +from keras.models import Sequential, load_model +from keras.layers import Dense, Conv2D, Flatten +import graphviz + +from collections import deque + +list_actions = [0,119] +dqn = load_model('TrainG4_max.h5') +def process_screen(x): + return (255 * resize(rgb2gray(x)[50:, :410], (84, 84))).astype("uint8") + +def FlappyPolicy(state, screen): + screen_x = process_screen(screen) + stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4) + x = np.stack(stacked_x, axis=-1) + action = list_actions[np.argmax(dqn.predict(np.expand_dims(x,axis=0)))] + return action + + diff --git a/MEOT/DQN/TrainG4_max.h5 b/MEOT/DQN/TrainG4_max.h5 new file mode 100644 index 0000000..1848bfb Binary files /dev/null and b/MEOT/DQN/TrainG4_max.h5 differ diff --git a/MEOT/DQN/run.py b/MEOT/DQN/run.py new file mode 100644 index 0000000..79770e1 --- /dev/null +++ b/MEOT/DQN/run.py @@ -0,0 +1,39 @@ +# You're not allowed to change this file +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from FlappyAgent import FlappyPolicy + +game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True) +# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes. + +p.init() +reward = 0.0 + +nb_games = 100 +cumulated = np.zeros((nb_games)) + + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + screen = p.getScreenRGB() + + + action=FlappyPolicy(state, screen) ### Your job is to define this function. + + + + reward = p.act(action) + print(reward) + cumulated[i] = cumulated[i] + reward + +average_score = np.mean(cumulated) +max_score = np.max(cumulated) + + +#####---------- + diff --git a/MEOT/DQN/testG.py b/MEOT/DQN/testG.py new file mode 100644 index 0000000..9237ae1 --- /dev/null +++ b/MEOT/DQN/testG.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Feb 16 22:13:01 2018 + +@author: Louis + +""" + +# Functions used to test during Gcloud training phase. +import os + +os.environ['SDL_VIDEODRIVER'] = 'dummy' +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from FlappyAgent import FlappyPolicy + +import matplotlib.pyplot as plt +from skimage.color import rgb2gray +from skimage.transform import resize +from skimage.exposure import rescale_intensity + +from keras.models import Sequential, load_model +from keras.layers import Dense, Conv2D, Flatten +import graphviz + +from collections import deque + +def process_screen(x): + return (255 * resize(rgb2gray(x)[50:, :410], (84, 84))).astype("uint8") + +def test_model_G(nb_games, model): + game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. 
+ p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) + p.init() + reward = 0.0 + + cumulated = np.zeros((nb_games)) + list_actions = [0,119] + + for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + + screen_x = process_screen(p.getScreenRGB()) + stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4) + x = np.stack(stacked_x, axis=-1) + action = list_actions[np.argmax(model.predict(np.expand_dims(x,axis=0)))] + + reward = p.act(action) + + cumulated[i] = cumulated[i] + reward + + avg_score = np.mean(cumulated) + print('Average : '+ str(avg_score)) + mx_score = np.max(cumulated) + print('Max : '+ str(mx_score)) + return avg_score, mx_score diff --git a/MEOT/DQN/testres.py b/MEOT/DQN/testres.py new file mode 100644 index 0000000..916fa5d --- /dev/null +++ b/MEOT/DQN/testres.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 9 14:48:23 2018 + +@author: Louis + +""" + +#Local test sequence + +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from FlappyAgent import FlappyPolicy + +import matplotlib.pyplot as plt +from skimage.color import rgb2gray +from skimage.transform import resize +from skimage.exposure import rescale_intensity + +from keras.models import Sequential, load_model +from keras.layers import Dense, Conv2D, Flatten +import graphviz + +from collections import deque + +def process_screen(x): + return (255 * resize(rgb2gray(x)[50:, :410], (84, 84))).astype("uint8") + + +def greedy_action(network, x): + Q = network.predict(np.array([x])) + return np.argmax(Q) + + + #%% +dqn=load_model('TrainG4_19.h5') +game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. 
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True) +p.init() +reward = 0.0 +list_actions=[0,119] +nb_games = 100 +cumulated = np.zeros((nb_games)) + + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + + screen_x = process_screen(p.getScreenRGB()) + stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4) + x = np.stack(stacked_x, axis=-1) + action = list_actions[greedy_action(dqn,x)] + + reward = p.act(action) + cumulated[i] = cumulated[i] + reward + +average_score = np.mean(cumulated) +max_score = np.max(cumulated) \ No newline at end of file diff --git a/MEOT/QL/FlappyAgent.py b/MEOT/QL/FlappyAgent.py new file mode 100644 index 0000000..ac9e6d0 --- /dev/null +++ b/MEOT/QL/FlappyAgent.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +""" +Created on Wed Jan 24 14:55:41 2018 + +@author: Louis MEOT +""" +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from random import randint +import math +import pickle + +game=FlappyBird() +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True) + +p.init() + +nb_games = 20 +cumulated = np.zeros((nb_games)) +f_myfile = open('Q_function600.pickle', 'rb') +Q_function = pickle.load(f_myfile) # variables come out in the order you put them in +f_myfile.close() + +def FlappyPolicy(state, screen): + a= play_loop(state) + return a + + +# Maillage des états +def observeState(state): + y_to_pipe_bottom = state["player_y"] - state["next_pipe_bottom_y"] + y_cat = 0 + x_cat = 0 + h_max = 412 + h_min = -412 + d_max = 288 + nb_y_cat = 14 + nb_x_cat = 5 + + while(y_to_pipe_bottom - h_min > (h_max - h_min) * y_cat/nb_y_cat): + y_cat += 1 + + while(state["next_pipe_dist_to_player"] > d_max * x_cat/nb_x_cat): + x_cat += 1 + + speed_cat = int((state["player_vel"]+16)/2) + + return (x_cat-1,y_cat-1,speed_cat) + + +def epsilon_greedy(Q, s): + a = np.argmax(Q[s[0]][s[1]][s[2]][:]) # Action optimale avec une proba 1-eps + return a + +def play_loop(state): + ps = observeState(state) + action_ind = epsilon_greedy(Q_function,ps) + if (action_ind==1): + action = 119 + else: + action = None + return action + + \ No newline at end of file diff --git a/MEOT/QL/FlappyTraining.py b/MEOT/QL/FlappyTraining.py new file mode 100644 index 0000000..a9d279c --- /dev/null +++ b/MEOT/QL/FlappyTraining.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Jan 24 14:55:41 2018 + +@author: Louis MEOT +""" +import os + +os.environ['SDL_VIDEODRIVER'] = 'dummy' + +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from random import randint +import math +import pickle + +def FlappyPolicyDyn(state,screen): + print(state) + if(state["player_y"]>state["next_pipe_bottom_y"]-50): + return 119 + else: + return None + +def FlappyPolicy(state, screen,game,p,epsilon,cummulated,i,count,Q, STATES, nb_states): + a= play_loop(state,Q,game,p,epsilon,cumulated,i,count, STATES, nb_states) + return a + +# Maillage des états +def observeState(state,p): + y_to_pipe_bottom = state["player_y"] - state["next_pipe_bottom_y"] + y_cat = 0 + x_cat = 0 + h_max = 412 + h_min = -412 + d_max = 288 + nb_y_cat = 14 + nb_x_cat = 5 + + while(y_to_pipe_bottom - h_min > (h_max - h_min) * y_cat/nb_y_cat): + y_cat += 1 + + while(state["next_pipe_dist_to_player"] > d_max * x_cat/nb_x_cat): + x_cat += 1 + + speed_cat = int((state["player_vel"]+16)/2) + + return (x_cat-1,y_cat-1,speed_cat) + +def rewardAndUpdateQ(STATES, 
nb_states, Q): + alpha = 0.4 + gamma = 0.9 + + for i in range(nb_states-1): + s = STATES[i] + ns = STATES[i+1] + if (i -
  • fork the project at [https://github.com/SupaeroDataScience/RLchallenge](https://github.com/SupaeroDataScience/RLchallenge) on your own github (yes, you'll need one). -
  • move the only file there ('run.py') under a directory "YourLastName". -
  • create 'FlappyPolicy.py' in order to implement the function `FlappyPolicy(state,screen)` used below. You're free to add as many extra files as you need. However, you're not allowed to change 'run.py'. -
  • add any useful material (comments, text files, analysis, etc.) -
  • make a pull request on the original repository when you're done.
-
-`FlappyPolicy(state,screen)` takes both the game state and the screen as input. It gives you the choice of what you base your policy on:
-
-# Installation
-
-You will need to install a few things to get started.
-First, you will need PyGame.
-
-```
-pip install pygame
-```
-
-And you will need [PLE (PyGame Learning Environment)](https://github.com/ntasfi/PyGame-Learning-Environment).
-```
-git clone https://github.com/ntasfi/PyGame-Learning-Environment.git
-cd PyGame-Learning-Environment/
-pip install -e .
-```
+# Presentation
+
+This repository is the result of a school Reinforcement Learning challenge: training a policy to play the FlappyBird game.
+Two methods have been implemented: a tabular Q-learning agent (QL) and a deep Q-learning agent (DQN).
+
+# Computation constraints
+All computations and training phases were pushed to the Google Cloud platform on a free-trial account (8 vCPUs, 30 GB memory).
+The following line keeps the game from opening any window during training, which would otherwise raise an error since Gcloud virtual machines have no display:
+
+os.environ['SDL_VIDEODRIVER'] = 'dummy'
+
+# QLearning
+Largely inspired by previous lessons and fellow students' results, the main idea of this Q-learning agent is to shrink the **state space** to a coarse discrete grid and to punish crashes with a large **negative** reward. A sketch of the discretization and of the update rule is given at the end of this README.
+
+# DQL
+Deep Q-learning (DQN) is the first solution I tried to implement (I then switched to Q-learning in order to present some results).
+The original frame is cropped just behind Flappy's back and above the floor, resized to an 84x84 grayscale image, then stacked with the 3 previous frames so that the network can extract differential information (velocity). A sketch of this preprocessing and of the training update is given below.
+## Training period
+300 000 steps
+Network saved every 50 000 steps
+Policy evaluated every 25 000 steps
+
+The network has been trained for 300 000 steps so far.
+The evolution of the scores suggests that a longer training run would significantly improve the results.
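+
+# Code sketches
+
+## Screen preprocessing and frame stacking (DQN)
+A minimal sketch of the preprocessing described above: the crop, the 84x84 grayscale resize and the 4-frame stack follow `process_screen` and the `deque` usage in `DQN/DQL.py`. The `stack_frame` helper and its `reset` flag are illustrative names of mine, not functions from the repository.
+
+```python
+import numpy as np
+from collections import deque
+from skimage.color import rgb2gray
+from skimage.transform import resize
+
+def process_screen(screen):
+    # Drop the area behind the bird and the floor, convert to grayscale,
+    # resize to 84x84 and rescale to 8-bit integers.
+    return (255 * resize(rgb2gray(screen)[50:, :410], (84, 84))).astype("uint8")
+
+frames = deque(maxlen=4)  # the 4 most recent processed frames
+
+def stack_frame(screen, reset=False):
+    # Return the (84, 84, 4) tensor fed to the network; at the start of an
+    # episode the first frame is repeated 4 times, as in DQL.py.
+    x = process_screen(screen)
+    if reset or not frames:
+        frames.extend([x, x, x, x])
+    else:
+        frames.append(x)
+    return np.stack(frames, axis=-1)
+```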
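+
+## Q-value update on a minibatch (DQN)
+The training step in `DQN/DQL.py` boils down to the update below. The minibatch variables keep the names used there (`X` stacked frames, `A` actions, `R` clipped rewards, `Y` next stacked frames, `D` terminal flags); wrapping the step in a `dqn_training_step` function is only my packaging, and `dqn` and `replay_memory` are assumed to be built as in that file.
+
+```python
+import numpy as np
+
+GAMMA = 0.99
+MINI_BATCH_SIZE = 32
+
+def dqn_training_step(dqn, replay_memory):
+    # Sample transitions: stacked frames X, actions A, clipped rewards R,
+    # next stacked frames Y and terminal flags D.
+    X, A, R, Y, D = replay_memory.minibatch(MINI_BATCH_SIZE)
+
+    # Bootstrapped targets r + gamma * max_a' Q(s', a'), cut off at terminal states.
+    QY_max = dqn.predict(Y).max(1).reshape((MINI_BATCH_SIZE, 1))
+    targets = R + GAMMA * (1 - D) * QY_max
+
+    # Move only the Q-value of the action actually taken towards its target.
+    QX = dqn.predict(X)
+    QX[np.arange(MINI_BATCH_SIZE), A.ravel()] = targets.ravel()
+    return dqn.train_on_batch(x=X, y=QX)
+```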
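+
+## State discretization and tabular update (Q-learning)
+For the tabular agent, the sketch below combines the state discretization of `observeState` with a standard Q-learning update using the `alpha = 0.4` and `gamma = 0.9` of `QL/FlappyTraining.py`. The closed-form binning approximates the `while` loops of `observeState`, and the number of velocity bins (17) is an assumption; the exact update function is not visible in this diff, so treat this as an illustration rather than the training code itself.
+
+```python
+import numpy as np
+
+ALPHA, GAMMA = 0.4, 0.9        # values used in QL/FlappyTraining.py
+NB_X, NB_Y, NB_V = 5, 14, 17   # pipe-distance, height and velocity bins (NB_V assumed)
+
+# Q[x_cat, y_cat, speed_cat, action]; action 0 = do nothing, 1 = flap (key 119)
+Q = np.zeros((NB_X, NB_Y, NB_V, 2))
+
+def observe_state(state):
+    # Discretize (distance to the next pipe, height relative to its bottom, vertical speed).
+    y_to_pipe_bottom = state["player_y"] - state["next_pipe_bottom_y"]
+    y_cat = int((y_to_pipe_bottom + 412) * NB_Y / 824)
+    x_cat = int(state["next_pipe_dist_to_player"] * NB_X / 288)
+    speed_cat = int((state["player_vel"] + 16) / 2)
+    return (min(x_cat, NB_X - 1), min(y_cat, NB_Y - 1), min(max(speed_cat, 0), NB_V - 1))
+
+def q_update(s, a, r, s_next, done):
+    # One tabular Q-learning step; crashes are punished with a large negative reward upstream.
+    best_next = 0.0 if done else np.max(Q[s_next])
+    Q[s][a] += ALPHA * (r + GAMMA * best_next - Q[s][a])
+```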