-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmovefaster.py
More file actions
72 lines (63 loc) · 2.45 KB
/
movefaster.py
File metadata and controls
72 lines (63 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class GridAgent (Agent):
    """Tabular Q-learning agent with an epsilon-greedy policy.

    Maintains a numstates x numactions table of action values (self.Q)
    and updates it with the one-step Q-learning rule in agentLearn.
    Driven by the simulation through agentfn.
    """
    def __init__(self, numactions, numstates, epsilon=0.05, alpha=0.5, \
                 gamma=.9, initialvalue=0.1, agentlambda=0.8):
        """Set up the Q table and learning parameters.

        numactions   -- number of actions available in every state
        numstates    -- number of (non-terminal) states
        epsilon      -- exploration rate for the epsilon-greedy policy
        alpha        -- learning-rate (step-size) parameter
        gamma        -- discount factor
        initialvalue -- initial value for every Q entry (optimistic init)
        agentlambda  -- trace-decay parameter (stored; not used in this class)
        """
        Agent.__init__(self)
        self.alpha = alpha
        self.initialvalue = initialvalue
        self.gamma = gamma
        self.epsilon = epsilon
        self.agentlambda = agentlambda
        self.recentsensations = []
        self.recentactions = []
        self.numactions = numactions
        self.numstates = numstates
        # Q[s][a]: estimated return for taking action a in state s.
        self.Q = [[self.initialvalue for _ in range(self.numactions)]
                  for _ in range(self.numstates)]
        # Parallel snapshot table; presumably saved/restored by external
        # code — not touched by any method here. TODO confirm with callers.
        self.savedq = [[self.initialvalue for _ in range(self.numactions)]
                       for _ in range(self.numstates)]
        self.changedstates = []

    def agentStartEpisode (self, sensation):
        """Clear the per-episode sensation/action history."""
        self.recentsensations = []
        self.recentactions = []

    def agentchangestate(self, s):
        """Record state s as changed, at most once per state."""
        if s not in self.changedstates:  # was `not s in` — same meaning, idiomatic form
            self.changedstates.append(s)

    def actionvalues (self, s):
        """Return the list of Q values for every action in state s."""
        return [self.Q[s][a] for a in range(self.numactions)]

    def statevalue (self, s):
        """Value of state s: max action value; 0 for None or 'terminal'."""
        if s is None or s == 'terminal':  # identity test for None (was `== None`)
            return 0
        return max(self.actionvalues(s))

    def policy (self, state):
        """Choose an action epsilon-greedily from state's action values."""
        return egreedy(self.epsilon, self.numactions, self.actionvalues(state))

    def agentChoose (self, sprime): # epsilon greedy
        """Push sprime onto the history, pick an action for it, return it."""
        self.recentsensations = [sprime] + self.recentsensations
        if sprime != 'terminal':
            self.recentactions = [self.policy(sprime)] + self.recentactions
        # NOTE(review): if the first sensation of a run is 'terminal',
        # recentactions is empty and this raises IndexError — preexisting.
        return self.recentactions[0]

    def agentLearn (self, s, a, reward, sprime): #default is one step q
        """One-step Q-learning update for transition (s, a, reward, sprime).

        BUG FIX: the original referenced an undefined name `r` instead of
        the `reward` parameter, raising NameError on every call.
        """
        self.Q[s][a] += self.alpha * (reward +
                                      (self.gamma * self.statevalue(sprime))
                                      - self.Q[s][a])

    def agentInit(self):
        """Reset the episode history (initialization hook)."""
        self.recentsensations = []
        self.recentactions = []
        # removed stray `pass` — it was dead code after real statements

    def agentfn(self, verbose, s, r=None):
        """Simulation step callback: learn from the last transition, act.

        r is None on the first step of an episode (no reward yet); a
        sensation of 'terminal' ends the episode and returns None.
        Relies on module-level globals lasts/lasta and simcollect.
        """
        global lasts, lasta
        simcollect(self.sim, lasts, lasta, r, s)
        if r is not None:  # identity test for None (was `!= None`)
            self.agentLearn(lasts, lasta, r, s)
        else:
            self.agentStartEpisode(s)
        if s != 'terminal':
            a = self.agentChoose(s)
            lasts, lasta = s, a
            if verbose:
                print("Agent chose action", a)
            return a
        return None