# learningAgent.py
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 4 18:42:24 2016
@author: Kireeti
"""
import numpy as np
import sys
import random
class LearningAgent:
    """Agent that picks actions from a discrete action grid.

    The agent keeps an energy capacity (``E_max``), a per-step power cap
    (``P_cap``) and a minimum energy level (``E_min``), and uses them to
    restrict which entries of ``actions`` may be taken from a given state.
    """

    # Value returned by getAction when no action sequence could be built.
    _NO_ACTION_VALUE = -100000

    def __init__(self, currentState, actions, E_cap, P_cap, epsilon):
        """Store the configuration and derive the energy limits.

        Args:
            currentState: Initial state; index 1 is read as the energy level
                by ``getLegalActions``.
            actions: Sequence of action values, used as the bin edges for
                ``np.digitize`` (presumably sorted ascending -- TODO confirm
                against the caller).
            E_cap: Maximum energy level, stored as ``E_max``.
            P_cap: Largest magnitude of a single action.
            epsilon: Stored as-is; not read anywhere inside this class.
        """
        self.debug = True
        self.currentState = currentState
        self.actions = actions
        self.E_max = E_cap
        self.P_cap = P_cap
        self.epsilon = epsilon
        self.E_min = (1 - 0.80) * self.E_max  # depth of discharge
        self.E_init = (0.30) * self.E_max  # 30% of charge every morning

    def getLegalActions(self, currentState):
        """Calculate and return the allowable action set.

        The admissible interval is ``[E_min - level, E_max - level]`` clipped
        to ``[-P_cap, P_cap]``, where ``level`` is ``currentState[1]``. Both
        ends are located in ``self.actions`` with ``np.digitize``.

        Args:
            currentState: Indexable state whose element 1 is the energy level.

        Returns:
            List of indices into ``self.actions``. The index found for the
            upper bound itself is excluded, so the list may be empty.
        """
        energy_level = currentState[1]
        lower_bound = max(self.E_min - energy_level, -self.P_cap)
        upper_bound = min(self.E_max - energy_level, self.P_cap)
        max_bin = int(np.digitize(upper_bound, self.actions, right=True))
        min_bin = int(np.digitize(lower_bound, self.actions, right=True))
        return list(range(min_bin, max_bin))

    def exploration(self, episode_number, time_step, environment, k):
        """Pick a uniformly random legal action for the current state.

        Args:
            episode_number: Forwarded to ``environment.getCumulativeReward``.
            time_step: Forwarded to ``environment.getCumulativeReward``.
            environment: Object providing ``getCumulativeReward``.
            k: Forwarded to ``environment.getCumulativeReward``.

        Returns:
            Tuple ``([action_index], cumulativeReward)``.

        Raises:
            IndexError: If ``self.currentState`` has no legal action.
        """
        legalActions = self.getLegalActions(self.currentState)
        action_index = random.choice(legalActions)
        # The state returned by the environment is not used here.
        _, cumulativeReward = environment.getCumulativeReward(
            episode_number, time_step, k, [self.actions[action_index]])
        return [action_index], cumulativeReward

    def getAction(self, episodeNumber, state, model, environment, k, gama,
                  timeStamp):
        """Return a tuple of (optimal action indices, value).

        With ``k == 0`` the legal action with the highest
        ``model.predictQvalue`` is chosen (the last one wins on ties).
        Otherwise each legal action is simulated through
        ``environment.getNextState`` and the method recurses with ``k - 1``,
        maximising ``current_reward + gama * future_value``.

        Args:
            episodeNumber: Forwarded to ``environment.getNextState``.
            state: State to act from; passed to ``getLegalActions``.
            model: Object providing ``predictQvalue(state, agent, [index])``.
            environment: Object providing ``getNextState``; unused if k == 0.
            k: Number of further look-ahead steps.
            gama: Multiplier applied to the value of the following steps.
            timeStamp: Current time index; incremented on each recursion.

        Returns:
            ``(indices, value)`` where ``indices`` holds one action index per
            look-ahead level, or ``([None], -100000)`` when no action could
            be selected at this level.
        """
        legalActions = self.getLegalActions(state)

        if k == 0:
            # An empty action set used to raise IndexError here; report it
            # with the same sentinel the look-ahead branch returns.
            if not legalActions:
                return [None], self._NO_ACTION_VALUE
            # Evaluate every candidate first, then select.
            scored = [(index, model.predictQvalue(state, self, [index]))
                      for index in legalActions]
            optimalAction, QValue = scored[0]
            for index, q in scored:
                if q >= QValue:  # '>=' keeps the last of equal maxima
                    optimalAction, QValue = index, q
            return [optimalAction], QValue

        optimalReward = None
        optimalActions = []
        currentOptimalAction = None
        for action in legalActions:
            nextState, current_reward, isValid = environment.getNextState(
                episodeNumber, timeStamp, state, self.actions[action])
            # NOTE(review): transitions are skipped when this flag is truthy,
            # which reads inverted for a flag named isValid -- confirm the
            # return order/meaning of environment.getNextState.
            if isValid:
                continue
            actionTupples, reward = self.getAction(
                episodeNumber, nextState, model, environment, k - 1, gama,
                timeStamp + 1)
            total = current_reward + gama * reward
            # Explicit None check: ordering None against a number only works
            # on Python 2 and raises TypeError on Python 3.
            if optimalReward is None or optimalReward < total:
                optimalReward = total
                currentOptimalAction = action
                optimalActions = actionTupples

        if optimalReward is None:
            # Every candidate was skipped, or there were none at all.
            return [None], self._NO_ACTION_VALUE
        return [currentOptimalAction] + optimalActions, optimalReward