forked from Sir-Batman/Rover-Domain
-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathgym_example.py
More file actions
88 lines (67 loc) · 2.73 KB
/
gym_example.py
File metadata and controls
88 lines (67 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from rover_domain_core_gym import RoverDomainCoreGym
from mods import *
import datetime
from code.world_setup import * # Rover Domain Construction
from code.agent_domain_2 import * # Rover Domain Dynamic
from code.trajectory_history import * # Agent Position Trajectory History
from code.reward_2 import * # Agent Reward
from code.reward_history import * # Performance Recording
from code.ccea_2 import * # CCEA
from code.save_to_pickle import * # Save data as pickle file
import random
# --- Experiment configuration -------------------------------------------------
# Runs repeated trials of CCEA learning in the rover domain, once per reward
# "mod" (global / difference / D++), forever, until interrupted (Ctrl-C).
stepCount = 5           # simulation steps per world (episode length written to sim.data["Steps"])
trainCountXEpisode = 3  # training worlds evaluated per episode
testCountXEpisode = 1   # test worlds evaluated per episode
episodeCount = 20       # episodes (generations) per trial
# NOTE: Add the mod functions (variables) to run to modCol here:
modCol = [
    globalRewardMod,
    differenceRewardMod,
    dppRewardMod
]
i = 0
while True:  # run trials indefinitely; interrupt manually to stop
    print("Run %i"%(i))
    # Shuffle so mod order varies between runs (avoids systematic ordering bias).
    random.shuffle(modCol)
    for mod in modCol:
        sim = RoverDomainCoreGym()
        mod(sim)  # apply the reward-structure mod to this simulation
        #Trial Begins
        createRewardHistory(sim.data)
        initCcea(input_shape= 8, num_outputs=2, num_units = 32)(sim.data)
        sim.data["Steps"] = stepCount
        for episodeIndex in range(episodeCount):
            sim.data["Episode Index"] = episodeIndex
            # Training Phase
            obs = sim.reset('Train', True)
            for worldIndex in range(trainCountXEpisode):
                sim.data["World Index"] = worldIndex
                obs = sim.reset('Train', False)
                assignCceaPolicies(sim.data)
                done = False
                # BUG FIX: the original reused the name `stepCount` for this
                # per-world counter, clobbering the episode-length constant
                # above — so every trial after the first wrote a stale step
                # tally into sim.data["Steps"]. Use a distinct local name.
                stepsTaken = 0
                while not done:
                    doAgentProcess(sim.data)
                    jointAction = sim.data["Agent Actions"]
                    obs, reward, done, info = sim.step(jointAction)
                    stepsTaken += 1
                rewardCceaPolicies(sim.data)
            # Testing Phase
            obs = sim.reset('Test', True)
            assignBestCceaPolicies(sim.data)
            for worldIndex in range(testCountXEpisode):
                sim.data["World Index"] = worldIndex
                obs = sim.reset('Test', False)
                done = False
                stepsTaken = 0
                while not done:
                    doAgentProcess(sim.data)
                    jointAction = sim.data["Agent Actions"]
                    obs, reward, done, info = sim.step(jointAction)
                    stepsTaken += 1
            evolveCceaPolicies(sim.data)
            updateRewardHistory(sim.data)
        # Trial End
        saveRewardHistory(sim.data)
        saveTrajectoryHistories(sim.data)
    i += 1