From f7b482dc72b40d22f7b8fbd5d61e88adc398dd10 Mon Sep 17 00:00:00 2001 From: Harley Montgomery Date: Tue, 25 Oct 2016 17:16:28 -0700 Subject: [PATCH 1/2] add RFGPS --- experiments/box2d_arm_example/hyperparams.py | 9 +- .../box2d_badmm_example/hyperparams.py | 9 +- .../box2d_pointmass_example/hyperparams.py | 9 +- .../mjc_pointmass_example/hyperparams.py | 9 +- experiments/mjc_rfgps_example/hyperparams.py | 174 ++++++++++++ python/gps/agent/config.py | 6 +- python/gps/agent/mjc/agent_mjc.py | 49 +++- python/gps/algorithm/algorithm.py | 128 +++++++-- python/gps/algorithm/algorithm_mdgps.py | 248 +++++++++++++++--- python/gps/algorithm/config.py | 11 +- python/gps/algorithm/cost/config.py | 30 +-- python/gps/algorithm/cost/cost_state.py | 70 +++-- .../algorithm/dynamics/dynamics_lr_prior.py | 3 - .../gps/algorithm/policy/policy_prior_gmm.py | 2 - python/gps/gps_main.py | 10 +- python/gps/gui/gps_training_gui.py | 4 +- python/gps/utility/general_utils.py | 44 +++- 17 files changed, 669 insertions(+), 146 deletions(-) create mode 100644 experiments/mjc_rfgps_example/hyperparams.py diff --git a/experiments/box2d_arm_example/hyperparams.py b/experiments/box2d_arm_example/hyperparams.py index ea3d8acbe..bc50dd2e7 100644 --- a/experiments/box2d_arm_example/hyperparams.py +++ b/experiments/box2d_arm_example/hyperparams.py @@ -82,12 +82,9 @@ state_cost = { 'type': CostState, - 'data_types' : { - JOINT_ANGLES: { - 'wp': np.array([1, 1]), - 'target_state': agent["target_state"], - }, - }, + 'data_type': JOINT_ANGLES, + 'wp': np.array([1, 1]), + 'target_state': agent["target_state"], } algorithm['cost'] = { diff --git a/experiments/box2d_badmm_example/hyperparams.py b/experiments/box2d_badmm_example/hyperparams.py index 463b9d94b..c7762ae83 100644 --- a/experiments/box2d_badmm_example/hyperparams.py +++ b/experiments/box2d_badmm_example/hyperparams.py @@ -97,12 +97,9 @@ state_cost = { 'type': CostState, - 'data_types' : { - JOINT_ANGLES: { - 'wp': np.array([1, 1]), - 'target_state': agent["target_state"], - }, - }, + 'data_type': JOINT_ANGLES, + 'wp': np.array([1, 1]), + 'target_state': agent["target_state"], } algorithm['cost'] = { diff --git a/experiments/box2d_pointmass_example/hyperparams.py b/experiments/box2d_pointmass_example/hyperparams.py index a53d7afba..b88edcd08 100644 --- a/experiments/box2d_pointmass_example/hyperparams.py +++ b/experiments/box2d_pointmass_example/hyperparams.py @@ -80,12 +80,9 @@ state_cost = { 'type': CostState, - 'data_types' : { - END_EFFECTOR_POINTS: { - 'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]), - 'target_state': agent["target_state"], - }, - }, + 'data_type': END_EFFECTOR_POINTS, + 'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]), + 'target_state': agent["target_state"], } algorithm['cost'] = { diff --git a/experiments/mjc_pointmass_example/hyperparams.py b/experiments/mjc_pointmass_example/hyperparams.py index 4024914f7..cfd2120ab 100644 --- a/experiments/mjc_pointmass_example/hyperparams.py +++ b/experiments/mjc_pointmass_example/hyperparams.py @@ -84,12 +84,9 @@ algorithm['cost'] = { 'type': CostState, - 'data_types' : { - JOINT_ANGLES: { - 'wp': np.ones(SENSOR_DIMS[ACTION]), - 'target_state': np.array([0.5, 0.5]), - }, - }, + 'data_type': JOINT_ANGLES, + 'wp': np.ones(SENSOR_DIMS[ACTION]), + 'target_state': np.array([0.5, 0.5]), } algorithm['dynamics'] = { diff --git a/experiments/mjc_rfgps_example/hyperparams.py b/experiments/mjc_rfgps_example/hyperparams.py new file mode 100644 index 000000000..4f751a4ed --- /dev/null +++ b/experiments/mjc_rfgps_example/hyperparams.py @@ -0,0 +1,174 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +from __future__ import division + +from datetime import datetime +import os.path + +import numpy as np + +from gps import __file__ as gps_filepath +from gps.agent.mjc.agent_mjc import AgentMuJoCo +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS +from gps.algorithm.cost.cost_state import CostState +from gps.algorithm.cost.cost_action import CostAction +from gps.algorithm.cost.cost_sum import CostSum +from gps.algorithm.cost.cost_utils import evall1l2term +from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior +from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM +from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython +from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe +from gps.algorithm.policy.lin_gauss_init import init_lqr +from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM +from gps.algorithm.policy.policy_prior import PolicyPrior +from gps.gui.config import generate_experiment_info +from gps.utility.general_utils import sample_params +from gps.proto.gps_pb2 import * + +SENSOR_DIMS = { + JOINT_ANGLES: 2, + JOINT_VELOCITIES: 2, + END_EFFECTOR_POINTS: 6, + END_EFFECTOR_POINT_VELOCITIES: 6, + ACTION: 2, +} + +GAINS = np.ones(SENSOR_DIMS[ACTION]) + +TRAIN_CONDITIONS = 40 +TEST_CONDITIONS = 10 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Set up ranges. +pos_body_lower = np.array([-0.3, -0.1, 0]) +pos_body_upper = np.array([0.1, 0.3, 0]) + +# Create dummy train positions (will be overwritten randomly). +pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)] + +# Add random test conditions. +np.random.seed(13) +pos_body_offset += [sample_params([pos_body_lower, pos_body_upper], []) \ + for _ in range(TEST_CONDITIONS)] + +BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2]) +EXP_DIR = BASE_DIR + '/../experiments/mjc_mdgps_example/' + +common = { + 'experiment_name': 'my_experiment' + '_' + \ + datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +} + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = { + 'type': AgentMuJoCo, + 'filename': './mjc_models/reacher.xml', + 'conditions': common['conditions'], + 'x0': np.zeros(4), + 'T': 50, + 'dt': 0.05, + 'substeps': 2, + 'pos_body_offset': pos_body_offset, + 'pos_body_idx': np.array([4]), + 'randomly_sample_bodypos': True, + 'sampling_range_bodypos': [pos_body_lower, pos_body_upper], + 'sensor_dims': SENSOR_DIMS, + 'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'camera_pos': np.array([0., 0., 1.5, 0., 0., 0.]), +} + +algorithm = { + 'type': AlgorithmMDGPS, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'sample_on_policy': True, + 'iterations': 15, + 'kl_step': 1.0, + 'min_step_mult': 0.5, + 'max_step_mult': 3.0, + 'policy_sample_mode': 'replace', + 'cluster_method': 'traj_em', + 'num_clusters': 8, +} + +algorithm['init_traj_distr'] = { + 'type': init_lqr, + 'init_gains': 100.0*GAINS, + 'init_acc': np.zeros_like(GAINS), + 'init_var': 10.0, + 'dt': agent['dt'], + 'T': agent['T'], +} + +torque_cost = { + 'type': CostAction, + 'wu': 1e-3 / GAINS, +} + +# Diff between average of end_effectors and block. +touch_cost = { + 'type': CostState, + 'data_type': END_EFFECTOR_POINTS, + 'A' : np.c_[np.eye(3), -np.eye(3)], + 'l1': 1.0, + 'evalnorm': evall1l2term, +} + +algorithm['cost'] = { + 'type': CostSum, + 'costs': [torque_cost, touch_cost], + 'weights': [1.0, 1.0], +} + +algorithm['dynamics'] = { + 'type': DynamicsLRPrior, + 'regularization': 1e-6, + 'prior': { + 'type': DynamicsPriorGMM, + 'max_clusters': 50, + 'min_samples_per_cluster': 40, + 'max_samples': TRAIN_CONDITIONS, + }, +} + +algorithm['traj_opt'] = { + 'type': TrajOptLQRPython, +} + +algorithm['policy_opt'] = { + 'type': PolicyOptCaffe, + 'iterations': 4000, + 'weights_file_prefix': EXP_DIR + 'policy', +} + +algorithm['policy_prior'] = { + 'type': PolicyPriorGMM, + 'max_clusters': 50, + 'min_samples_per_cluster': 40, + 'max_samples': 20, +} + +config = { + 'gui_on': True, + 'iterations': algorithm['iterations'], + 'num_samples': 1, + 'verbose_trials': 0, + 'verbose_policy_trials': 1, + 'common': common, + 'agent': agent, + 'algorithm': algorithm, +} + +common['info'] = generate_experiment_info(config) diff --git a/python/gps/agent/config.py b/python/gps/agent/config.py index 9c2e18a6a..79308b470 100755 --- a/python/gps/agent/config.py +++ b/python/gps/agent/config.py @@ -69,7 +69,11 @@ 'image_width': 640, 'image_height': 480, 'image_channels': 3, - 'meta_include': [] + 'meta_include': [], + 'randomly_sample_x0': False, + 'prohibited_ranges_x0': [], + 'randomly_sample_bodypos': False, + 'prohibited_ranges_bodypos': [], } AGENT_BOX2D = { diff --git a/python/gps/agent/mjc/agent_mjc.py b/python/gps/agent/mjc/agent_mjc.py index bfc53747a..a2fbf060b 100755 --- a/python/gps/agent/mjc/agent_mjc.py +++ b/python/gps/agent/mjc/agent_mjc.py @@ -8,14 +8,10 @@ from gps.agent.agent import Agent from gps.agent.agent_utils import generate_noise, setup from gps.agent.config import AGENT_MUJOCO -from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \ - END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, \ - END_EFFECTOR_POINT_JACOBIANS, ACTION, RGB_IMAGE, RGB_IMAGE_SIZE, \ - CONTEXT_IMAGE, CONTEXT_IMAGE_SIZE, IMAGE_FEAT, \ - END_EFFECTOR_POINTS_NO_TARGET, END_EFFECTOR_POINT_VELOCITIES_NO_TARGET +from gps.proto.gps_pb2 import * from gps.sample.sample import Sample - +from gps.utility.general_utils import sample_params class AgentMuJoCo(Agent): """ @@ -74,7 +70,6 @@ def _setup_world(self, filename): self.x0 = [] for i in range(self._hyperparams['conditions']): if END_EFFECTOR_POINTS in self.x_data_types: - # TODO: this assumes END_EFFECTOR_VELOCITIES is also in datapoints right? self._init(i) eepts = self._world[i].get_data()['site_xpos'].flatten() self.x0.append( @@ -99,6 +94,44 @@ def _setup_world(self, filename): cam_pos[0], cam_pos[1], cam_pos[2], cam_pos[3], cam_pos[4], cam_pos[5]) + def reset_initial_x0(self, condition): + """ + Reset initial x0 randomly based on sampling_range_x0 + and prohibited_ranges_x0 + Args: + condition: Which condition setup to run. + """ + self._hyperparams['x0'][condition] = sample_params( + self._hyperparams['sampling_range_x0'], + self._hyperparams['prohibited_ranges_x0']) + + def reset_initial_body_offset(self, condition): + """ + Reset initial body offset randomly based on sampling_range_bodypos + and prohibited_ranges_bodypos + Args: + condition: Which condition setup to run. + """ + tmp_body_pos_offset = self._hyperparams['pos_body_offset'][condition][:] + self._hyperparams['pos_body_offset'][condition] = sample_params( + self._hyperparams['sampling_range_bodypos'], + self._hyperparams['prohibited_ranges_bodypos']) + # TODO: handle the i/j stuff as above + for j in range(len(self._hyperparams['pos_body_idx'][condition])): + idx = self._hyperparams['pos_body_idx'][condition][j] + self._model[condition]['body_pos'][idx, :] -= tmp_body_pos_offset + self._model[condition]['body_pos'][idx, :] += self._hyperparams['pos_body_offset'][condition] + + if END_EFFECTOR_POINTS in self.x_data_types: + x0 = self._hyperparams['x0'][condition] + eepts = self._world[condition].get_data()['site_xpos'].flatten() + self.x0[condition] = np.concatenate([x0, eepts, np.zeros_like(eepts)]) + elif END_EFFECTOR_POINTS_NO_TARGET in self.x_data_types: + x0 = self._hyperparams['x0'][condition] + eepts = self._world[i].get_data()['site_xpos'].flatten() + eepts_notgt = np.delete(eepts, self._hyperparams['target_idx']) + self.x0[condition] = np.concatenate([x0, eepts_notgt, np.zeros_like(eepts_notgt)]) + def sample(self, policy, condition, verbose=True, save=True, noisy=True): """ Runs a trial and constructs a new sample containing information @@ -188,6 +221,7 @@ def _init_sample(self, condition, feature_fn=None): if (END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']): sample.set(END_EFFECTOR_POINTS_NO_TARGET, np.delete(eepts, self._hyperparams['target_idx']), t=0) + if (END_EFFECTOR_POINT_VELOCITIES_NO_TARGET in self._hyperparams['obs_include']): sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET, np.delete(np.zeros_like(eepts), self._hyperparams['target_idx']), t=0) jac = np.zeros([eepts.shape[0], self._model[condition]['nq']]) @@ -248,6 +282,7 @@ def _set_sample(self, sample, mj_X, t, condition, feature_fn=None): if (END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']): sample.set(END_EFFECTOR_POINTS_NO_TARGET, np.delete(cur_eepts, self._hyperparams['target_idx']), t=t+1) + if (END_EFFECTOR_POINT_VELOCITIES_NO_TARGET in self._hyperparams['obs_include']): sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET, np.delete(eept_vels, self._hyperparams['target_idx']), t=t+1) jac = np.zeros([cur_eepts.shape[0], self._model[condition]['nq']]) diff --git a/python/gps/algorithm/algorithm.py b/python/gps/algorithm/algorithm.py index 1b949866c..29ddee759 100755 --- a/python/gps/algorithm/algorithm.py +++ b/python/gps/algorithm/algorithm.py @@ -9,6 +9,7 @@ from gps.algorithm.config import ALG from gps.algorithm.algorithm_utils import IterationData, TrajectoryInfo from gps.utility.general_utils import extract_condition +from gps.sample.sample_list import SampleList LOGGER = logging.getLogger(__name__) @@ -51,6 +52,8 @@ def __init__(self, hyperparams): self.prev = [IterationData() for _ in range(self.M)] dynamics = self._hyperparams['dynamics'] + + # Store traj_info per condition. for m in range(self.M): self.cur[m].traj_info = TrajectoryInfo() self.cur[m].traj_info.dynamics = dynamics['type'](dynamics) @@ -59,18 +62,27 @@ def __init__(self, hyperparams): ) self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr) + # If clustering, store traj_info per cluster and single dynamics GMM. + if self._hyperparams['num_clusters']: + self.dynamics = dynamics['type'](dynamics) + self.cluster_traj_info = [] + for k in range(self._hyperparams['num_clusters']): + self.cluster_traj_info.append(TrajectoryInfo()) + self.cluster_traj_info[k].dynamics = \ + dynamics['type'](dynamics) + self.traj_opt = hyperparams['traj_opt']['type']( hyperparams['traj_opt'] ) if type(hyperparams['cost']) == list: self.cost = [ hyperparams['cost'][i]['type'](hyperparams['cost'][i]) - for i in range(self.M) + for i in range(hyperparams['conditions']) ] else: self.cost = [ hyperparams['cost']['type'](hyperparams['cost']) - for _ in range(self.M) + for _ in range(hyperparams['conditions']) ] self.base_kl_step = self._hyperparams['kl_step'] @@ -79,36 +91,94 @@ def iteration(self, sample_list): """ Run iteration of the algorithm. """ raise NotImplementedError("Must be implemented in subclass") - def _update_dynamics(self): + def _get_samples(self, idxs=None): """ - Instantiate dynamics objects and update prior. Fit dynamics to - current samples. + Helper function for extracting from 'self.cur'. """ - for m in range(self.M): - cur_data = self.cur[m].sample_list - X = cur_data.get_X() - U = cur_data.get_U() - - # Update prior and fit dynamics. - self.cur[m].traj_info.dynamics.update_prior(cur_data) - self.cur[m].traj_info.dynamics.fit(X, U) - - # Fit x0mu/x0sigma. - x0 = X[:, 0, :] - x0mu = np.mean(x0, axis=0) - self.cur[m].traj_info.x0mu = x0mu - self.cur[m].traj_info.x0sigma = np.diag( - np.maximum(np.var(x0, axis=0), - self._hyperparams['initial_state_var']) - ) + if idxs is None: + idxs = range(self.M) + samples = [self.cur[i].sample_list.get_samples() for i in idxs] + return SampleList(sum(samples, [])) + + def _update_dynamics(self, update_prior=True): + """ + Update dynamics linearizations (dispatch for conditions/clusters). + """ + if self._hyperparams['num_clusters']: + K = self._hyperparams['num_clusters'] + if update_prior: + all_samples = self._get_samples() + self.dynamics.update_prior(all_samples) + for k in range(K): + self._update_cluster_dynamics(k) + else: + for m in range(self.M): + self._update_condition_dynamics(m, update_prior) + + def _update_condition_dynamics(self, m, update_prior): + """ + Update dynamics linearizations for condition m. + """ + samples = self.cur[m].sample_list + X = samples.get_X() + U = samples.get_U() + + traj_info = self.cur[m].traj_info + if update_prior: + traj_info.dynamics.update_prior(samples) + traj_info.dynamics.fit(X, U) # Stores Fm/fv/dyn_covar. + + # Fit x0mu/x0sigma and store. + x0 = X[:, 0, :] + x0mu = np.mean(x0, axis=0) + x0sigma = np.diag( + np.maximum(np.var(x0, axis=0), + self._hyperparams['initial_state_var']) + ) + prior = self.cur[m].traj_info.dynamics.get_prior() + if prior: + mu0, Phi, priorm, n0 = prior.initial_state() + N = len(samples) + x0sigma += \ + Phi + (N*priorm) / (N+priorm) * \ + np.outer(x0mu-mu0, x0mu-mu0) / (N+n0) + traj_info.x0mu = x0mu + traj_info.x0sigma = x0sigma + + def _update_cluster_dynamics(self, k): + """ + Update dynamics linearizations for cluster k. + """ + idxs = np.where(self.cluster_idx == k)[0] + LOGGER.debug("Updating dynamics for cluster %d, idxs %s", k, idxs) + if idxs.size == 0: + return + + samples = self._get_samples(idxs) + X = samples.get_X() + U = samples.get_U() + + # Fit x0mu/x0sigma/dynamics and store cluster-level info. + traj_info = self.cluster_traj_info[k] + x0 = X[:, 0, :] + traj_info.x0mu = np.mean(x0, axis=0) + traj_info.x0sigma = np.diag( + np.maximum(np.var(x0, axis=0), + self._hyperparams['initial_state_var']) + ) + + Fm, fv, dyn_covar = self.dynamics.fit(X, U) + traj_info.dynamics.Fm = Fm + traj_info.dynamics.fv = fv + traj_info.dynamics.dyn_covar = dyn_covar - prior = self.cur[m].traj_info.dynamics.get_prior() - if prior: - mu0, Phi, priorm, n0 = prior.initial_state() - N = len(cur_data) - self.cur[m].traj_info.x0sigma += \ - Phi + (N*priorm) / (N+priorm) * \ - np.outer(x0mu-mu0, x0mu-mu0) / (N+n0) + # Copy to trajectory-level info. + for i in idxs: + self.cur[i].traj_info.x0mu = traj_info.x0mu + self.cur[i].traj_info.x0sigma = traj_info.x0sigma + self.cur[i].traj_info.dynamics.Fm = Fm + self.cur[i].traj_info.dynamics.fv = fv + self.cur[i].traj_info.dynamics.dyn_covar = dyn_covar def _update_trajectories(self): """ diff --git a/python/gps/algorithm/algorithm_mdgps.py b/python/gps/algorithm/algorithm_mdgps.py index b3bf8c932..22fdb2a4a 100644 --- a/python/gps/algorithm/algorithm_mdgps.py +++ b/python/gps/algorithm/algorithm_mdgps.py @@ -9,6 +9,7 @@ from gps.algorithm.algorithm_utils import PolicyInfo from gps.algorithm.config import ALG_MDGPS from gps.sample.sample_list import SampleList +from sklearn.cluster import KMeans LOGGER = logging.getLogger(__name__) @@ -23,12 +24,22 @@ def __init__(self, hyperparams): config.update(hyperparams) Algorithm.__init__(self, config) + # Store single policy prior. policy_prior = self._hyperparams['policy_prior'] + + # Store pol_info per condition. for m in range(self.M): self.cur[m].pol_info = PolicyInfo(self._hyperparams) self.cur[m].pol_info.policy_prior = \ policy_prior['type'](policy_prior) + # Store pol per cluster. + if self._hyperparams['num_clusters']: + self.policy_prior = policy_prior['type'](policy_prior) + self.cluster_pol_info = [PolicyInfo(self._hyperparams) \ + for k in range(self._hyperparams['num_clusters']) + ] + self.policy_opt = self._hyperparams['policy_opt']['type']( self._hyperparams['policy_opt'], self.dO, self.dU ) @@ -40,24 +51,29 @@ def iteration(self, sample_lists): Args: sample_lists: List of SampleList objects for each condition. """ - # Store the samples and evaluate the costs. + # Store the samples. for m in range(self.M): self.cur[m].sample_list = sample_lists[m] - self._eval_cost(m) - # Update dynamics linearizations. - self._update_dynamics() - - # On the first iteration, need to catch policy up to init_traj_distr. + # On the first iteration, need to train on init_traj_distr. if self.iteration_count == 0: - self.new_traj_distr = [ - self.cur[cond].traj_distr for cond in range(self.M) - ] + self.new_traj_distr = [self.cur[m].traj_distr \ + for m in range(self.M)] self._update_policy() - # Update policy linearizations. + # If clustering, set self.cluster_idx. + if self._hyperparams['num_clusters']: + self._cluster_samples() + + # Update dynamics/policy linearizations. + # NOTE: Skip for 'traj_em', since fits while clustering. + if self._hyperparams['cluster_method'] != 'traj_em': + self._update_dynamics() + self._update_policy_fit() + + # Update quadratic cost expansions for all conditions. for m in range(self.M): - self._update_policy_fit(m) + self._eval_cost(m) # C-step if self.iteration_count > 0: @@ -96,39 +112,79 @@ def _update_policy(self): tgt_prc = np.concatenate((tgt_prc, prc)) tgt_wt = np.concatenate((tgt_wt, wt)) obs_data = np.concatenate((obs_data, samples.get_obs())) - self.policy_opt.update(obs_data, tgt_mu, tgt_prc, tgt_wt) + if 'fc_only_iterations' in self.policy_opt._hyperparams: + self.policy_opt.update(obs_data, tgt_mu, tgt_prc, tgt_wt, self.iteration_count) + else: + self.policy_opt.update(obs_data, tgt_mu, tgt_prc, tgt_wt) - def _update_policy_fit(self, m): + def _update_policy_fit(self, update_prior=True): + """ + Update dynamics linearizations (dispatch for conditions/clusters). + """ + if self._hyperparams['num_clusters']: + K = self._hyperparams['num_clusters'] + if update_prior: + all_samples = self._get_samples() + self.policy_prior.update(all_samples, self.policy_opt, + mode=self._hyperparams['policy_sample_mode']) + for k in range(K): + self._update_cluster_policy_fit(k) + else: + for m in range(self.M): + self._update_condition_policy_fit(m, update_prior) + + def _update_condition_policy_fit(self, m, update_prior): """ Re-estimate the local policy values in the neighborhood of the - trajectory. - Args: - m: Condition - init: Whether this is the initial fitting of the policy. + trajectory for condition m. """ - dX, dU, T = self.dX, self.dU, self.T - # Choose samples to use. samples = self.cur[m].sample_list - N = len(samples) - pol_info = self.cur[m].pol_info X = samples.get_X() obs = samples.get_obs().copy() pol_mu, pol_sig = self.policy_opt.prob(obs)[:2] - pol_info.pol_mu, pol_info.pol_sig = pol_mu, pol_sig - # Update policy prior. - policy_prior = pol_info.policy_prior - samples = SampleList(self.cur[m].sample_list) - mode = self._hyperparams['policy_sample_mode'] - policy_prior.update(samples, self.policy_opt, mode) + pol_info = self.cur[m].pol_info + if update_prior: + pol_info.policy_prior.update(samples, self.policy_opt, + mode=self._hyperparams['policy_sample_mode']) - # Fit linearization and store in pol_info. + # Fit policy linearization and store. pol_info.pol_K, pol_info.pol_k, pol_info.pol_S = \ - policy_prior.fit(X, pol_mu, pol_sig) - for t in range(T): + pol_info.policy_prior.fit(X, pol_mu, pol_sig) + for t in range(X.shape[1]): pol_info.chol_pol_S[t, :, :] = \ sp.linalg.cholesky(pol_info.pol_S[t, :, :]) + def _update_cluster_policy_fit(self, k): + """ + Re-estimate the local policy values in the neighborhood of the + trajectory for condition m. + """ + idxs = np.where(self.cluster_idx == k)[0] + LOGGER.debug("Updating policy fit for cluster %d, idxs %s", k, idxs) + if idxs.size == 0: + return + + samples = self._get_samples(idxs) + X = samples.get_X() + obs = samples.get_obs().copy() + pol_mu, pol_sig = self.policy_opt.prob(obs)[:2] + + # Fit policy linearization and store. + pol_info = self.cluster_pol_info[k] + pol_info.pol_K, pol_info.pol_k, pol_info.pol_S = \ + self.policy_prior.fit(X, pol_mu, pol_sig) + for t in range(X.shape[1]): + pol_info.chol_pol_S[t, :, :] = \ + sp.linalg.cholesky(pol_info.pol_S[t, :, :]) + + # Copy to trajectory-level info. + for i in idxs: + self.cur[i].pol_info.pol_K = pol_info.pol_K + self.cur[i].pol_info.pol_k = pol_info.pol_k + self.cur[i].pol_info.pol_S = pol_info.pol_S + self.cur[i].pol_info.chol_pol_S = pol_info.chol_pol_S + def _advance_iteration_variables(self): """ Move all 'cur' variables to 'prev', reinitialize 'cur' @@ -231,3 +287,135 @@ def compute_costs(self, m, eta): fcv[t, :] = (cv[t, :] + PKLv[t, :] * eta) / (eta + multiplier) return fCm, fcv + + def _cluster_samples(self, mode=None): + """ + Divide samples into clusters, using desired clustering method. + """ + # By default, use 'cluster_method', but can override. + # (mainly for using kmeans when training on init_traj_distr). + if mode is None: + mode = self._hyperparams['cluster_method'] + + # Dispatch to clustering method. + if mode == 'random': + self._cluster_random() + elif mode == 'kmeans': + self._cluster_kmeans() + elif mode == 'traj_em': + self._cluster_traj_em() + else: + raise NotImplementedError + + def _cluster_random(self): + """ + Set cluster_idx using random clustering. + """ + self.cluster_idx = np.random.choice(K, size=self.M) + LOGGER.debug("Clustering randomly: %s", self.cluster_idx) + + def _cluster_kmeans(self): + """ + Set cluster_idx using kmeans clustering on initial state. + """ + K = self._hyperparams['num_clusters'] + kmeans = KMeans(K) + x0 = self._get_samples().get_X()[:, 0, :] # (N, dX) + self.cluster_idx = kmeans.fit_predict(x0) + LOGGER.debug("Clustering by kmeans: %s", self.cluster_idx) + + def _cluster_traj_em(self): + """ + Set cluster_idx using trajectory aware clustering. + """ + # Initialize starting with random data points as centers. + K = self._hyperparams['num_clusters'] +# self.cluster_idx = np.random.choice(K, size=self.M) +# LOGGER.debug("Traj EM initialization: %s", self.cluster_idx) + LOGGER.debug("Traj EM initialization to kmeans") + self._cluster_kmeans() + self._update_dynamics() + self._update_policy_fit() + + # Begin EM steps. + logp = np.empty([self.M, K]) + max_iters = self._hyperparams['traj_em_iters'] + T = self._hyperparams['traj_em_horizon'] + for i in range(max_iters): + # E step. + LOGGER.debug("E Step (%d/%d)", i+1, max_iters) + for k in range(K): + for m in range(self.M): + logp[m, k] = self._traj_log_prob(m, k, T) + + cluster_idx = logp.argmax(axis=1) + maxlogp = np.max(logp, axis=1, keepdims=True) + logp -= maxlogp + np.log(np.sum(np.exp(logp-maxlogp), axis=1, keepdims=True)) + cluster_weights = np.exp(logp) + cluster_masses = cluster_weights.sum(axis=0) + LOGGER.debug("Traj EM cluster idx: %s", cluster_idx) + LOGGER.debug("Total cluster weights: %s", cluster_masses) + + # Convergence check. + if np.all(cluster_idx == self.cluster_idx): +# if (i > 0) and np.all(cluster_idx == self.cluster_idx): + LOGGER.debug("Converged on itr (%d/%d)", i+1, max_iters) + break + + # Reboot small clusters. + if i < max_iters-1: + ks = np.where(cluster_masses < 0.1)[0] + for k in ks: + ms = np.random.choice(self.M, size=2) + LOGGER.debug("Rebooting cluster %d with %s", k, ms) + cluster_idx[ms] = k + + # M step. + self.cluster_idx = cluster_idx + LOGGER.debug("M Step (%d/%d)", i+1, max_iters) + self._update_dynamics(update_prior=False) + self._update_policy_fit(update_prior=False) + + def _traj_log_prob(self, m, k, T=None): + """ + Returns the log likelihood of condition 'm' occurring under + the trajectory defined by cluster 'k'. + """ + # Pull out states/actions and linearizations. + sample = self.cur[m].sample_list[0] # M = 1 for clustering. + X = sample.get_X() + U = sample.get_U() + + traj_info = self.cluster_traj_info[k] + x0mu, x0sigma = traj_info.x0mu, traj_info.x0sigma + dyn = traj_info.dynamics + pol = self.cluster_pol_info[k] + + # Set horizon, we ignore final (X, U) pair. + if T is None: + T = X.shape[0] - 1 + else: + assert T < X.shape[0] - 1 + + # Begin with log prob x0mu/x0sigma. + # TODO: use sp.linalg.cholesky for all of this? + logp = -0.5*np.log(np.linalg.det(x0sigma)) + diff = X[0] - x0mu + logp -= 0.5*diff.dot(np.linalg.solve(x0sigma, diff)) + + # Rest of trajectory. + for t in range(T): + # Action term. + sigma = pol.pol_S[t] + logp -= 0.5*np.log(np.linalg.det(sigma)) + diff = U[t] - pol.pol_K[t].dot(X[t]) - pol.pol_k[t] + logp -= 0.5*diff.dot(np.linalg.solve(sigma, diff)) + + # Next state term. + xu = np.concatenate([X[t], U[t]]) + sigma = dyn.dyn_covar[t] + logp -= 0.5*np.log(np.linalg.det(sigma)) + diff = X[t+1] - dyn.Fm[t].dot(xu) - dyn.fv[t] + logp -= 0.5*diff.dot(np.linalg.solve(sigma, diff)) + + return logp diff --git a/python/gps/algorithm/config.py b/python/gps/algorithm/config.py index 032f648bc..8771d64c6 100644 --- a/python/gps/algorithm/config.py +++ b/python/gps/algorithm/config.py @@ -22,6 +22,8 @@ 'cost': None, # A list of Cost objects for each condition. # Whether or not to sample with neural net policy (only for badmm/mdgps). 'sample_on_policy': False, + # Number of clusters to use if clustering samples. + 'num_clusters': 0, } @@ -45,7 +47,12 @@ ALG_MDGPS = { # TODO: remove need for init_pol_wt in MDGPS 'init_pol_wt': 0.01, - 'policy_sample_mode': 'add', - # Whether to use 'laplace' or 'mc' cost in step adjusment + 'policy_sample_mode': 'replace', 'step_rule': 'laplace', + # How to cluster samples, 'random', 'kmeans', 'traj_em'. + 'cluster_method': None, + # Horizon for trajectory aware EM. + 'traj_em_horizon': None, + # How many iterations trajectory aware EM runs. + 'traj_em_iters': 3, } diff --git a/python/gps/algorithm/cost/config.py b/python/gps/algorithm/cost/config.py index 97acdfb76..1f20f95c8 100644 --- a/python/gps/algorithm/cost/config.py +++ b/python/gps/algorithm/cost/config.py @@ -6,34 +6,30 @@ # CostFK COST_FK = { - 'ramp_option': RAMP_CONSTANT, # How target cost ramps over time. 'wp': None, # State weights - must be set. - 'wp_final_multiplier': 1.0, # Weight multiplier on final time step. - 'env_target': True, # TODO - This isn't used. - 'l1': 0.0, - 'l2': 1.0, - 'alpha': 1e-5, 'target_end_effector': None, # Target end-effector position. 'evalnorm': evallogl2term, + 'alpha': 1e-5, + 'l1': 0.0, + 'l2': 1.0, + 'ramp_option': RAMP_CONSTANT, # How target cost ramps over time. + 'wp_final_multiplier': 1.0, # Weight multiplier on final time step. } - -# CostState +# CostState (dist = Ax - tgt), x.shape depends on data_type COST_STATE = { - 'ramp_option': RAMP_CONSTANT, # How target cost ramps over time. + 'wp': None, # State weights - Defaults to ones. + 'A': None, # A matrix - Defaults to identity. + 'data_type': None, # Must be set. + 'target': 0.0, + 'evalnorm': evallogl2term, + 'alpha': 1e-5, 'l1': 0.0, 'l2': 1.0, - 'alpha': 1e-2, + 'ramp_option': RAMP_CONSTANT, # How target cost ramps over time. 'wp_final_multiplier': 1.0, # Weight multiplier on final time step. - 'data_types': { - 'JointAngle': { - 'target_state': None, # Target state - must be set. - 'wp': None, # State weights - must be set. - }, - }, } - # CostSum COST_SUM = { 'costs': [], # A list of hyperparam dictionaries for each cost. diff --git a/python/gps/algorithm/cost/cost_state.py b/python/gps/algorithm/cost/cost_state.py index cb6346c75..f47d5d427 100644 --- a/python/gps/algorithm/cost/cost_state.py +++ b/python/gps/algorithm/cost/cost_state.py @@ -21,6 +21,7 @@ def eval(self, sample): Args: sample: A single sample """ + # Allocate space for derivatives (full state). T = sample.T Du = sample.dU Dx = sample.dX @@ -32,32 +33,45 @@ def eval(self, sample): final_lxx = np.zeros((T, Dx, Dx)) final_lux = np.zeros((T, Du, Dx)) - for data_type in self._hyperparams['data_types']: - config = self._hyperparams['data_types'][data_type] - wp = config['wp'] - tgt = config['target_state'] - x = sample.get(data_type) - _, dim_sensor = x.shape - - wpm = get_ramp_multiplier( - self._hyperparams['ramp_option'], T, - wp_final_multiplier=self._hyperparams['wp_final_multiplier'] - ) - wp = wp * np.expand_dims(wpm, axis=-1) - # Compute state penalty. - dist = x - tgt - - # Evaluate penalty term. - l, ls, lss = evall1l2term( - wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]), - np.zeros((T, dim_sensor, dim_sensor, dim_sensor)), - self._hyperparams['l1'], self._hyperparams['l2'], - self._hyperparams['alpha'] - ) - - final_l += l - - sample.agent.pack_data_x(final_lx, ls, data_types=[data_type]) - sample.agent.pack_data_x(final_lxx, lss, - data_types=[data_type, data_type]) + # Compute state penalty for data_type. + data_type = self._hyperparams['data_type'] + x = sample.get(data_type) + _, Ds = x.shape + + # Weighted distance is wp*(A.dot(x) - tgt). + A = self._hyperparams['A'] + if A is None: + A = np.eye(Ds) + D = A.shape[0] + + wp = self._hyperparams['wp'] + if wp is None: + wp = np.ones(D) + + # Compute state penalty. + # x.shape == (T, Ds) + # A.shape == (D, Ds) + # x.dot(A.T).shape == (T, D) + # tgt.shape == (D,) or scalar + tgt = self._hyperparams['target'] + dist = x.dot(A.T) - tgt + + ramp = get_ramp_multiplier( + self._hyperparams['ramp_option'], T, + wp_final_multiplier=self._hyperparams['wp_final_multiplier'] + ) + wp = wp*np.expand_dims(ramp, axis=-1) + + # Evaluate penalty term. + l, ls, lss = evall1l2term( + wp, dist, np.tile(A, [T, 1, 1]), np.zeros((T, D, Ds, Ds)), + self._hyperparams['l1'], self._hyperparams['l2'], + self._hyperparams['alpha'] + ) + + # Pack into final cost. + final_l += l + sample.agent.pack_data_x(final_lx, ls, data_types=[data_type]) + sample.agent.pack_data_x(final_lxx, lss, + data_types=[data_type, data_type]) return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux diff --git a/python/gps/algorithm/dynamics/dynamics_lr_prior.py b/python/gps/algorithm/dynamics/dynamics_lr_prior.py index c3b576651..72d3ece0b 100644 --- a/python/gps/algorithm/dynamics/dynamics_lr_prior.py +++ b/python/gps/algorithm/dynamics/dynamics_lr_prior.py @@ -31,9 +31,6 @@ def fit(self, X, U): N, T, dX = X.shape dU = U.shape[2] - if N == 1: - raise ValueError("Cannot fit dynamics on 1 sample") - self.Fm = np.zeros([T, dX, dX+dU]) self.fv = np.zeros([T, dX]) self.dyn_covar = np.zeros([T, dX, dX]) diff --git a/python/gps/algorithm/policy/policy_prior_gmm.py b/python/gps/algorithm/policy/policy_prior_gmm.py index 27a43ccd4..9a268eb3e 100755 --- a/python/gps/algorithm/policy/policy_prior_gmm.py +++ b/python/gps/algorithm/policy/policy_prior_gmm.py @@ -109,8 +109,6 @@ def fit(self, X, pol_mu, pol_sig): """ N, T, dX = X.shape dU = pol_mu.shape[2] - if N == 1: - raise ValueError("Cannot fit dynamics on 1 sample") # Collapse policy covariances. (This is only correct because # the policy doesn't depend on state). diff --git a/python/gps/gps_main.py b/python/gps/gps_main.py index d3a0ea883..a3931c068 100755 --- a/python/gps/gps_main.py +++ b/python/gps/gps_main.py @@ -1,7 +1,7 @@ """ This file defines the main object that runs experiments. """ import matplotlib as mpl -mpl.use('Qt4Agg') +mpl.use('Qt5Agg') import logging import imp @@ -61,6 +61,14 @@ def run(self, itr_load=None): itr_start = self._initialize(itr_load) for itr in range(itr_start, self._hyperparams['iterations']): + if self.agent._hyperparams['randomly_sample_x0']: + for cond in self._train_idx: + self.agent.reset_initial_x0(cond) + + if self.agent._hyperparams['randomly_sample_bodypos']: + for cond in self._train_idx: + self.agent.reset_initial_body_offset(cond) + for cond in self._train_idx: for i in range(self._hyperparams['num_samples']): self._take_sample(itr, cond, i) diff --git a/python/gps/gui/gps_training_gui.py b/python/gps/gui/gps_training_gui.py index f7923c746..60819836f 100644 --- a/python/gps/gui/gps_training_gui.py +++ b/python/gps/gui/gps_training_gui.py @@ -353,7 +353,9 @@ def _update_trajectory_visualizations(self, algorithm, agent, self._traj_visualizer.set_lim(i=m, xlim=xlim, ylim=ylim, zlim=zlim) self._update_linear_gaussian_controller_plots(algorithm, agent, m) self._update_samples_plots(traj_sample_lists, m, 'green', 'Trajectory Samples') - if pol_sample_lists: + + config = algorithm._hyperparams + if pol_sample_lists and config['train_conditions'] == config['test_conditions']: self._update_samples_plots(pol_sample_lists, m, 'blue', 'Policy Samples') self._traj_visualizer.draw() # this must be called explicitly diff --git a/python/gps/utility/general_utils.py b/python/gps/utility/general_utils.py index 98cc3e49d..625f9ed37 100644 --- a/python/gps/utility/general_utils.py +++ b/python/gps/utility/general_utils.py @@ -1,7 +1,6 @@ """ This file defines general utility functions and classes. """ import numpy as np - class BundleType(object): """ This class bundles many fields, similar to a record or a mutable @@ -92,3 +91,46 @@ def get_ee_points(offsets, ee_pos, ee_rot): 3 x N array of end effector points. """ return ee_rot.dot(offsets.T) + ee_pos.T + +def sample_params(sampling_range, prohibited_ranges): + """ + Samples parameters from sampling_range ensuring that the sampled_point + doesn't lie in any of the prohibited_ranges. + + Args: + sampling_range: A list of form [lower_lim, upper_lim] where lower_lim + and upper_lim are two numpy arrays with size N x 1 where N is the + dimension of the sample. + prohibited_ranges: A list of ranges which describes the region from which + the point can't be sampled. A range is a list of form [[l_0, u_0], .., + [l_N, u_N]] where l_i and u_i are the upper and lower limits of the + i-th dimension. Can contain replace a [l_i, u_i] with None. + Returns: + sampled_point: A sampled vector within sampling range and lying outside + prohibited ranges. + """ + lower_lim, upper_lim = sampling_range + N = len(lower_lim) + assert N == len(upper_lim) + + valid = False + while not valid: + # Sample point from within lower/upper limits. + sampled_point = np.random.rand(N)*(upper_lim - lower_lim) + lower_lim + + # Make sure it is valid. + valid = True + for prohibited_range in prohibited_ranges: + for i in range(N): + if prohibited_range[i] is None: + continue + + lower, upper = prohibited_range[i] + if (lower <= sampled_point[i] <= upper): + valid = False + break + + if not valid: + break + + return sampled_point From ab6d7ce9cf42249cdacbb17439f0b23249d0cb18 Mon Sep 17 00:00:00 2001 From: Harley Montgomery Date: Fri, 24 Feb 2017 13:33:01 -0800 Subject: [PATCH 2/2] added ICRA experiments --- experiments/icra_peg_2D/badmm/hyperparams.py | 75 ++++++++ experiments/icra_peg_2D/hyperparams.py | 150 +++++++++++++++ .../icra_peg_2D/mdgps_cluster/hyperparams.py | 74 ++++++++ .../icra_peg_2D/mdgps_on/hyperparams.py | 70 +++++++ experiments/icra_peg_3D/badmm/hyperparams.py | 76 ++++++++ experiments/icra_peg_3D/hyperparams.py | 151 +++++++++++++++ .../icra_peg_3D/mdgps_cluster/hyperparams.py | 75 ++++++++ .../icra_peg_3D/mdgps_on/hyperparams.py | 71 +++++++ .../icra_peg_vision/badmm/hyperparams.py | 74 ++++++++ experiments/icra_peg_vision/hyperparams.py | 175 ++++++++++++++++++ .../mdgps_cluster/hyperparams.py | 76 ++++++++ .../icra_peg_vision/mdgps_on/hyperparams.py | 70 +++++++ experiments/icra_reacher/badmm/hyperparams.py | 86 +++++++++ experiments/icra_reacher/hyperparams.py | 143 ++++++++++++++ .../icra_reacher/mdgps_cluster/hyperparams.py | 79 ++++++++ .../icra_reacher/mdgps_on/hyperparams.py | 81 ++++++++ .../icra_reacher_vision/badmm/hyperparams.py | 93 ++++++++++ .../icra_reacher_vision/hyperparams.py | 166 +++++++++++++++++ .../mdgps_cluster/hyperparams.py | 76 ++++++++ .../mdgps_on/hyperparams.py | 88 +++++++++ 20 files changed, 1949 insertions(+) create mode 100644 experiments/icra_peg_2D/badmm/hyperparams.py create mode 100644 experiments/icra_peg_2D/hyperparams.py create mode 100644 experiments/icra_peg_2D/mdgps_cluster/hyperparams.py create mode 100644 experiments/icra_peg_2D/mdgps_on/hyperparams.py create mode 100644 experiments/icra_peg_3D/badmm/hyperparams.py create mode 100644 experiments/icra_peg_3D/hyperparams.py create mode 100644 experiments/icra_peg_3D/mdgps_cluster/hyperparams.py create mode 100644 experiments/icra_peg_3D/mdgps_on/hyperparams.py create mode 100644 experiments/icra_peg_vision/badmm/hyperparams.py create mode 100644 experiments/icra_peg_vision/hyperparams.py create mode 100644 experiments/icra_peg_vision/mdgps_cluster/hyperparams.py create mode 100644 experiments/icra_peg_vision/mdgps_on/hyperparams.py create mode 100644 experiments/icra_reacher/badmm/hyperparams.py create mode 100644 experiments/icra_reacher/hyperparams.py create mode 100644 experiments/icra_reacher/mdgps_cluster/hyperparams.py create mode 100644 experiments/icra_reacher/mdgps_on/hyperparams.py create mode 100644 experiments/icra_reacher_vision/badmm/hyperparams.py create mode 100644 experiments/icra_reacher_vision/hyperparams.py create mode 100644 experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py create mode 100644 experiments/icra_reacher_vision/mdgps_on/hyperparams.py diff --git a/experiments/icra_peg_2D/badmm/hyperparams.py b/experiments/icra_peg_2D/badmm/hyperparams.py new file mode 100644 index 000000000..dfa82e37b --- /dev/null +++ b/experiments/icra_peg_2D/badmm/hyperparams.py @@ -0,0 +1,75 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_badmm import AlgorithmBADMM + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 4 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create train positions. +xs = np.linspace(default.lower, default.upper, 2) +ys = np.linspace(default.lower, default.upper, 2) +pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmBADMM, + 'inner_iterations': 4, + 'sample_on_policy': False, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]), + 'policy_dual_rate': 0.1, + 'init_pol_wt': 0.002, + 'fixed_lg_step': 3, + 'kl_step': 0.5, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_2D/hyperparams.py b/experiments/icra_peg_2D/hyperparams.py new file mode 100644 index 000000000..cb4286a49 --- /dev/null +++ b/experiments/icra_peg_2D/hyperparams.py @@ -0,0 +1,150 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +from __future__ import division + +from datetime import datetime +import os.path + +import numpy as np + +from gps import __file__ as gps_filepath +from gps.agent.mjc.agent_mjc import AgentMuJoCo +from gps.algorithm.cost.cost_fk import CostFK +from gps.algorithm.cost.cost_action import CostAction +from gps.algorithm.cost.cost_sum import CostSum +from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS +from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior +from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM +from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython +from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe +from gps.algorithm.policy.lin_gauss_init import init_lqr +from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM +from gps.algorithm.policy.policy_prior import PolicyPrior +from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \ + END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION +from gps.gui.config import generate_experiment_info + + +SENSOR_DIMS = { + JOINT_ANGLES: 7, + JOINT_VELOCITIES: 7, + END_EFFECTOR_POINTS: 6, + END_EFFECTOR_POINT_VELOCITIES: 6, + ACTION: 7, +} + +PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098]) + +common = { + 'experiment_name': 'my_experiment' + '_' + \ + datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), +} + +# Set up grid of test positions. +lower = -0.25 +upper = 0.15 + +TEST_CONDITIONS = 49 +xs = np.linspace(lower + 0.01, upper - 0.01, 7) +ys = np.linspace(lower + 0.01, upper - 0.01, 7) +test_pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys] + +agent = { + 'type': AgentMuJoCo, + 'filename': './mjc_models/pr2_arm3d.xml', + 'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]), + np.zeros(7)]), + 'dt': 0.05, + 'substeps': 5, + 'T': 100, + 'sensor_dims': SENSOR_DIMS, + 'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]), +} + +algorithm = { + 'iterations': 12, + 'policy_sample_mode': 'replace', +} + +algorithm['init_traj_distr'] = { + 'type': init_lqr, + 'init_gains': 1.0 / PR2_GAINS, + 'init_acc': np.zeros(SENSOR_DIMS[ACTION]), + 'init_var': 1.0, + 'stiffness': 1.0, + 'stiffness_vel': 0.5, + 'final_weight': 50.0, + 'dt': agent['dt'], + 'T': agent['T'], +} + +torque_cost = { + 'type': CostAction, + 'wu': 1e-3 / PR2_GAINS, +} + +fk_cost = { + 'type': CostFK, + 'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]), + 'wp': np.array([2, 2, 1, 2, 2, 1]), + 'l1': 0.1, + 'l2': 10.0, + 'alpha': 1e-5, +} + +# Create second cost function for last step only. +final_cost = { + 'type': CostFK, + 'ramp_option': RAMP_FINAL_ONLY, + 'target_end_effector': fk_cost['target_end_effector'], + 'wp': fk_cost['wp'], + 'l1': 1.0, + 'l2': 0.0, + 'alpha': 1e-5, + 'wp_final_multiplier': 10.0, +} + +algorithm['cost'] = { + 'type': CostSum, + 'costs': [torque_cost, fk_cost, final_cost], + 'weights': [1.0, 1.0, 1.0], +} + +algorithm['dynamics'] = { + 'type': DynamicsLRPrior, + 'regularization': 1e-6, + 'prior': { + 'type': DynamicsPriorGMM, + 'max_clusters': 50, + 'min_samples_per_cluster': 40, + 'max_samples': 20, + }, +} + +algorithm['traj_opt'] = { + 'type': TrajOptLQRPython, +} + +algorithm['policy_opt'] = { + 'type': PolicyOptCaffe, + 'iterations': 5000, +} + +algorithm['policy_prior'] = { + 'type': PolicyPriorGMM, + 'max_clusters': 50, + 'min_samples_per_cluster': 40, +} + +config = { + 'iterations': algorithm['iterations'], + 'verbose_trials': 0, + 'verbose_policy_trials': 1, + 'agent': agent, + 'gui_on': True, + 'random_seed': list(range(5)), +} diff --git a/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py b/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py new file mode 100644 index 000000000..d38202df4 --- /dev/null +++ b/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py @@ -0,0 +1,74 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 20 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create dummy train positions (will be overwritten randomly). +pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the default config. +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, + 'randomly_sample_bodypos': True, + 'sampling_range_bodypos': [np.array([default.lower, default.lower, 0.0]), + np.array([default.upper, default.upper, 0.0])], + 'prohibited_ranges_bodypos': [], +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'num_clusters': 4, + 'cluster_method': 'traj_em', + 'kl_step': 2.0, + 'max_step_mult': 2.0, + 'min_step_mult': 1.0, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 1, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_2D/mdgps_on/hyperparams.py b/experiments/icra_peg_2D/mdgps_on/hyperparams.py new file mode 100644 index 000000000..a75c6a679 --- /dev/null +++ b/experiments/icra_peg_2D/mdgps_on/hyperparams.py @@ -0,0 +1,70 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 4 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create train positions. +xs = np.linspace(default.lower, default.upper, 2) +ys = np.linspace(default.lower, default.upper, 2) +pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'kl_step': 2.0, + 'max_step_mult': 2.0, + 'min_step_mult': 0.5, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_3D/badmm/hyperparams.py b/experiments/icra_peg_3D/badmm/hyperparams.py new file mode 100644 index 000000000..f245f5ef4 --- /dev/null +++ b/experiments/icra_peg_3D/badmm/hyperparams.py @@ -0,0 +1,76 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_badmm import AlgorithmBADMM + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 8 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create train positions. +xs = np.linspace(default.lower, default.upper, 2) +ys = np.linspace(default.lower, default.upper, 2) +zs = np.linspace(default.lower, default.upper, 2) +pos_body_offset = [np.array([x,y,z]) for x in xs for y in ys for z in zs] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmBADMM, + 'inner_iterations': 4, + 'sample_on_policy': False, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'lg_step_schedule': np.array([1e-4, 1e-3, 1e-2, 1e-2]), + 'policy_dual_rate': 0.1, + 'init_pol_wt': 0.002, + 'fixed_lg_step': 3, + 'kl_step': 0.25, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_3D/hyperparams.py b/experiments/icra_peg_3D/hyperparams.py new file mode 100644 index 000000000..9b925954e --- /dev/null +++ b/experiments/icra_peg_3D/hyperparams.py @@ -0,0 +1,151 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +from __future__ import division + +from datetime import datetime +import os.path + +import numpy as np + +from gps import __file__ as gps_filepath +from gps.agent.mjc.agent_mjc import AgentMuJoCo +from gps.algorithm.cost.cost_fk import CostFK +from gps.algorithm.cost.cost_action import CostAction +from gps.algorithm.cost.cost_sum import CostSum +from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS +from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior +from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM +from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython +from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe +from gps.algorithm.policy.lin_gauss_init import init_lqr +from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM +from gps.algorithm.policy.policy_prior import PolicyPrior +from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \ + END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION +from gps.gui.config import generate_experiment_info + + +SENSOR_DIMS = { + JOINT_ANGLES: 7, + JOINT_VELOCITIES: 7, + END_EFFECTOR_POINTS: 6, + END_EFFECTOR_POINT_VELOCITIES: 6, + ACTION: 7, +} + +PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098]) + +common = { + 'experiment_name': 'my_experiment' + '_' + \ + datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), +} + +# Set up grid of test positions. +lower = -0.15 +upper = 0.15 + +TEST_CONDITIONS = 64 +xs = np.linspace(lower + 0.01, upper - 0.01, 4) +ys = np.linspace(lower + 0.01, upper - 0.01, 4) +zs = np.linspace(lower + 0.01, upper - 0.01, 4) +test_pos_body_offset = [np.array([x,y,z]) for x in xs for y in ys for z in zs] + +agent = { + 'type': AgentMuJoCo, + 'filename': './mjc_models/pr2_arm3d.xml', + 'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]), + np.zeros(7)]), + 'dt': 0.05, + 'substeps': 5, + 'T': 100, + 'sensor_dims': SENSOR_DIMS, + 'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]), +} + +algorithm = { + 'iterations': 20, + 'policy_sample_mode': 'replace', +} + +algorithm['init_traj_distr'] = { + 'type': init_lqr, + 'init_gains': 1.0 / PR2_GAINS, + 'init_acc': np.zeros(SENSOR_DIMS[ACTION]), + 'init_var': 1.0, + 'stiffness': 1.0, + 'stiffness_vel': 0.5, + 'final_weight': 50.0, + 'dt': agent['dt'], + 'T': agent['T'], +} + +torque_cost = { + 'type': CostAction, + 'wu': 1e-3 / PR2_GAINS, +} + +fk_cost = { + 'type': CostFK, + 'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]), + 'wp': np.array([2, 2, 1, 2, 2, 1]), + 'l1': 0.1, + 'l2': 10.0, + 'alpha': 1e-5, +} + +# Create second cost function for last step only. +final_cost = { + 'type': CostFK, + 'ramp_option': RAMP_FINAL_ONLY, + 'target_end_effector': fk_cost['target_end_effector'], + 'wp': fk_cost['wp'], + 'l1': 1.0, + 'l2': 0.0, + 'alpha': 1e-5, + 'wp_final_multiplier': 10.0, +} + +algorithm['cost'] = { + 'type': CostSum, + 'costs': [torque_cost, fk_cost, final_cost], + 'weights': [1.0, 1.0, 1.0], +} + +algorithm['dynamics'] = { + 'type': DynamicsLRPrior, + 'regularization': 1e-6, + 'prior': { + 'type': DynamicsPriorGMM, + 'max_clusters': 50, + 'min_samples_per_cluster': 40, + 'max_samples': 40, + }, +} + +algorithm['traj_opt'] = { + 'type': TrajOptLQRPython, +} + +algorithm['policy_opt'] = { + 'type': PolicyOptCaffe, + 'iterations': 5000, +} + +algorithm['policy_prior'] = { + 'type': PolicyPriorGMM, + 'max_clusters': 50, + 'min_samples_per_cluster': 40, +} + +config = { + 'iterations': algorithm['iterations'], + 'verbose_trials': 0, + 'verbose_policy_trials': 0, + 'agent': agent, + 'gui_on': True, + 'random_seed': list(range(5)), +} diff --git a/experiments/icra_peg_3D/mdgps_cluster/hyperparams.py b/experiments/icra_peg_3D/mdgps_cluster/hyperparams.py new file mode 100644 index 000000000..605d3ef5e --- /dev/null +++ b/experiments/icra_peg_3D/mdgps_cluster/hyperparams.py @@ -0,0 +1,75 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 40 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create dummy train positions (will be overwritten randomly). +pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the default config. +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, + 'randomly_sample_bodypos': True, + 'sampling_range_bodypos': [ + np.array([default.lower, default.lower, default.lower]), + np.array([default.upper, default.upper, default.upper]) ], + 'prohibited_ranges_bodypos': [], +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'num_clusters': 8, + 'cluster_method': 'kmeans', + 'kl_step': 2.0, + 'max_step_mult': 1.5, + 'min_step_mult': 1.0, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 1, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_3D/mdgps_on/hyperparams.py b/experiments/icra_peg_3D/mdgps_on/hyperparams.py new file mode 100644 index 000000000..abd49f365 --- /dev/null +++ b/experiments/icra_peg_3D/mdgps_on/hyperparams.py @@ -0,0 +1,71 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 8 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create train positions. +xs = np.linspace(default.lower, default.upper, 2) +ys = np.linspace(default.lower, default.upper, 2) +zs = np.linspace(default.lower, default.upper, 2) +pos_body_offset = [np.array([x,y,z]) for x in xs for y in ys for z in zs] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'kl_step': 2.0, + 'max_step_mult': 2.0, + 'min_step_mult': 0.5, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_vision/badmm/hyperparams.py b/experiments/icra_peg_vision/badmm/hyperparams.py new file mode 100644 index 000000000..d0f9d410b --- /dev/null +++ b/experiments/icra_peg_vision/badmm/hyperparams.py @@ -0,0 +1,74 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_badmm import AlgorithmBADMM + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 9 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create train positions. +xs = np.linspace(default.lower, default.upper, 3) +ys = np.linspace(default.lower, default.upper, 3) +pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmBADMM, + 'sample_on_policy': False, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'lg_step_schedule': np.array([1e-4, 1e-3, 1e-2, 1e-2]), + 'policy_dual_rate': 0.1, + 'init_pol_wt': 0.002, + 'fixed_lg_step': 3, + 'kl_step': 0.5, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_vision/hyperparams.py b/experiments/icra_peg_vision/hyperparams.py new file mode 100644 index 000000000..fc811d356 --- /dev/null +++ b/experiments/icra_peg_vision/hyperparams.py @@ -0,0 +1,175 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +from __future__ import division + +from datetime import datetime +import os.path + +import numpy as np + +from gps import __file__ as gps_filepath +from gps.agent.mjc.agent_mjc import AgentMuJoCo +from gps.algorithm.cost.cost_fk import CostFK +from gps.algorithm.cost.cost_action import CostAction +from gps.algorithm.cost.cost_sum import CostSum +from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS +from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior +from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM +from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython +from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe +from gps.algorithm.policy_opt.policy_opt_tf import PolicyOptTf +from gps.algorithm.policy.lin_gauss_init import init_lqr +from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM +from gps.algorithm.policy.policy_prior import PolicyPrior +from gps.algorithm.policy_opt.tf_model_example import multi_modal_network, multi_modal_network_fp + +from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \ + END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, RGB_IMAGE, RGB_IMAGE_SIZE, ACTION, \ + END_EFFECTOR_POINTS_NO_TARGET, END_EFFECTOR_POINT_VELOCITIES_NO_TARGET +from gps.gui.config import generate_experiment_info + +IMAGE_WIDTH = 80 +IMAGE_HEIGHT = 64 +IMAGE_CHANNELS = 3 + +SENSOR_DIMS = { + JOINT_ANGLES: 7, + JOINT_VELOCITIES: 7, + END_EFFECTOR_POINTS: 6, + END_EFFECTOR_POINT_VELOCITIES: 6, + ACTION: 7, + RGB_IMAGE: IMAGE_WIDTH*IMAGE_HEIGHT*IMAGE_CHANNELS, + RGB_IMAGE_SIZE: 3, +} + +PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098]) + +common = { + 'experiment_name': 'my_experiment' + '_' + \ + datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), +} + +# Set up grid of test positions. +lower = -0.15 +upper = 0.15 + +TEST_CONDITIONS = 25 +xs = np.linspace(lower + 0.01, upper - 0.01, 5) +ys = np.linspace(lower + 0.01, upper - 0.01, 5) +test_pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys] + +agent = { + 'type': AgentMuJoCo, + 'filename': './mjc_models/pr2_arm3d.xml', + 'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]), + np.zeros(7)]), + 'dt': 0.05, + 'substeps': 5, + 'T': 100, + 'sensor_dims': SENSOR_DIMS, + 'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, RGB_IMAGE], + 'meta_include': [RGB_IMAGE_SIZE], + 'image_width': IMAGE_WIDTH, + 'image_height': IMAGE_HEIGHT, + 'image_channels': IMAGE_CHANNELS, + 'camera_pos': np.array([0., 0., 3., 0., 0.2, 0.5]), +} + +algorithm = { + 'iterations': 15, + 'policy_sample_mode': 'replace', +} + +algorithm['init_traj_distr'] = { + 'type': init_lqr, + 'init_gains': 1.0 / PR2_GAINS, + 'init_acc': np.zeros(SENSOR_DIMS[ACTION]), + 'init_var': 1.0, + 'stiffness': 1.0, + 'stiffness_vel': 0.5, + 'final_weight': 50.0, + 'dt': agent['dt'], + 'T': agent['T'], +} + +torque_cost = { + 'type': CostAction, + 'wu': 1e-3 / PR2_GAINS, +} + +fk_cost = { + 'type': CostFK, + 'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]), + 'wp': np.array([2, 2, 1, 2, 2, 1]), + 'l1': 0.1, + 'l2': 10.0, + 'alpha': 1e-5, +} + +# Create second cost function for last step only. +final_cost = { + 'type': CostFK, + 'ramp_option': RAMP_FINAL_ONLY, + 'target_end_effector': fk_cost['target_end_effector'], + 'wp': fk_cost['wp'], + 'l1': 1.0, + 'l2': 0.0, + 'alpha': 1e-5, + 'wp_final_multiplier': 10.0, +} + +algorithm['cost'] = { + 'type': CostSum, + 'costs': [torque_cost, fk_cost, final_cost], + 'weights': [1.0, 1.0, 1.0], +} + +algorithm['dynamics'] = { + 'type': DynamicsLRPrior, + 'regularization': 1e-6, + 'prior': { + 'type': DynamicsPriorGMM, + 'max_clusters': 40, + 'min_samples_per_cluster': 40, + }, +} + +algorithm['traj_opt'] = { + 'type': TrajOptLQRPython, +} + +algorithm['policy_opt'] = { + 'type': PolicyOptTf, + 'network_params': { + 'num_filters': [15, 15, 15], + 'obs_include': agent['obs_include'], + 'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES], + 'obs_image_data': [RGB_IMAGE], + 'image_width': IMAGE_WIDTH, + 'image_height': IMAGE_HEIGHT, + 'image_channels': IMAGE_CHANNELS, + 'sensor_dims': SENSOR_DIMS, + }, + 'network_model': multi_modal_network_fp, + 'fc_only_iterations': 5000, + 'init_iterations': 1000, + 'iterations': 1000, + 'batch_size': 100, +} + +algorithm['policy_prior'] = { + 'type': PolicyPriorGMM, + 'max_clusters': 40, + 'min_samples_per_cluster': 40, +} + +config = { + 'iterations': algorithm['iterations'], + 'verbose_trials': np.inf, + 'verbose_policy_trials': np.inf, + 'agent': agent, + 'gui_on': True, + 'random_seed': list(range(5)), +} diff --git a/experiments/icra_peg_vision/mdgps_cluster/hyperparams.py b/experiments/icra_peg_vision/mdgps_cluster/hyperparams.py new file mode 100644 index 000000000..70c50ac22 --- /dev/null +++ b/experiments/icra_peg_vision/mdgps_cluster/hyperparams.py @@ -0,0 +1,76 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 45 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create dummy train positions (will be overwritten randomly). +pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the default config. +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, + 'randomly_sample_bodypos': True, + 'sampling_range_bodypos': [np.array([default.lower, default.lower, 0.0]), + np.array([default.upper, default.upper, 0.0])], + 'prohibited_ranges_bodypos': [], +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'num_clusters': 9, +# 'cluster_method': 'traj_em', + 'cluster_method': 'kmeans', + 'kl_step': 2.0, + 'max_step_mult': 1.0, + 'min_step_mult': 0.5, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' +algorithm['dynamics']['prior']['max_samples'] = TRAIN_CONDITIONS + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 1, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_peg_vision/mdgps_on/hyperparams.py b/experiments/icra_peg_vision/mdgps_on/hyperparams.py new file mode 100644 index 000000000..e5f6e2748 --- /dev/null +++ b/experiments/icra_peg_vision/mdgps_on/hyperparams.py @@ -0,0 +1,70 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 9 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create train positions. +xs = np.linspace(default.lower, default.upper, 3) +ys = np.linspace(default.lower, default.upper, 3) +pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys] + +# Add test positions. +pos_body_offset += default.test_pos_body_offset + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': np.array([1]), + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'kl_step': 2.0, + 'max_step_mult': 1.0, + 'min_step_mult': 0.2, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_reacher/badmm/hyperparams.py b/experiments/icra_reacher/badmm/hyperparams.py new file mode 100644 index 000000000..1a3c1447f --- /dev/null +++ b/experiments/icra_reacher/badmm/hyperparams.py @@ -0,0 +1,86 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_badmm import AlgorithmBADMM + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 8 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create dummy train positions (will be overwritten randomly). +x0 = [np.zeros(6) for _ in range(TRAIN_CONDITIONS)] + +pos_body_offset = [ + np.array([.4, .0, .7]), + np.array([.4, .0, -.7]), + np.array([-.4, .0, .7]), + np.array([-.4, .0, -.7]), + np.array([.7, .0, .4]), + np.array([.7, .0, -.4]), + np.array([-.7, .0, .4]), + np.array([-.7, .0, -.4]) ] + +# Add test positions. +x0 += default.test_x0 +pos_body_offset += default.test_pos_body + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'x0': x0, + 'pos_body_idx': default.pos_body_idx, + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmBADMM, + 'inner_iterations': 2, + 'sample_on_policy': False, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]), + 'policy_dual_rate': 0.1, + 'init_pol_wt': 0.002, + 'fixed_lg_step': 3, + 'kl_step': 0.5, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' +algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_reacher/hyperparams.py b/experiments/icra_reacher/hyperparams.py new file mode 100644 index 000000000..0fa8a6571 --- /dev/null +++ b/experiments/icra_reacher/hyperparams.py @@ -0,0 +1,143 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +from __future__ import division + +from datetime import datetime +import os.path + +import numpy as np + +from gps import __file__ as gps_filepath +from gps.agent.mjc.agent_mjc import AgentMuJoCo +from gps.algorithm.cost.cost_fk import CostFK +from gps.algorithm.cost.cost_action import CostAction +from gps.algorithm.cost.cost_state import CostState +from gps.algorithm.cost.cost_utils import evall1l2term +from gps.algorithm.cost.cost_sum import CostSum +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS +from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior +from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM +from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython +from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe +from gps.algorithm.policy.lin_gauss_init import init_lqr +from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM +from gps.algorithm.policy.policy_prior import PolicyPrior +from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \ + END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION +from gps.gui.config import generate_experiment_info + +from gps.utility.general_utils import sample_params + +SENSOR_DIMS = { + JOINT_ANGLES: 3, + JOINT_VELOCITIES: 3, + END_EFFECTOR_POINTS: 6, + END_EFFECTOR_POINT_VELOCITIES: 6, + ACTION: 3, +} + +GAINS = np.ones(SENSOR_DIMS[ACTION]) + +common = { + 'experiment_name': 'my_experiment' + '_' + \ + datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), +} + +# Set up ranges. +x0_upper = np.array([0., 0., 0., 0., 0., 0.]) +#x0_upper = np.array([2., 0., 0., 0., 0., 0.]) +x0_lower = -x0_upper + +pos_body_idx = np.array([1]), +pos_body_upper = np.array([1., 0., 1.]) +pos_body_lower = -pos_body_upper + +TEST_CONDITIONS = 40 +np.random.seed(47) +test_x0 = [sample_params([x0_lower, x0_upper], []) \ + for _ in range(TEST_CONDITIONS)] +test_pos_body = [sample_params([pos_body_lower, pos_body_upper], []) \ + for _ in range(TEST_CONDITIONS)] + +agent = { + 'type': AgentMuJoCo, + 'filename': './mjc_models/arm_3link_reach.xml', + 'x0': np.zeros(6), + 'dt': 0.05, + 'substeps': 5, + 'T': 100, + 'sensor_dims': SENSOR_DIMS, + 'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'camera_pos': np.array([0., 6., 0., 0., 0., 0.]), +} + +algorithm = { + 'iterations': 15, + 'policy_sample_mode': 'replace', +} + +algorithm['init_traj_distr'] = { + 'type': init_lqr, + 'init_gains': 100.0*GAINS, + 'init_acc': np.zeros_like(GAINS), + 'init_var': 10.0, + 'dt': agent['dt'], + 'T': agent['T'], +} + +torque_cost = { + 'type': CostAction, + 'wu': 1e-3 / GAINS, +} + +# Diff between average of end_effectors and block. +touch_cost = { + 'type': CostState, + 'data_type': END_EFFECTOR_POINTS, + 'A' : np.c_[np.eye(3), -np.eye(3)], + 'l1': 1.0, + 'evalnorm': evall1l2term, +} + +algorithm['cost'] = { + 'type': CostSum, + 'costs': [torque_cost, touch_cost], + 'weights': [1.0, 1.0, 1.0], +} + +algorithm['dynamics'] = { + 'type': DynamicsLRPrior, + 'regularization': 1e-6, + 'prior': { + 'type': DynamicsPriorGMM, + 'max_clusters': 40, + 'min_samples_per_cluster': 25, +# 'max_samples': 40, + }, +} + +algorithm['traj_opt'] = { + 'type': TrajOptLQRPython, +} + +algorithm['policy_opt'] = { + 'type': PolicyOptCaffe, + 'iterations': 5000, +} + +algorithm['policy_prior'] = { + 'type': PolicyPriorGMM, + 'max_clusters': 40, + 'min_samples_per_cluster': 25, +} + +config = { + 'iterations': algorithm['iterations'], + 'verbose_trials': 8, + 'verbose_policy_trials': 8, + 'agent': agent, + 'gui_on': True, + 'random_seed': list(range(5)), +} diff --git a/experiments/icra_reacher/mdgps_cluster/hyperparams.py b/experiments/icra_reacher/mdgps_cluster/hyperparams.py new file mode 100644 index 000000000..1625e0859 --- /dev/null +++ b/experiments/icra_reacher/mdgps_cluster/hyperparams.py @@ -0,0 +1,79 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 40 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create dummy train positions (will be overwritten randomly). +x0 = [np.zeros(6) for _ in range(TRAIN_CONDITIONS)] +pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)] + +# Add test positions. +x0 += default.test_x0 +pos_body_offset += default.test_pos_body + +# Update the default config. +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'x0': x0, + 'randomly_sample_x0': True, + 'sampling_range_x0': [default.x0_lower, default.x0_upper], + 'pos_body_idx': default.pos_body_idx, + 'pos_body_offset': pos_body_offset, + 'randomly_sample_bodypos': True, + 'sampling_range_bodypos': [default.pos_body_lower, default.pos_body_upper], +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], +# 'cluster_method': 'traj_em', + 'cluster_method': 'kmeans', + 'num_clusters': 8, + 'kl_step': 1.0, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' +algorithm['dynamics']['prior']['max_samples'] = TRAIN_CONDITIONS + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 1, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_reacher/mdgps_on/hyperparams.py b/experiments/icra_reacher/mdgps_on/hyperparams.py new file mode 100644 index 000000000..66336ba4e --- /dev/null +++ b/experiments/icra_reacher/mdgps_on/hyperparams.py @@ -0,0 +1,81 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 8 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create dummy train positions (will be overwritten randomly). +x0 = [np.zeros(6) for _ in range(TRAIN_CONDITIONS)] + +pos_body_offset = [ + np.array([.4, .0, .7]), + np.array([.4, .0, -.7]), + np.array([-.4, .0, .7]), + np.array([-.4, .0, -.7]), + np.array([.7, .0, .4]), + np.array([.7, .0, -.4]), + np.array([-.7, .0, .4]), + np.array([-.7, .0, -.4]) ] + +# Add test positions. +x0 += default.test_x0 +pos_body_offset += default.test_pos_body + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'x0': x0, + 'pos_body_idx': default.pos_body_idx, + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'kl_step': 1.0, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' +algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_reacher_vision/badmm/hyperparams.py b/experiments/icra_reacher_vision/badmm/hyperparams.py new file mode 100644 index 000000000..459e785dd --- /dev/null +++ b/experiments/icra_reacher_vision/badmm/hyperparams.py @@ -0,0 +1,93 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_badmm import AlgorithmBADMM + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 12 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create body positions +pos_body_offset = [] + +# Outer square +bound = 0.75 +xs = np.linspace(-bound, bound, 2) +zs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs] + +# Inner square +bound = 0.25 +xs = np.linspace(-bound, bound, 2) +zs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs] + +# Middle diamond +bound = 0.5 +xs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([x,0,0]) for x in xs] +zs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([0,0,z]) for z in zs] + +# Add test positions. +pos_body_offset += default.test_pos_body + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': default.pos_body_idx, + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmBADMM, + 'inner_iterations': 2, + 'sample_on_policy': False, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]), + 'policy_dual_rate': 0.1, + 'init_pol_wt': 0.002, + 'fixed_lg_step': 3, + 'kl_step': 1.0, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' +algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_reacher_vision/hyperparams.py b/experiments/icra_reacher_vision/hyperparams.py new file mode 100644 index 000000000..4e2b47850 --- /dev/null +++ b/experiments/icra_reacher_vision/hyperparams.py @@ -0,0 +1,166 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +from __future__ import division + +from datetime import datetime +import os.path + +import numpy as np + +from gps import __file__ as gps_filepath +from gps.agent.mjc.agent_mjc import AgentMuJoCo +from gps.algorithm.cost.cost_fk import CostFK +from gps.algorithm.cost.cost_action import CostAction +from gps.algorithm.cost.cost_state import CostState +from gps.algorithm.cost.cost_utils import evall1l2term +from gps.algorithm.cost.cost_sum import CostSum +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS +from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior +from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM +from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython +from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe +from gps.algorithm.policy_opt.policy_opt_tf import PolicyOptTf +from gps.algorithm.policy.lin_gauss_init import init_lqr +from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM +from gps.algorithm.policy.policy_prior import PolicyPrior +from gps.algorithm.policy_opt.tf_model_example import multi_modal_network, multi_modal_network_fp + +from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \ + END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, \ + RGB_IMAGE, RGB_IMAGE_SIZE, ACTION, \ + END_EFFECTOR_POINTS_NO_TARGET, END_EFFECTOR_POINT_VELOCITIES_NO_TARGET +from gps.gui.config import generate_experiment_info + +from gps.utility.general_utils import sample_params + +IMAGE_WIDTH = 80 +IMAGE_HEIGHT = 64 +IMAGE_CHANNELS = 3 + +SENSOR_DIMS = { + JOINT_ANGLES: 3, + JOINT_VELOCITIES: 3, + END_EFFECTOR_POINTS: 6, + END_EFFECTOR_POINT_VELOCITIES: 6, +# END_EFFECTOR_POINTS_NO_TARGET: 3, +# END_EFFECTOR_POINT_VELOCITIES_NO_TARGET: 3, + ACTION: 3, + RGB_IMAGE: IMAGE_WIDTH*IMAGE_HEIGHT*IMAGE_CHANNELS, + RGB_IMAGE_SIZE: 3, +} + +GAINS = np.ones(SENSOR_DIMS[ACTION]) + +common = { + 'experiment_name': 'my_experiment' + '_' + \ + datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'), +} + +# Set up ranges. +pos_body_idx = np.array([1]), +pos_body_upper = np.array([1., 0., 1.]) +pos_body_lower = -pos_body_upper + +TEST_CONDITIONS = 40 +np.random.seed(47) +test_pos_body = [sample_params([pos_body_lower, pos_body_upper], []) \ + for _ in range(TEST_CONDITIONS)] + +agent = { + 'type': AgentMuJoCo, + 'filename': './mjc_models/arm_3link_reach.xml', + 'x0': np.zeros(6), + 'dt': 0.05, + 'substeps': 5, + 'T': 100, + 'sensor_dims': SENSOR_DIMS, + 'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS, + END_EFFECTOR_POINT_VELOCITIES], + 'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, RGB_IMAGE], + 'meta_include': [RGB_IMAGE_SIZE], + 'image_width': IMAGE_WIDTH, + 'image_height': IMAGE_HEIGHT, + 'image_channels': IMAGE_CHANNELS, + 'camera_pos': np.array([0., 6., 0., 0., 0., 0.]), +} + +algorithm = { + 'iterations': 15, + 'policy_sample_mode': 'replace', +} + +algorithm['init_traj_distr'] = { + 'type': init_lqr, + 'init_gains': 100.0*GAINS, + 'init_acc': np.zeros_like(GAINS), + 'init_var': 10.0, + 'dt': agent['dt'], + 'T': agent['T'], +} + +torque_cost = { + 'type': CostAction, + 'wu': 1e-3 / GAINS, +} + +# Diff between average of end_effectors and block. +touch_cost = { + 'type': CostState, + 'data_type': END_EFFECTOR_POINTS, + 'A' : np.c_[np.eye(3), -np.eye(3)], + 'l1': 1.0, + 'evalnorm': evall1l2term, +} + +algorithm['cost'] = { + 'type': CostSum, + 'costs': [torque_cost, touch_cost], + 'weights': [1.0, 1.0, 1.0], +} + +algorithm['dynamics'] = { + 'type': DynamicsLRPrior, + 'regularization': 1e-6, + 'prior': { + 'type': DynamicsPriorGMM, + 'max_clusters': 40, + 'min_samples_per_cluster': 25, +# 'max_samples': 40, + }, +} + +algorithm['traj_opt'] = { + 'type': TrajOptLQRPython, +} + +algorithm['policy_opt'] = { + 'type': PolicyOptTf, + 'network_params': { + 'num_filters': [15, 15, 15], + 'obs_include': agent['obs_include'], + 'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES], + 'obs_image_data': [RGB_IMAGE], + 'image_width': IMAGE_WIDTH, + 'image_height': IMAGE_HEIGHT, + 'image_channels': IMAGE_CHANNELS, + 'sensor_dims': SENSOR_DIMS, + }, + 'network_model': multi_modal_network_fp, + 'fc_only_iterations': 5000, + 'init_iterations': 1000, + 'iterations': 1000, +} + +algorithm['policy_prior'] = { + 'type': PolicyPriorGMM, + 'max_clusters': 40, + 'min_samples_per_cluster': 25, +} + +config = { + 'iterations': algorithm['iterations'], + 'verbose_trials': np.inf, + 'verbose_policy_trials': np.inf, + 'agent': agent, + 'gui_on': True, + 'random_seed': list(range(5)), +} diff --git a/experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py b/experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py new file mode 100644 index 000000000..abbabfc55 --- /dev/null +++ b/experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py @@ -0,0 +1,76 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 60 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create dummy train positions (will be overwritten randomly). +pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)] + +# Add test positions. +pos_body_offset += default.test_pos_body + +# Update the default config. +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'randomly_sample_x0': True, + 'sampling_range_x0': [default.x0_lower, default.x0_upper], + 'pos_body_idx': default.pos_body_idx, + 'pos_body_offset': pos_body_offset, + 'randomly_sample_bodypos': True, + 'sampling_range_bodypos': [default.pos_body_lower, default.pos_body_upper], +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], +# 'cluster_method': 'traj_em', + 'cluster_method': 'kmeans', + 'num_clusters': 12, + 'kl_step': 1.0, + 'max_step_mult': 2.0, + 'min_step_mult': 0.05, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' +algorithm['dynamics']['prior']['max_samples'] = TRAIN_CONDITIONS + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 1, +}) + +common['info'] = generate_experiment_info(config) diff --git a/experiments/icra_reacher_vision/mdgps_on/hyperparams.py b/experiments/icra_reacher_vision/mdgps_on/hyperparams.py new file mode 100644 index 000000000..28234a7a0 --- /dev/null +++ b/experiments/icra_reacher_vision/mdgps_on/hyperparams.py @@ -0,0 +1,88 @@ +""" Hyperparameters for MJC peg insertion policy optimization. """ +import imp +import os.path +import numpy as np +from gps.gui.config import generate_experiment_info +from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS + +BASE_DIR = '/'.join(str.split(__file__, '/')[:-2]) +default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py') + +EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/' + +TEST_CONDITIONS = default.TEST_CONDITIONS +TRAIN_CONDITIONS = 12 +TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS + +# Create body positions +pos_body_offset = [] + +# Outer square +bound = 0.75 +xs = np.linspace(-bound, bound, 2) +zs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs] + +# Inner square +bound = 0.25 +xs = np.linspace(-bound, bound, 2) +zs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs] + +# Middle diamond +bound = 0.5 +xs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([x,0,0]) for x in xs] +zs = np.linspace(-bound, bound, 2) +pos_body_offset += [np.array([0,0,z]) for z in zs] + +# Add test positions. +pos_body_offset += default.test_pos_body + +# Update the defaults +common = default.common.copy() +common.update({ + 'experiment_dir': EXP_DIR, + 'data_files_dir': EXP_DIR + 'data_files/', + 'target_filename': EXP_DIR + 'target.npz', + 'log_filename': EXP_DIR + 'log.txt', + 'conditions': TOTAL_CONDITIONS, + 'train_conditions': range(TRAIN_CONDITIONS), + 'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), +}) + +if not os.path.exists(common['data_files_dir']): + os.makedirs(common['data_files_dir']) + +agent = default.agent.copy() +agent.update({ + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'pos_body_idx': default.pos_body_idx, + 'pos_body_offset': pos_body_offset, +}) + +algorithm = default.algorithm.copy() +algorithm.update({ + 'type': AlgorithmMDGPS, + 'sample_on_policy': True, + 'conditions': common['conditions'], + 'train_conditions': common['train_conditions'], + 'test_conditions': common['test_conditions'], + 'kl_step': 1.0, + 'max_step_mult': 2.0, + 'min_step_mult': 0.1, +}) +algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy' +algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS + +config = default.config.copy() +config.update({ + 'common': common, + 'algorithm': algorithm, + 'agent': agent, + 'num_samples': 5, +}) + +common['info'] = generate_experiment_info(config)