From f7b482dc72b40d22f7b8fbd5d61e88adc398dd10 Mon Sep 17 00:00:00 2001
From: Harley Montgomery <harleymontgomery@gmail.com>
Date: Tue, 25 Oct 2016 17:16:28 -0700
Subject: [PATCH 1/2] add RFGPS

---
 experiments/box2d_arm_example/hyperparams.py  |   9 +-
 .../box2d_badmm_example/hyperparams.py        |   9 +-
 .../box2d_pointmass_example/hyperparams.py    |   9 +-
 .../mjc_pointmass_example/hyperparams.py      |   9 +-
 experiments/mjc_rfgps_example/hyperparams.py  | 174 ++++++++++++
 python/gps/agent/config.py                    |   6 +-
 python/gps/agent/mjc/agent_mjc.py             |  49 +++-
 python/gps/algorithm/algorithm.py             | 128 +++++++--
 python/gps/algorithm/algorithm_mdgps.py       | 248 +++++++++++++++---
 python/gps/algorithm/config.py                |  11 +-
 python/gps/algorithm/cost/config.py           |  30 +--
 python/gps/algorithm/cost/cost_state.py       |  70 +++--
 .../algorithm/dynamics/dynamics_lr_prior.py   |   3 -
 .../gps/algorithm/policy/policy_prior_gmm.py  |   2 -
 python/gps/gps_main.py                        |  10 +-
 python/gps/gui/gps_training_gui.py            |   4 +-
 python/gps/utility/general_utils.py           |  44 +++-
 17 files changed, 669 insertions(+), 146 deletions(-)
 create mode 100644 experiments/mjc_rfgps_example/hyperparams.py

diff --git a/experiments/box2d_arm_example/hyperparams.py b/experiments/box2d_arm_example/hyperparams.py
index ea3d8acbe..bc50dd2e7 100644
--- a/experiments/box2d_arm_example/hyperparams.py
+++ b/experiments/box2d_arm_example/hyperparams.py
@@ -82,12 +82,9 @@
 
 state_cost = {
     'type': CostState,
-    'data_types' : {
-        JOINT_ANGLES: {
-            'wp': np.array([1, 1]),
-            'target_state': agent["target_state"],
-        },
-    },
+    'data_type': JOINT_ANGLES,
+    'wp': np.array([1, 1]),
+    'target_state': agent["target_state"],
 }
 
 algorithm['cost'] = {
diff --git a/experiments/box2d_badmm_example/hyperparams.py b/experiments/box2d_badmm_example/hyperparams.py
index 463b9d94b..c7762ae83 100644
--- a/experiments/box2d_badmm_example/hyperparams.py
+++ b/experiments/box2d_badmm_example/hyperparams.py
@@ -97,12 +97,9 @@
 
 state_cost = {
     'type': CostState,
-    'data_types' : {
-        JOINT_ANGLES: {
-            'wp': np.array([1, 1]),
-            'target_state': agent["target_state"],
-        },
-    },
+    'data_type': JOINT_ANGLES,
+    'wp': np.array([1, 1]),
+    'target_state': agent["target_state"],
 }
 
 algorithm['cost'] = {
diff --git a/experiments/box2d_pointmass_example/hyperparams.py b/experiments/box2d_pointmass_example/hyperparams.py
index a53d7afba..b88edcd08 100644
--- a/experiments/box2d_pointmass_example/hyperparams.py
+++ b/experiments/box2d_pointmass_example/hyperparams.py
@@ -80,12 +80,9 @@
 
 state_cost = {
     'type': CostState,
-    'data_types' : {
-        END_EFFECTOR_POINTS: {
-            'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
-            'target_state': agent["target_state"],
-        },
-    },
+    'data_type': END_EFFECTOR_POINTS,
+    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
+    'target_state': agent["target_state"],
 }
 
 algorithm['cost'] = {
diff --git a/experiments/mjc_pointmass_example/hyperparams.py b/experiments/mjc_pointmass_example/hyperparams.py
index 4024914f7..cfd2120ab 100644
--- a/experiments/mjc_pointmass_example/hyperparams.py
+++ b/experiments/mjc_pointmass_example/hyperparams.py
@@ -84,12 +84,9 @@
 
 algorithm['cost'] = {
     'type': CostState,
-    'data_types' : {
-        JOINT_ANGLES: {
-            'wp': np.ones(SENSOR_DIMS[ACTION]),
-            'target_state': np.array([0.5, 0.5]),
-        },
-    },
+    'data_type': JOINT_ANGLES,
+    'wp': np.ones(SENSOR_DIMS[ACTION]),
+    'target_state': np.array([0.5, 0.5]),
 }
 
 algorithm['dynamics'] = {
diff --git a/experiments/mjc_rfgps_example/hyperparams.py b/experiments/mjc_rfgps_example/hyperparams.py
new file mode 100644
index 000000000..4f751a4ed
--- /dev/null
+++ b/experiments/mjc_rfgps_example/hyperparams.py
@@ -0,0 +1,174 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+from gps.algorithm.cost.cost_state import CostState
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_utils import evall1l2term
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.algorithm.policy.policy_prior import PolicyPrior
+from gps.gui.config import generate_experiment_info
+from gps.utility.general_utils import sample_params
+from gps.proto.gps_pb2 import *
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 2,
+    JOINT_VELOCITIES: 2,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 2,
+}
+
+GAINS = np.ones(SENSOR_DIMS[ACTION])
+
+TRAIN_CONDITIONS = 40
+TEST_CONDITIONS = 10
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Set up ranges.
+pos_body_lower = np.array([-0.3, -0.1, 0])
+pos_body_upper = np.array([0.1, 0.3, 0])
+
+# Create dummy train positions (will be overwritten randomly).
+pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]
+
+# Add random test conditions.
+np.random.seed(13)
+pos_body_offset += [sample_params([pos_body_lower, pos_body_upper], []) \
+        for _ in range(TEST_CONDITIONS)]
+
+BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
+EXP_DIR = BASE_DIR + '/../experiments/mjc_mdgps_example/'
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+}
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/reacher.xml',
+    'conditions': common['conditions'],
+    'x0': np.zeros(4),
+    'T': 50,
+    'dt': 0.05,
+    'substeps': 2,
+    'pos_body_offset': pos_body_offset,
+    'pos_body_idx': np.array([4]),
+    'randomly_sample_bodypos': True,
+    'sampling_range_bodypos': [pos_body_lower, pos_body_upper],
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 0., 1.5, 0., 0., 0.]),
+}
+
+algorithm = {
+    'type': AlgorithmMDGPS,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'sample_on_policy': True,
+    'iterations': 15,
+    'kl_step': 1.0,
+    'min_step_mult': 0.5,
+    'max_step_mult': 3.0,
+    'policy_sample_mode': 'replace',
+    'cluster_method': 'traj_em',
+    'num_clusters': 8,
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains': 100.0*GAINS,
+    'init_acc': np.zeros_like(GAINS),
+    'init_var': 10.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 1e-3 / GAINS,
+}
+
+# Diff between average of end_effectors and block.
+touch_cost = {
+    'type': CostState,
+    'data_type': END_EFFECTOR_POINTS,
+    'A' : np.c_[np.eye(3), -np.eye(3)],
+    'l1': 1.0,
+    'evalnorm': evall1l2term,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, touch_cost],
+    'weights': [1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 50,
+        'min_samples_per_cluster': 40,
+        'max_samples': TRAIN_CONDITIONS,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptCaffe,
+    'iterations': 4000,
+    'weights_file_prefix': EXP_DIR + 'policy',
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 50,
+    'min_samples_per_cluster': 40,
+    'max_samples': 20,
+}
+
+config = {
+    'gui_on': True,
+    'iterations': algorithm['iterations'],
+    'num_samples': 1,
+    'verbose_trials': 0,
+    'verbose_policy_trials': 1,
+    'common': common,
+    'agent': agent,
+    'algorithm': algorithm,
+}
+
+common['info'] = generate_experiment_info(config)
diff --git a/python/gps/agent/config.py b/python/gps/agent/config.py
index 9c2e18a6a..79308b470 100755
--- a/python/gps/agent/config.py
+++ b/python/gps/agent/config.py
@@ -69,7 +69,11 @@
     'image_width': 640,
     'image_height': 480,
     'image_channels': 3,
-    'meta_include': []
+    'meta_include': [],
+    'randomly_sample_x0': False,
+    'prohibited_ranges_x0': [],
+    'randomly_sample_bodypos': False,
+    'prohibited_ranges_bodypos': [],
 }
 
 AGENT_BOX2D = {
diff --git a/python/gps/agent/mjc/agent_mjc.py b/python/gps/agent/mjc/agent_mjc.py
index bfc53747a..a2fbf060b 100755
--- a/python/gps/agent/mjc/agent_mjc.py
+++ b/python/gps/agent/mjc/agent_mjc.py
@@ -8,14 +8,10 @@
 from gps.agent.agent import Agent
 from gps.agent.agent_utils import generate_noise, setup
 from gps.agent.config import AGENT_MUJOCO
-from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
-        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, \
-        END_EFFECTOR_POINT_JACOBIANS, ACTION, RGB_IMAGE, RGB_IMAGE_SIZE, \
-        CONTEXT_IMAGE, CONTEXT_IMAGE_SIZE, IMAGE_FEAT, \
-        END_EFFECTOR_POINTS_NO_TARGET, END_EFFECTOR_POINT_VELOCITIES_NO_TARGET
+from gps.proto.gps_pb2 import *
 
 from gps.sample.sample import Sample
-
+from gps.utility.general_utils import sample_params
 
 class AgentMuJoCo(Agent):
     """
@@ -74,7 +70,6 @@ def _setup_world(self, filename):
         self.x0 = []
         for i in range(self._hyperparams['conditions']):
             if END_EFFECTOR_POINTS in self.x_data_types:
-                # TODO: this assumes END_EFFECTOR_VELOCITIES is also in datapoints right?
                 self._init(i)
                 eepts = self._world[i].get_data()['site_xpos'].flatten()
                 self.x0.append(
@@ -99,6 +94,44 @@ def _setup_world(self, filename):
                                        cam_pos[0], cam_pos[1], cam_pos[2],
                                        cam_pos[3], cam_pos[4], cam_pos[5])
 
+    def reset_initial_x0(self, condition):
+        """
+        Reset initial x0 randomly based on sampling_range_x0
+        and prohibited_ranges_x0
+        Args:
+            condition: Which condition setup to run.
+        """
+        self._hyperparams['x0'][condition] = sample_params(
+                self._hyperparams['sampling_range_x0'],
+                self._hyperparams['prohibited_ranges_x0'])
+
+    def reset_initial_body_offset(self, condition):
+        """
+        Reset initial body offset randomly based on sampling_range_bodypos
+        and prohibited_ranges_bodypos
+        Args:
+            condition: Which condition setup to run.
+        """
+        tmp_body_pos_offset = self._hyperparams['pos_body_offset'][condition][:]
+        self._hyperparams['pos_body_offset'][condition] = sample_params(
+                self._hyperparams['sampling_range_bodypos'],
+                self._hyperparams['prohibited_ranges_bodypos'])
+        # TODO: handle the i/j stuff as above
+        for j in range(len(self._hyperparams['pos_body_idx'][condition])):
+            idx = self._hyperparams['pos_body_idx'][condition][j]
+            self._model[condition]['body_pos'][idx, :] -= tmp_body_pos_offset
+            self._model[condition]['body_pos'][idx, :] += self._hyperparams['pos_body_offset'][condition]
+
+        if END_EFFECTOR_POINTS in self.x_data_types:
+            x0 = self._hyperparams['x0'][condition]
+            eepts = self._world[condition].get_data()['site_xpos'].flatten()
+            self.x0[condition] = np.concatenate([x0, eepts, np.zeros_like(eepts)])
+        elif END_EFFECTOR_POINTS_NO_TARGET in self.x_data_types:
+            x0 = self._hyperparams['x0'][condition]
+            eepts = self._world[i].get_data()['site_xpos'].flatten()
+            eepts_notgt = np.delete(eepts, self._hyperparams['target_idx'])
+            self.x0[condition] = np.concatenate([x0, eepts_notgt, np.zeros_like(eepts_notgt)])
+
     def sample(self, policy, condition, verbose=True, save=True, noisy=True):
         """
         Runs a trial and constructs a new sample containing information
@@ -188,6 +221,7 @@ def _init_sample(self, condition, feature_fn=None):
 
         if (END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']):
             sample.set(END_EFFECTOR_POINTS_NO_TARGET, np.delete(eepts, self._hyperparams['target_idx']), t=0)
+        if (END_EFFECTOR_POINT_VELOCITIES_NO_TARGET in self._hyperparams['obs_include']):
             sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET, np.delete(np.zeros_like(eepts), self._hyperparams['target_idx']), t=0)
 
         jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
@@ -248,6 +282,7 @@ def _set_sample(self, sample, mj_X, t, condition, feature_fn=None):
 
         if (END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']):
             sample.set(END_EFFECTOR_POINTS_NO_TARGET, np.delete(cur_eepts, self._hyperparams['target_idx']), t=t+1)
+        if (END_EFFECTOR_POINT_VELOCITIES_NO_TARGET in self._hyperparams['obs_include']):
             sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET, np.delete(eept_vels, self._hyperparams['target_idx']), t=t+1)
 
         jac = np.zeros([cur_eepts.shape[0], self._model[condition]['nq']])
diff --git a/python/gps/algorithm/algorithm.py b/python/gps/algorithm/algorithm.py
index 1b949866c..29ddee759 100755
--- a/python/gps/algorithm/algorithm.py
+++ b/python/gps/algorithm/algorithm.py
@@ -9,6 +9,7 @@
 from gps.algorithm.config import ALG
 from gps.algorithm.algorithm_utils import IterationData, TrajectoryInfo
 from gps.utility.general_utils import extract_condition
+from gps.sample.sample_list import SampleList
 
 
 LOGGER = logging.getLogger(__name__)
@@ -51,6 +52,8 @@ def __init__(self, hyperparams):
         self.prev = [IterationData() for _ in range(self.M)]
 
         dynamics = self._hyperparams['dynamics']
+
+        # Store traj_info per condition.
         for m in range(self.M):
             self.cur[m].traj_info = TrajectoryInfo()
             self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)
@@ -59,18 +62,27 @@ def __init__(self, hyperparams):
             )
             self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)
 
+        # If clustering, store traj_info per cluster and single dynamics GMM.
+        if self._hyperparams['num_clusters']:
+            self.dynamics = dynamics['type'](dynamics)
+            self.cluster_traj_info = []
+            for k in range(self._hyperparams['num_clusters']):
+                self.cluster_traj_info.append(TrajectoryInfo())
+                self.cluster_traj_info[k].dynamics = \
+                        dynamics['type'](dynamics)
+
         self.traj_opt = hyperparams['traj_opt']['type'](
             hyperparams['traj_opt']
         )
         if type(hyperparams['cost']) == list:
             self.cost = [
                 hyperparams['cost'][i]['type'](hyperparams['cost'][i])
-                for i in range(self.M)
+                for i in range(hyperparams['conditions'])
             ]
         else:
             self.cost = [
                 hyperparams['cost']['type'](hyperparams['cost'])
-                for _ in range(self.M)
+                for _ in range(hyperparams['conditions'])
             ]
         self.base_kl_step = self._hyperparams['kl_step']
 
@@ -79,36 +91,94 @@ def iteration(self, sample_list):
         """ Run iteration of the algorithm. """
         raise NotImplementedError("Must be implemented in subclass")
 
-    def _update_dynamics(self):
+    def _get_samples(self, idxs=None):
         """
-        Instantiate dynamics objects and update prior. Fit dynamics to
-        current samples.
+        Helper function for extracting from 'self.cur'.
         """
-        for m in range(self.M):
-            cur_data = self.cur[m].sample_list
-            X = cur_data.get_X()
-            U = cur_data.get_U()
-
-            # Update prior and fit dynamics.
-            self.cur[m].traj_info.dynamics.update_prior(cur_data)
-            self.cur[m].traj_info.dynamics.fit(X, U)
-
-            # Fit x0mu/x0sigma.
-            x0 = X[:, 0, :]
-            x0mu = np.mean(x0, axis=0)
-            self.cur[m].traj_info.x0mu = x0mu
-            self.cur[m].traj_info.x0sigma = np.diag(
-                np.maximum(np.var(x0, axis=0),
-                           self._hyperparams['initial_state_var'])
-            )
+        if idxs is None:
+            idxs = range(self.M)
+        samples = [self.cur[i].sample_list.get_samples() for i in idxs]
+        return SampleList(sum(samples, []))
+
+    def _update_dynamics(self, update_prior=True):
+        """
+        Update dynamics linearizations (dispatch for conditions/clusters).
+        """
+        if self._hyperparams['num_clusters']:
+            K = self._hyperparams['num_clusters']
+            if update_prior:
+                all_samples = self._get_samples()
+                self.dynamics.update_prior(all_samples)
+            for k in range(K):
+                self._update_cluster_dynamics(k)
+        else:
+            for m in range(self.M):
+                self._update_condition_dynamics(m, update_prior)
+
+    def _update_condition_dynamics(self, m, update_prior):
+        """
+        Update dynamics linearizations for condition m.
+        """
+        samples = self.cur[m].sample_list
+        X = samples.get_X()
+        U = samples.get_U()
+
+        traj_info = self.cur[m].traj_info
+        if update_prior:
+            traj_info.dynamics.update_prior(samples)
+        traj_info.dynamics.fit(X, U) # Stores Fm/fv/dyn_covar.
+
+        # Fit x0mu/x0sigma and store.
+        x0 = X[:, 0, :]
+        x0mu = np.mean(x0, axis=0)
+        x0sigma = np.diag(
+            np.maximum(np.var(x0, axis=0),
+                       self._hyperparams['initial_state_var'])
+        )
+        prior = self.cur[m].traj_info.dynamics.get_prior()
+        if prior:
+            mu0, Phi, priorm, n0 = prior.initial_state()
+            N = len(samples)
+            x0sigma += \
+                    Phi + (N*priorm) / (N+priorm) * \
+                    np.outer(x0mu-mu0, x0mu-mu0) / (N+n0)
+        traj_info.x0mu = x0mu
+        traj_info.x0sigma = x0sigma
+
+    def _update_cluster_dynamics(self, k):
+        """
+        Update dynamics linearizations for cluster k.
+        """
+        idxs = np.where(self.cluster_idx == k)[0]
+        LOGGER.debug("Updating dynamics for cluster %d, idxs %s", k, idxs)
+        if idxs.size == 0:
+            return
+
+        samples = self._get_samples(idxs)
+        X = samples.get_X()
+        U = samples.get_U()
+
+        # Fit x0mu/x0sigma/dynamics and store cluster-level info.
+        traj_info = self.cluster_traj_info[k]
+        x0 = X[:, 0, :]
+        traj_info.x0mu = np.mean(x0, axis=0)
+        traj_info.x0sigma = np.diag(
+            np.maximum(np.var(x0, axis=0),
+                       self._hyperparams['initial_state_var'])
+        )
+
+        Fm, fv, dyn_covar = self.dynamics.fit(X, U)
+        traj_info.dynamics.Fm = Fm
+        traj_info.dynamics.fv = fv
+        traj_info.dynamics.dyn_covar = dyn_covar
 
-            prior = self.cur[m].traj_info.dynamics.get_prior()
-            if prior:
-                mu0, Phi, priorm, n0 = prior.initial_state()
-                N = len(cur_data)
-                self.cur[m].traj_info.x0sigma += \
-                        Phi + (N*priorm) / (N+priorm) * \
-                        np.outer(x0mu-mu0, x0mu-mu0) / (N+n0)
+        # Copy to trajectory-level info.
+        for i in idxs:
+            self.cur[i].traj_info.x0mu = traj_info.x0mu
+            self.cur[i].traj_info.x0sigma = traj_info.x0sigma
+            self.cur[i].traj_info.dynamics.Fm = Fm
+            self.cur[i].traj_info.dynamics.fv = fv
+            self.cur[i].traj_info.dynamics.dyn_covar = dyn_covar
 
     def _update_trajectories(self):
         """
diff --git a/python/gps/algorithm/algorithm_mdgps.py b/python/gps/algorithm/algorithm_mdgps.py
index b3bf8c932..22fdb2a4a 100644
--- a/python/gps/algorithm/algorithm_mdgps.py
+++ b/python/gps/algorithm/algorithm_mdgps.py
@@ -9,6 +9,7 @@
 from gps.algorithm.algorithm_utils import PolicyInfo
 from gps.algorithm.config import ALG_MDGPS
 from gps.sample.sample_list import SampleList
+from sklearn.cluster import KMeans
 
 LOGGER = logging.getLogger(__name__)
 
@@ -23,12 +24,22 @@ def __init__(self, hyperparams):
         config.update(hyperparams)
         Algorithm.__init__(self, config)
 
+        # Store single policy prior.
         policy_prior = self._hyperparams['policy_prior']
+
+        # Store pol_info per condition.
         for m in range(self.M):
             self.cur[m].pol_info = PolicyInfo(self._hyperparams)
             self.cur[m].pol_info.policy_prior = \
                     policy_prior['type'](policy_prior)
 
+        # Store pol per cluster.
+        if self._hyperparams['num_clusters']:
+            self.policy_prior = policy_prior['type'](policy_prior)
+            self.cluster_pol_info = [PolicyInfo(self._hyperparams) \
+                    for k in range(self._hyperparams['num_clusters'])
+            ]
+
         self.policy_opt = self._hyperparams['policy_opt']['type'](
             self._hyperparams['policy_opt'], self.dO, self.dU
         )
@@ -40,24 +51,29 @@ def iteration(self, sample_lists):
         Args:
             sample_lists: List of SampleList objects for each condition.
         """
-        # Store the samples and evaluate the costs.
+        # Store the samples.
         for m in range(self.M):
             self.cur[m].sample_list = sample_lists[m]
-            self._eval_cost(m)
 
-        # Update dynamics linearizations.
-        self._update_dynamics()
-
-        # On the first iteration, need to catch policy up to init_traj_distr.
+        # On the first iteration, need to train on init_traj_distr.
         if self.iteration_count == 0:
-            self.new_traj_distr = [
-                self.cur[cond].traj_distr for cond in range(self.M)
-            ]
+            self.new_traj_distr = [self.cur[m].traj_distr \
+                    for m in range(self.M)]
             self._update_policy()
 
-        # Update policy linearizations.
+        # If clustering, set self.cluster_idx.
+        if self._hyperparams['num_clusters']:
+            self._cluster_samples()
+
+        # Update dynamics/policy linearizations.
+        # NOTE: Skip for 'traj_em', since fits while clustering.
+        if self._hyperparams['cluster_method'] != 'traj_em':
+            self._update_dynamics()
+            self._update_policy_fit()
+
+        # Update quadratic cost expansions for all conditions.
         for m in range(self.M):
-            self._update_policy_fit(m)
+            self._eval_cost(m)
 
         # C-step
         if self.iteration_count > 0:
@@ -96,39 +112,79 @@ def _update_policy(self):
             tgt_prc = np.concatenate((tgt_prc, prc))
             tgt_wt = np.concatenate((tgt_wt, wt))
             obs_data = np.concatenate((obs_data, samples.get_obs()))
-        self.policy_opt.update(obs_data, tgt_mu, tgt_prc, tgt_wt)
+        if 'fc_only_iterations' in self.policy_opt._hyperparams:
+            self.policy_opt.update(obs_data, tgt_mu, tgt_prc, tgt_wt, self.iteration_count)
+        else:
+            self.policy_opt.update(obs_data, tgt_mu, tgt_prc, tgt_wt)
 
-    def _update_policy_fit(self, m):
+    def _update_policy_fit(self, update_prior=True):
+        """
+        Update dynamics linearizations (dispatch for conditions/clusters).
+        """
+        if self._hyperparams['num_clusters']:
+            K = self._hyperparams['num_clusters']
+            if update_prior:
+                all_samples = self._get_samples()
+                self.policy_prior.update(all_samples, self.policy_opt,
+                    mode=self._hyperparams['policy_sample_mode'])
+            for k in range(K):
+                self._update_cluster_policy_fit(k)
+        else:
+            for m in range(self.M):
+                self._update_condition_policy_fit(m, update_prior)
+
+    def _update_condition_policy_fit(self, m, update_prior):
         """
         Re-estimate the local policy values in the neighborhood of the
-        trajectory.
-        Args:
-            m: Condition
-            init: Whether this is the initial fitting of the policy.
+        trajectory for condition m.
         """
-        dX, dU, T = self.dX, self.dU, self.T
-        # Choose samples to use.
         samples = self.cur[m].sample_list
-        N = len(samples)
-        pol_info = self.cur[m].pol_info
         X = samples.get_X()
         obs = samples.get_obs().copy()
         pol_mu, pol_sig = self.policy_opt.prob(obs)[:2]
-        pol_info.pol_mu, pol_info.pol_sig = pol_mu, pol_sig
 
-        # Update policy prior.
-        policy_prior = pol_info.policy_prior
-        samples = SampleList(self.cur[m].sample_list)
-        mode = self._hyperparams['policy_sample_mode']
-        policy_prior.update(samples, self.policy_opt, mode)
+        pol_info = self.cur[m].pol_info
+        if update_prior:
+            pol_info.policy_prior.update(samples, self.policy_opt,
+                    mode=self._hyperparams['policy_sample_mode'])
 
-        # Fit linearization and store in pol_info.
+        # Fit policy linearization and store.
         pol_info.pol_K, pol_info.pol_k, pol_info.pol_S = \
-                policy_prior.fit(X, pol_mu, pol_sig)
-        for t in range(T):
+                pol_info.policy_prior.fit(X, pol_mu, pol_sig)
+        for t in range(X.shape[1]):
             pol_info.chol_pol_S[t, :, :] = \
                     sp.linalg.cholesky(pol_info.pol_S[t, :, :])
 
+    def _update_cluster_policy_fit(self, k):
+        """
+        Re-estimate the local policy values in the neighborhood of the
+        trajectory for condition m.
+        """
+        idxs = np.where(self.cluster_idx == k)[0]
+        LOGGER.debug("Updating policy fit for cluster %d, idxs %s", k, idxs)
+        if idxs.size == 0:
+            return
+
+        samples = self._get_samples(idxs)
+        X = samples.get_X()
+        obs = samples.get_obs().copy()
+        pol_mu, pol_sig = self.policy_opt.prob(obs)[:2]
+
+        # Fit policy linearization and store.
+        pol_info = self.cluster_pol_info[k]
+        pol_info.pol_K, pol_info.pol_k, pol_info.pol_S = \
+                self.policy_prior.fit(X, pol_mu, pol_sig)
+        for t in range(X.shape[1]):
+            pol_info.chol_pol_S[t, :, :] = \
+                    sp.linalg.cholesky(pol_info.pol_S[t, :, :])
+
+        # Copy to trajectory-level info.
+        for i in idxs:
+            self.cur[i].pol_info.pol_K = pol_info.pol_K
+            self.cur[i].pol_info.pol_k = pol_info.pol_k
+            self.cur[i].pol_info.pol_S = pol_info.pol_S
+            self.cur[i].pol_info.chol_pol_S = pol_info.chol_pol_S
+
     def _advance_iteration_variables(self):
         """
         Move all 'cur' variables to 'prev', reinitialize 'cur'
@@ -231,3 +287,135 @@ def compute_costs(self, m, eta):
             fcv[t, :] = (cv[t, :] + PKLv[t, :] * eta) / (eta + multiplier)
 
         return fCm, fcv
+
+    def _cluster_samples(self, mode=None):
+        """
+        Divide samples into clusters, using desired clustering method.
+        """
+        # By default, use 'cluster_method', but can override.
+        # (mainly for using kmeans when training on init_traj_distr).
+        if mode is None:
+            mode = self._hyperparams['cluster_method']
+
+        # Dispatch to clustering method.
+        if mode == 'random':
+            self._cluster_random()
+        elif mode == 'kmeans':
+            self._cluster_kmeans()
+        elif mode == 'traj_em':
+            self._cluster_traj_em()
+        else:
+            raise NotImplementedError
+
+    def _cluster_random(self):
+        """
+        Set cluster_idx using random clustering.
+        """
+        self.cluster_idx = np.random.choice(K, size=self.M)
+        LOGGER.debug("Clustering randomly: %s", self.cluster_idx)
+
+    def _cluster_kmeans(self):
+        """
+        Set cluster_idx using kmeans clustering on initial state.
+        """
+        K = self._hyperparams['num_clusters']
+        kmeans = KMeans(K)
+        x0 = self._get_samples().get_X()[:, 0, :] # (N, dX)
+        self.cluster_idx = kmeans.fit_predict(x0)
+        LOGGER.debug("Clustering by kmeans: %s", self.cluster_idx)
+
+    def _cluster_traj_em(self):
+        """
+        Set cluster_idx using trajectory aware clustering.
+        """
+        # Initialize starting with random data points as centers.
+        K = self._hyperparams['num_clusters']
+#        self.cluster_idx = np.random.choice(K, size=self.M)
+#        LOGGER.debug("Traj EM initialization: %s", self.cluster_idx)
+        LOGGER.debug("Traj EM initialization to kmeans")
+        self._cluster_kmeans()
+        self._update_dynamics()
+        self._update_policy_fit()
+
+        # Begin EM steps.
+        logp = np.empty([self.M, K])
+        max_iters = self._hyperparams['traj_em_iters']
+        T = self._hyperparams['traj_em_horizon']
+        for i in range(max_iters):
+            # E step.
+            LOGGER.debug("E Step (%d/%d)", i+1, max_iters)
+            for k in range(K):
+                for m in range(self.M):
+                    logp[m, k] = self._traj_log_prob(m, k, T)
+
+            cluster_idx = logp.argmax(axis=1)
+            maxlogp = np.max(logp, axis=1, keepdims=True)
+            logp -= maxlogp + np.log(np.sum(np.exp(logp-maxlogp), axis=1, keepdims=True))
+            cluster_weights = np.exp(logp)
+            cluster_masses = cluster_weights.sum(axis=0)
+            LOGGER.debug("Traj EM cluster idx: %s", cluster_idx)
+            LOGGER.debug("Total cluster weights: %s", cluster_masses)
+
+            # Convergence check.
+            if np.all(cluster_idx == self.cluster_idx):
+#            if (i > 0) and np.all(cluster_idx == self.cluster_idx):
+                LOGGER.debug("Converged on itr (%d/%d)", i+1, max_iters)
+                break
+
+            # Reboot small clusters.
+            if i < max_iters-1:
+                ks = np.where(cluster_masses < 0.1)[0]
+                for k in ks:
+                    ms = np.random.choice(self.M, size=2)
+                    LOGGER.debug("Rebooting cluster %d with %s", k, ms)
+                    cluster_idx[ms] = k
+
+            # M step.
+            self.cluster_idx = cluster_idx
+            LOGGER.debug("M Step (%d/%d)", i+1, max_iters)
+            self._update_dynamics(update_prior=False)
+            self._update_policy_fit(update_prior=False)
+
+    def _traj_log_prob(self, m, k, T=None):
+        """
+        Returns the log likelihood of condition 'm' occurring under
+        the trajectory defined by cluster 'k'.
+        """
+        # Pull out states/actions and linearizations.
+        sample = self.cur[m].sample_list[0] # M = 1 for clustering.
+        X = sample.get_X()
+        U = sample.get_U()
+
+        traj_info = self.cluster_traj_info[k]
+        x0mu, x0sigma = traj_info.x0mu, traj_info.x0sigma
+        dyn = traj_info.dynamics
+        pol = self.cluster_pol_info[k]
+
+        # Set horizon, we ignore final (X, U) pair.
+        if T is None:
+            T = X.shape[0] - 1
+        else:
+            assert T < X.shape[0] - 1
+
+        # Begin with log prob x0mu/x0sigma.
+        # TODO: use sp.linalg.cholesky for all of this?
+        logp = -0.5*np.log(np.linalg.det(x0sigma))
+        diff = X[0] - x0mu
+        logp -= 0.5*diff.dot(np.linalg.solve(x0sigma, diff))
+
+        # Rest of trajectory.
+        for t in range(T):
+            # Action term.
+            sigma = pol.pol_S[t]
+            logp -= 0.5*np.log(np.linalg.det(sigma))
+            diff = U[t] - pol.pol_K[t].dot(X[t]) - pol.pol_k[t]
+            logp -= 0.5*diff.dot(np.linalg.solve(sigma, diff))
+
+            # Next state term.
+            xu = np.concatenate([X[t], U[t]])
+            sigma = dyn.dyn_covar[t]
+            logp -= 0.5*np.log(np.linalg.det(sigma))
+            diff = X[t+1] - dyn.Fm[t].dot(xu) - dyn.fv[t]
+            logp -= 0.5*diff.dot(np.linalg.solve(sigma, diff))
+
+        return logp
diff --git a/python/gps/algorithm/config.py b/python/gps/algorithm/config.py
index 032f648bc..8771d64c6 100644
--- a/python/gps/algorithm/config.py
+++ b/python/gps/algorithm/config.py
@@ -22,6 +22,8 @@
     'cost': None,  # A list of Cost objects for each condition.
     # Whether or not to sample with neural net policy (only for badmm/mdgps).
     'sample_on_policy': False,
+    # Number of clusters to use if clustering samples.
+    'num_clusters': 0,
 }
 
 
@@ -45,7 +47,12 @@
 ALG_MDGPS = {
     # TODO: remove need for init_pol_wt in MDGPS
     'init_pol_wt': 0.01,
-    'policy_sample_mode': 'add',
-    # Whether to use 'laplace' or 'mc' cost in step adjusment
+    'policy_sample_mode': 'replace',
     'step_rule': 'laplace',
+    # How to cluster samples, 'random', 'kmeans', 'traj_em'.
+    'cluster_method': None,
+    # Horizon for trajectory aware EM.
+    'traj_em_horizon': None,
+    # How many iterations trajectory aware EM runs.
+    'traj_em_iters': 3,
 }
diff --git a/python/gps/algorithm/cost/config.py b/python/gps/algorithm/cost/config.py
index 97acdfb76..1f20f95c8 100644
--- a/python/gps/algorithm/cost/config.py
+++ b/python/gps/algorithm/cost/config.py
@@ -6,34 +6,30 @@
 
 # CostFK
 COST_FK = {
-    'ramp_option': RAMP_CONSTANT,  # How target cost ramps over time.
     'wp': None,  # State weights - must be set.
-    'wp_final_multiplier': 1.0,  # Weight multiplier on final time step.
-    'env_target': True,  # TODO - This isn't used.
-    'l1': 0.0,
-    'l2': 1.0,
-    'alpha': 1e-5,
     'target_end_effector': None,  # Target end-effector position.
     'evalnorm': evallogl2term,
+    'alpha': 1e-5,
+    'l1': 0.0,
+    'l2': 1.0,
+    'ramp_option': RAMP_CONSTANT,  # How target cost ramps over time.
+    'wp_final_multiplier': 1.0,  # Weight multiplier on final time step.
 }
 
-
-# CostState
+# CostState (dist = Ax - tgt), x.shape depends on data_type
 COST_STATE = {
-    'ramp_option': RAMP_CONSTANT,  # How target cost ramps over time.
+    'wp': None,  # State weights - Defaults to ones.
+    'A': None, # A matrix - Defaults to identity.
+    'data_type': None, # Must be set.
+    'target': 0.0,
+    'evalnorm': evallogl2term,
+    'alpha': 1e-5,
     'l1': 0.0,
     'l2': 1.0,
-    'alpha': 1e-2,
+    'ramp_option': RAMP_CONSTANT,  # How target cost ramps over time.
     'wp_final_multiplier': 1.0,  # Weight multiplier on final time step.
-    'data_types': {
-        'JointAngle': {
-            'target_state': None,  # Target state - must be set.
-            'wp': None,  # State weights - must be set.
-        },
-    },
 }
 
-
 # CostSum
 COST_SUM = {
     'costs': [],  # A list of hyperparam dictionaries for each cost.
diff --git a/python/gps/algorithm/cost/cost_state.py b/python/gps/algorithm/cost/cost_state.py
index cb6346c75..f47d5d427 100644
--- a/python/gps/algorithm/cost/cost_state.py
+++ b/python/gps/algorithm/cost/cost_state.py
@@ -21,6 +21,7 @@ def eval(self, sample):
         Args:
             sample:  A single sample
         """
+        # Allocate space for derivatives (full state).
         T = sample.T
         Du = sample.dU
         Dx = sample.dX
@@ -32,32 +33,45 @@ def eval(self, sample):
         final_lxx = np.zeros((T, Dx, Dx))
         final_lux = np.zeros((T, Du, Dx))
 
-        for data_type in self._hyperparams['data_types']:
-            config = self._hyperparams['data_types'][data_type]
-            wp = config['wp']
-            tgt = config['target_state']
-            x = sample.get(data_type)
-            _, dim_sensor = x.shape
-
-            wpm = get_ramp_multiplier(
-                self._hyperparams['ramp_option'], T,
-                wp_final_multiplier=self._hyperparams['wp_final_multiplier']
-            )
-            wp = wp * np.expand_dims(wpm, axis=-1)
-            # Compute state penalty.
-            dist = x - tgt
-
-            # Evaluate penalty term.
-            l, ls, lss = evall1l2term(
-                wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]),
-                np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
-                self._hyperparams['l1'], self._hyperparams['l2'],
-                self._hyperparams['alpha']
-            )
-
-            final_l += l
-
-            sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
-            sample.agent.pack_data_x(final_lxx, lss,
-                                     data_types=[data_type, data_type])
+        # Compute state penalty for data_type.
+        data_type = self._hyperparams['data_type']
+        x = sample.get(data_type)
+        _, Ds = x.shape
+
+        # Weighted distance is wp*(A.dot(x) - tgt).
+        A = self._hyperparams['A']
+        if A is None:
+            A = np.eye(Ds)
+        D = A.shape[0]
+
+        wp = self._hyperparams['wp']
+        if wp is None:
+            wp = np.ones(D)
+
+        # Compute state penalty.
+        # x.shape == (T, Ds)
+        # A.shape == (D, Ds)
+        # x.dot(A.T).shape == (T, D)
+        # tgt.shape == (D,) or scalar
+        tgt = self._hyperparams['target']
+        dist = x.dot(A.T) - tgt
+
+        ramp = get_ramp_multiplier(
+            self._hyperparams['ramp_option'], T,
+            wp_final_multiplier=self._hyperparams['wp_final_multiplier']
+        )
+        wp = wp*np.expand_dims(ramp, axis=-1)
+
+        # Evaluate penalty term.
+        l, ls, lss = evall1l2term(
+            wp, dist, np.tile(A, [T, 1, 1]), np.zeros((T, D, Ds, Ds)),
+            self._hyperparams['l1'], self._hyperparams['l2'],
+            self._hyperparams['alpha']
+        )
+
+        # Pack into final cost.
+        final_l += l
+        sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
+        sample.agent.pack_data_x(final_lxx, lss,
+                                 data_types=[data_type, data_type])
         return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
diff --git a/python/gps/algorithm/dynamics/dynamics_lr_prior.py b/python/gps/algorithm/dynamics/dynamics_lr_prior.py
index c3b576651..72d3ece0b 100644
--- a/python/gps/algorithm/dynamics/dynamics_lr_prior.py
+++ b/python/gps/algorithm/dynamics/dynamics_lr_prior.py
@@ -31,9 +31,6 @@ def fit(self, X, U):
         N, T, dX = X.shape
         dU = U.shape[2]
 
-        if N == 1:
-            raise ValueError("Cannot fit dynamics on 1 sample")
-
         self.Fm = np.zeros([T, dX, dX+dU])
         self.fv = np.zeros([T, dX])
         self.dyn_covar = np.zeros([T, dX, dX])
diff --git a/python/gps/algorithm/policy/policy_prior_gmm.py b/python/gps/algorithm/policy/policy_prior_gmm.py
index 27a43ccd4..9a268eb3e 100755
--- a/python/gps/algorithm/policy/policy_prior_gmm.py
+++ b/python/gps/algorithm/policy/policy_prior_gmm.py
@@ -109,8 +109,6 @@ def fit(self, X, pol_mu, pol_sig):
         """
         N, T, dX = X.shape
         dU = pol_mu.shape[2]
-        if N == 1:
-            raise ValueError("Cannot fit dynamics on 1 sample")
 
         # Collapse policy covariances. (This is only correct because
         # the policy doesn't depend on state).
diff --git a/python/gps/gps_main.py b/python/gps/gps_main.py
index d3a0ea883..a3931c068 100755
--- a/python/gps/gps_main.py
+++ b/python/gps/gps_main.py
@@ -1,7 +1,7 @@
 """ This file defines the main object that runs experiments. """
 
 import matplotlib as mpl
-mpl.use('Qt4Agg')
+mpl.use('Qt5Agg')
 
 import logging
 import imp
@@ -61,6 +61,14 @@ def run(self, itr_load=None):
         itr_start = self._initialize(itr_load)
 
         for itr in range(itr_start, self._hyperparams['iterations']):
+            if self.agent._hyperparams['randomly_sample_x0']:
+                for cond in self._train_idx:
+                    self.agent.reset_initial_x0(cond)
+
+            if self.agent._hyperparams['randomly_sample_bodypos']:
+                for cond in self._train_idx:
+                    self.agent.reset_initial_body_offset(cond)
+
             for cond in self._train_idx:
                 for i in range(self._hyperparams['num_samples']):
                     self._take_sample(itr, cond, i)
diff --git a/python/gps/gui/gps_training_gui.py b/python/gps/gui/gps_training_gui.py
index f7923c746..60819836f 100644
--- a/python/gps/gui/gps_training_gui.py
+++ b/python/gps/gui/gps_training_gui.py
@@ -353,7 +353,9 @@ def _update_trajectory_visualizations(self, algorithm, agent,
             self._traj_visualizer.set_lim(i=m, xlim=xlim, ylim=ylim, zlim=zlim)
             self._update_linear_gaussian_controller_plots(algorithm, agent, m)
             self._update_samples_plots(traj_sample_lists, m, 'green', 'Trajectory Samples')
-            if pol_sample_lists:
+
+            config = algorithm._hyperparams
+            if pol_sample_lists and config['train_conditions'] == config['test_conditions']:
                 self._update_samples_plots(pol_sample_lists,  m, 'blue',  'Policy Samples')
         self._traj_visualizer.draw()    # this must be called explicitly
 
diff --git a/python/gps/utility/general_utils.py b/python/gps/utility/general_utils.py
index 98cc3e49d..625f9ed37 100644
--- a/python/gps/utility/general_utils.py
+++ b/python/gps/utility/general_utils.py
@@ -1,7 +1,6 @@
 """ This file defines general utility functions and classes. """
 import numpy as np
 
-
 class BundleType(object):
     """
     This class bundles many fields, similar to a record or a mutable
@@ -92,3 +91,46 @@ def get_ee_points(offsets, ee_pos, ee_rot):
         3 x N array of end effector points.
     """
     return ee_rot.dot(offsets.T) + ee_pos.T
+
+def sample_params(sampling_range, prohibited_ranges):
+    """
+    Samples parameters from sampling_range ensuring that the sampled_point
+    doesn't lie in any of the prohibited_ranges.
+
+    Args:
+        sampling_range: A list of form [lower_lim, upper_lim] where lower_lim
+            and upper_lim are two numpy arrays with size N x 1 where N is the
+            dimension of the sample.
+        prohibited_ranges: A list of ranges which describes the region from which
+            the point can't be sampled. A range is a list of form [[l_0, u_0], ..,
+            [l_N, u_N]] where l_i and u_i are the upper and lower limits of the
+            i-th dimension. Can contain replace a [l_i, u_i] with None.
+    Returns:
+        sampled_point: A sampled vector within sampling range and lying outside
+        prohibited ranges.
+    """
+    lower_lim, upper_lim = sampling_range
+    N = len(lower_lim)
+    assert N == len(upper_lim)
+
+    valid = False
+    while not valid:
+        # Sample point from within lower/upper limits.
+        sampled_point = np.random.rand(N)*(upper_lim - lower_lim) + lower_lim
+
+        # Make sure it is valid.
+        valid = True
+        for prohibited_range in prohibited_ranges:
+            for i in range(N):
+                if prohibited_range[i] is None:
+                    continue
+
+                lower, upper = prohibited_range[i]
+                if (lower <= sampled_point[i] <= upper):
+                    valid = False
+                    break
+
+            if not valid:
+                break
+
+    return sampled_point

From ab6d7ce9cf42249cdacbb17439f0b23249d0cb18 Mon Sep 17 00:00:00 2001
From: Harley Montgomery <harleymontgomery@gmail.com>
Date: Fri, 24 Feb 2017 13:33:01 -0800
Subject: [PATCH 2/2] added ICRA experiments

---
 experiments/icra_peg_2D/badmm/hyperparams.py  |  75 ++++++++
 experiments/icra_peg_2D/hyperparams.py        | 150 +++++++++++++++
 .../icra_peg_2D/mdgps_cluster/hyperparams.py  |  74 ++++++++
 .../icra_peg_2D/mdgps_on/hyperparams.py       |  70 +++++++
 experiments/icra_peg_3D/badmm/hyperparams.py  |  76 ++++++++
 experiments/icra_peg_3D/hyperparams.py        | 151 +++++++++++++++
 .../icra_peg_3D/mdgps_cluster/hyperparams.py  |  75 ++++++++
 .../icra_peg_3D/mdgps_on/hyperparams.py       |  71 +++++++
 .../icra_peg_vision/badmm/hyperparams.py      |  74 ++++++++
 experiments/icra_peg_vision/hyperparams.py    | 175 ++++++++++++++++++
 .../mdgps_cluster/hyperparams.py              |  76 ++++++++
 .../icra_peg_vision/mdgps_on/hyperparams.py   |  70 +++++++
 experiments/icra_reacher/badmm/hyperparams.py |  86 +++++++++
 experiments/icra_reacher/hyperparams.py       | 143 ++++++++++++++
 .../icra_reacher/mdgps_cluster/hyperparams.py |  79 ++++++++
 .../icra_reacher/mdgps_on/hyperparams.py      |  81 ++++++++
 .../icra_reacher_vision/badmm/hyperparams.py  |  93 ++++++++++
 .../icra_reacher_vision/hyperparams.py        | 166 +++++++++++++++++
 .../mdgps_cluster/hyperparams.py              |  76 ++++++++
 .../mdgps_on/hyperparams.py                   |  88 +++++++++
 20 files changed, 1949 insertions(+)
 create mode 100644 experiments/icra_peg_2D/badmm/hyperparams.py
 create mode 100644 experiments/icra_peg_2D/hyperparams.py
 create mode 100644 experiments/icra_peg_2D/mdgps_cluster/hyperparams.py
 create mode 100644 experiments/icra_peg_2D/mdgps_on/hyperparams.py
 create mode 100644 experiments/icra_peg_3D/badmm/hyperparams.py
 create mode 100644 experiments/icra_peg_3D/hyperparams.py
 create mode 100644 experiments/icra_peg_3D/mdgps_cluster/hyperparams.py
 create mode 100644 experiments/icra_peg_3D/mdgps_on/hyperparams.py
 create mode 100644 experiments/icra_peg_vision/badmm/hyperparams.py
 create mode 100644 experiments/icra_peg_vision/hyperparams.py
 create mode 100644 experiments/icra_peg_vision/mdgps_cluster/hyperparams.py
 create mode 100644 experiments/icra_peg_vision/mdgps_on/hyperparams.py
 create mode 100644 experiments/icra_reacher/badmm/hyperparams.py
 create mode 100644 experiments/icra_reacher/hyperparams.py
 create mode 100644 experiments/icra_reacher/mdgps_cluster/hyperparams.py
 create mode 100644 experiments/icra_reacher/mdgps_on/hyperparams.py
 create mode 100644 experiments/icra_reacher_vision/badmm/hyperparams.py
 create mode 100644 experiments/icra_reacher_vision/hyperparams.py
 create mode 100644 experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py
 create mode 100644 experiments/icra_reacher_vision/mdgps_on/hyperparams.py

diff --git a/experiments/icra_peg_2D/badmm/hyperparams.py b/experiments/icra_peg_2D/badmm/hyperparams.py
new file mode 100644
index 000000000..dfa82e37b
--- /dev/null
+++ b/experiments/icra_peg_2D/badmm/hyperparams.py
@@ -0,0 +1,75 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_badmm import AlgorithmBADMM
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 4
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create train positions.
+xs = np.linspace(default.lower, default.upper, 2)
+ys = np.linspace(default.lower, default.upper, 2)
+pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmBADMM,
+    'inner_iterations': 4,
+    'sample_on_policy': False,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]),
+    'policy_dual_rate': 0.1,
+    'init_pol_wt': 0.002,
+    'fixed_lg_step': 3,
+    'kl_step': 0.5,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_2D/hyperparams.py b/experiments/icra_peg_2D/hyperparams.py
new file mode 100644
index 000000000..cb4286a49
--- /dev/null
+++ b/experiments/icra_peg_2D/hyperparams.py
@@ -0,0 +1,150 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.algorithm.policy.policy_prior import PolicyPrior
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
+from gps.gui.config import generate_experiment_info
+
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 7,
+    JOINT_VELOCITIES: 7,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 7,
+}
+
+PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+}
+
+# Set up grid of test positions.
+lower = -0.25
+upper = 0.15
+
+TEST_CONDITIONS = 49
+xs = np.linspace(lower + 0.01, upper - 0.01, 7)
+ys = np.linspace(lower + 0.01, upper - 0.01, 7)
+test_pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/pr2_arm3d.xml',
+    'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
+                          np.zeros(7)]),
+    'dt': 0.05,
+    'substeps': 5,
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+}
+
+algorithm = {
+    'iterations': 12,
+    'policy_sample_mode': 'replace',
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains':  1.0 / PR2_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 1.0,
+    'stiffness': 1.0,
+    'stiffness_vel': 0.5,
+    'final_weight': 50.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 1e-3 / PR2_GAINS,
+}
+
+fk_cost = {
+    'type': CostFK,
+    'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+    'wp': np.array([2, 2, 1, 2, 2, 1]),
+    'l1': 0.1,
+    'l2': 10.0,
+    'alpha': 1e-5,
+}
+
+# Create second cost function for last step only.
+final_cost = {
+    'type': CostFK,
+    'ramp_option': RAMP_FINAL_ONLY,
+    'target_end_effector': fk_cost['target_end_effector'],
+    'wp': fk_cost['wp'],
+    'l1': 1.0,
+    'l2': 0.0,
+    'alpha': 1e-5,
+    'wp_final_multiplier': 10.0,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, fk_cost, final_cost],
+    'weights': [1.0, 1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 50,
+        'min_samples_per_cluster': 40,
+        'max_samples': 20,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptCaffe,
+    'iterations': 5000,
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 50,
+    'min_samples_per_cluster': 40,
+}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'verbose_trials': 0,
+    'verbose_policy_trials': 1,
+    'agent': agent,
+    'gui_on': True,
+    'random_seed': list(range(5)),
+}
diff --git a/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py b/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py
new file mode 100644
index 000000000..d38202df4
--- /dev/null
+++ b/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py
@@ -0,0 +1,74 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 20
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the default config.
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+    'randomly_sample_bodypos': True,
+    'sampling_range_bodypos': [np.array([default.lower, default.lower, 0.0]),
+                                np.array([default.upper, default.upper, 0.0])],
+    'prohibited_ranges_bodypos': [],
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'num_clusters': 4,
+    'cluster_method': 'traj_em',
+    'kl_step': 2.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 1.0,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 1,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_2D/mdgps_on/hyperparams.py b/experiments/icra_peg_2D/mdgps_on/hyperparams.py
new file mode 100644
index 000000000..a75c6a679
--- /dev/null
+++ b/experiments/icra_peg_2D/mdgps_on/hyperparams.py
@@ -0,0 +1,70 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 4
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create train positions.
+xs = np.linspace(default.lower, default.upper, 2)
+ys = np.linspace(default.lower, default.upper, 2)
+pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'kl_step': 2.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.5,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_3D/badmm/hyperparams.py b/experiments/icra_peg_3D/badmm/hyperparams.py
new file mode 100644
index 000000000..f245f5ef4
--- /dev/null
+++ b/experiments/icra_peg_3D/badmm/hyperparams.py
@@ -0,0 +1,76 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_badmm import AlgorithmBADMM
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 8
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create train positions.
+xs = np.linspace(default.lower, default.upper, 2)
+ys = np.linspace(default.lower, default.upper, 2)
+zs = np.linspace(default.lower, default.upper, 2)
+pos_body_offset = [np.array([x,y,z]) for x in xs for y in ys for z in zs]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmBADMM,
+    'inner_iterations': 4,
+    'sample_on_policy': False,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'lg_step_schedule': np.array([1e-4, 1e-3, 1e-2, 1e-2]),
+    'policy_dual_rate': 0.1,
+    'init_pol_wt': 0.002,
+    'fixed_lg_step': 3,
+    'kl_step': 0.25,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_3D/hyperparams.py b/experiments/icra_peg_3D/hyperparams.py
new file mode 100644
index 000000000..9b925954e
--- /dev/null
+++ b/experiments/icra_peg_3D/hyperparams.py
@@ -0,0 +1,151 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.algorithm.policy.policy_prior import PolicyPrior
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
+from gps.gui.config import generate_experiment_info
+
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 7,
+    JOINT_VELOCITIES: 7,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 7,
+}
+
+PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+}
+
+# Set up grid of test positions.
+lower = -0.15
+upper = 0.15
+
+TEST_CONDITIONS = 64
+xs = np.linspace(lower + 0.01, upper - 0.01, 4)
+ys = np.linspace(lower + 0.01, upper - 0.01, 4)
+zs = np.linspace(lower + 0.01, upper - 0.01, 4)
+test_pos_body_offset = [np.array([x,y,z]) for x in xs for y in ys for z in zs]
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/pr2_arm3d.xml',
+    'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
+                          np.zeros(7)]),
+    'dt': 0.05,
+    'substeps': 5,
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+}
+
+algorithm = {
+    'iterations': 20,
+    'policy_sample_mode': 'replace',
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains':  1.0 / PR2_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 1.0,
+    'stiffness': 1.0,
+    'stiffness_vel': 0.5,
+    'final_weight': 50.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 1e-3 / PR2_GAINS,
+}
+
+fk_cost = {
+    'type': CostFK,
+    'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+    'wp': np.array([2, 2, 1, 2, 2, 1]),
+    'l1': 0.1,
+    'l2': 10.0,
+    'alpha': 1e-5,
+}
+
+# Create second cost function for last step only.
+final_cost = {
+    'type': CostFK,
+    'ramp_option': RAMP_FINAL_ONLY,
+    'target_end_effector': fk_cost['target_end_effector'],
+    'wp': fk_cost['wp'],
+    'l1': 1.0,
+    'l2': 0.0,
+    'alpha': 1e-5,
+    'wp_final_multiplier': 10.0,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, fk_cost, final_cost],
+    'weights': [1.0, 1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 50,
+        'min_samples_per_cluster': 40,
+        'max_samples': 40,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptCaffe,
+    'iterations': 5000,
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 50,
+    'min_samples_per_cluster': 40,
+}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'verbose_trials': 0,
+    'verbose_policy_trials': 0,
+    'agent': agent,
+    'gui_on': True,
+    'random_seed': list(range(5)),
+}
diff --git a/experiments/icra_peg_3D/mdgps_cluster/hyperparams.py b/experiments/icra_peg_3D/mdgps_cluster/hyperparams.py
new file mode 100644
index 000000000..605d3ef5e
--- /dev/null
+++ b/experiments/icra_peg_3D/mdgps_cluster/hyperparams.py
@@ -0,0 +1,75 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 40
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the default config.
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+    'randomly_sample_bodypos': True,
+    'sampling_range_bodypos': [
+            np.array([default.lower, default.lower, default.lower]),
+            np.array([default.upper, default.upper, default.upper]) ],
+    'prohibited_ranges_bodypos': [],
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'num_clusters': 8,
+    'cluster_method': 'kmeans',
+    'kl_step': 2.0,
+    'max_step_mult': 1.5,
+    'min_step_mult': 1.0,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 1,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_3D/mdgps_on/hyperparams.py b/experiments/icra_peg_3D/mdgps_on/hyperparams.py
new file mode 100644
index 000000000..abd49f365
--- /dev/null
+++ b/experiments/icra_peg_3D/mdgps_on/hyperparams.py
@@ -0,0 +1,71 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 8
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create train positions.
+xs = np.linspace(default.lower, default.upper, 2)
+ys = np.linspace(default.lower, default.upper, 2)
+zs = np.linspace(default.lower, default.upper, 2)
+pos_body_offset = [np.array([x,y,z]) for x in xs for y in ys for z in zs]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'kl_step': 2.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.5,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_vision/badmm/hyperparams.py b/experiments/icra_peg_vision/badmm/hyperparams.py
new file mode 100644
index 000000000..d0f9d410b
--- /dev/null
+++ b/experiments/icra_peg_vision/badmm/hyperparams.py
@@ -0,0 +1,74 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_badmm import AlgorithmBADMM
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 9
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create train positions.
+xs = np.linspace(default.lower, default.upper, 3)
+ys = np.linspace(default.lower, default.upper, 3)
+pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmBADMM,
+    'sample_on_policy': False,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'lg_step_schedule': np.array([1e-4, 1e-3, 1e-2, 1e-2]),
+    'policy_dual_rate': 0.1,
+    'init_pol_wt': 0.002,
+    'fixed_lg_step': 3,
+    'kl_step': 0.5,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_vision/hyperparams.py b/experiments/icra_peg_vision/hyperparams.py
new file mode 100644
index 000000000..fc811d356
--- /dev/null
+++ b/experiments/icra_peg_vision/hyperparams.py
@@ -0,0 +1,175 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.policy_opt.policy_opt_tf import PolicyOptTf
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.algorithm.policy.policy_prior import PolicyPrior
+from gps.algorithm.policy_opt.tf_model_example import multi_modal_network, multi_modal_network_fp
+
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, RGB_IMAGE, RGB_IMAGE_SIZE, ACTION, \
+        END_EFFECTOR_POINTS_NO_TARGET, END_EFFECTOR_POINT_VELOCITIES_NO_TARGET
+from gps.gui.config import generate_experiment_info
+
+IMAGE_WIDTH = 80
+IMAGE_HEIGHT = 64
+IMAGE_CHANNELS = 3
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 7,
+    JOINT_VELOCITIES: 7,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 7,
+    RGB_IMAGE: IMAGE_WIDTH*IMAGE_HEIGHT*IMAGE_CHANNELS,
+    RGB_IMAGE_SIZE: 3,
+}
+
+PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+}
+
+# Set up grid of test positions.
+lower = -0.15
+upper = 0.15
+
+TEST_CONDITIONS = 25
+xs = np.linspace(lower + 0.01, upper - 0.01, 5)
+ys = np.linspace(lower + 0.01, upper - 0.01, 5)
+test_pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/pr2_arm3d.xml',
+    'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
+                          np.zeros(7)]),
+    'dt': 0.05,
+    'substeps': 5,
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, RGB_IMAGE],
+    'meta_include': [RGB_IMAGE_SIZE],
+    'image_width': IMAGE_WIDTH,
+    'image_height': IMAGE_HEIGHT,
+    'image_channels': IMAGE_CHANNELS,
+    'camera_pos': np.array([0., 0., 3., 0., 0.2, 0.5]),
+}
+
+algorithm = {
+    'iterations': 15,
+    'policy_sample_mode': 'replace',
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains':  1.0 / PR2_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 1.0,
+    'stiffness': 1.0,
+    'stiffness_vel': 0.5,
+    'final_weight': 50.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 1e-3 / PR2_GAINS,
+}
+
+fk_cost = {
+    'type': CostFK,
+    'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+    'wp': np.array([2, 2, 1, 2, 2, 1]),
+    'l1': 0.1,
+    'l2': 10.0,
+    'alpha': 1e-5,
+}
+
+# Create second cost function for last step only.
+final_cost = {
+    'type': CostFK,
+    'ramp_option': RAMP_FINAL_ONLY,
+    'target_end_effector': fk_cost['target_end_effector'],
+    'wp': fk_cost['wp'],
+    'l1': 1.0,
+    'l2': 0.0,
+    'alpha': 1e-5,
+    'wp_final_multiplier': 10.0,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, fk_cost, final_cost],
+    'weights': [1.0, 1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 40,
+        'min_samples_per_cluster': 40,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptTf,
+    'network_params': {
+        'num_filters': [15, 15, 15],
+        'obs_include': agent['obs_include'],
+        'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES],
+        'obs_image_data': [RGB_IMAGE],
+        'image_width': IMAGE_WIDTH,
+        'image_height': IMAGE_HEIGHT,
+        'image_channels': IMAGE_CHANNELS,
+        'sensor_dims': SENSOR_DIMS,
+    },
+    'network_model': multi_modal_network_fp,
+    'fc_only_iterations': 5000,
+    'init_iterations': 1000,
+    'iterations': 1000,
+    'batch_size': 100,
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 40,
+    'min_samples_per_cluster': 40,
+}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'verbose_trials': np.inf,
+    'verbose_policy_trials': np.inf,
+    'agent': agent,
+    'gui_on': True,
+    'random_seed': list(range(5)),
+}
diff --git a/experiments/icra_peg_vision/mdgps_cluster/hyperparams.py b/experiments/icra_peg_vision/mdgps_cluster/hyperparams.py
new file mode 100644
index 000000000..70c50ac22
--- /dev/null
+++ b/experiments/icra_peg_vision/mdgps_cluster/hyperparams.py
@@ -0,0 +1,76 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 45
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the default config.
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+    'randomly_sample_bodypos': True,
+    'sampling_range_bodypos': [np.array([default.lower, default.lower, 0.0]),
+                                np.array([default.upper, default.upper, 0.0])],
+    'prohibited_ranges_bodypos': [],
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'num_clusters': 9,
+#    'cluster_method': 'traj_em',
+    'cluster_method': 'kmeans',
+    'kl_step': 2.0,
+    'max_step_mult': 1.0,
+    'min_step_mult': 0.5,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+algorithm['dynamics']['prior']['max_samples'] = TRAIN_CONDITIONS
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 1,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_vision/mdgps_on/hyperparams.py b/experiments/icra_peg_vision/mdgps_on/hyperparams.py
new file mode 100644
index 000000000..e5f6e2748
--- /dev/null
+++ b/experiments/icra_peg_vision/mdgps_on/hyperparams.py
@@ -0,0 +1,70 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 9
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create train positions.
+xs = np.linspace(default.lower, default.upper, 3)
+ys = np.linspace(default.lower, default.upper, 3)
+pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'kl_step': 2.0,
+    'max_step_mult': 1.0,
+    'min_step_mult': 0.2,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_reacher/badmm/hyperparams.py b/experiments/icra_reacher/badmm/hyperparams.py
new file mode 100644
index 000000000..1a3c1447f
--- /dev/null
+++ b/experiments/icra_reacher/badmm/hyperparams.py
@@ -0,0 +1,86 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_badmm import AlgorithmBADMM
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 8
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+x0 = [np.zeros(6) for _ in range(TRAIN_CONDITIONS)]
+
+pos_body_offset = [
+        np.array([.4, .0, .7]),
+        np.array([.4, .0, -.7]),
+        np.array([-.4, .0, .7]),
+        np.array([-.4, .0, -.7]),
+        np.array([.7, .0, .4]),
+        np.array([.7, .0, -.4]),
+        np.array([-.7, .0, .4]),
+        np.array([-.7, .0, -.4]) ]
+
+# Add test positions.
+x0 += default.test_x0
+pos_body_offset += default.test_pos_body
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'x0': x0,
+    'pos_body_idx': default.pos_body_idx,
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmBADMM,
+    'inner_iterations': 2,
+    'sample_on_policy': False,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]),
+    'policy_dual_rate': 0.1,
+    'init_pol_wt': 0.002,
+    'fixed_lg_step': 3,
+    'kl_step': 0.5,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_reacher/hyperparams.py b/experiments/icra_reacher/hyperparams.py
new file mode 100644
index 000000000..0fa8a6571
--- /dev/null
+++ b/experiments/icra_reacher/hyperparams.py
@@ -0,0 +1,143 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_state import CostState
+from gps.algorithm.cost.cost_utils import evall1l2term
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.algorithm.policy.policy_prior import PolicyPrior
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
+from gps.gui.config import generate_experiment_info
+
+from gps.utility.general_utils import sample_params
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 3,
+    JOINT_VELOCITIES: 3,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 3,
+}
+
+GAINS = np.ones(SENSOR_DIMS[ACTION])
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+}
+
+# Set up ranges.
+x0_upper = np.array([0., 0., 0., 0., 0., 0.])
+#x0_upper = np.array([2., 0., 0., 0., 0., 0.])
+x0_lower = -x0_upper
+
+pos_body_idx = np.array([1]),
+pos_body_upper = np.array([1., 0., 1.])
+pos_body_lower = -pos_body_upper
+
+TEST_CONDITIONS = 40
+np.random.seed(47)
+test_x0 = [sample_params([x0_lower, x0_upper], []) \
+        for _ in range(TEST_CONDITIONS)]
+test_pos_body = [sample_params([pos_body_lower, pos_body_upper], []) \
+        for _ in range(TEST_CONDITIONS)]
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/arm_3link_reach.xml',
+    'x0': np.zeros(6),
+    'dt': 0.05,
+    'substeps': 5,
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 6., 0., 0., 0., 0.]),
+}
+
+algorithm = {
+    'iterations': 15,
+    'policy_sample_mode': 'replace',
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains': 100.0*GAINS,
+    'init_acc': np.zeros_like(GAINS),
+    'init_var': 10.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 1e-3 / GAINS,
+}
+
+# Diff between average of end_effectors and block.
+touch_cost = {
+    'type': CostState,
+    'data_type': END_EFFECTOR_POINTS,
+    'A' : np.c_[np.eye(3), -np.eye(3)],
+    'l1': 1.0,
+    'evalnorm': evall1l2term,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, touch_cost],
+    'weights': [1.0, 1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 40,
+        'min_samples_per_cluster': 25,
+#        'max_samples': 40,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptCaffe,
+    'iterations': 5000,
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 40,
+    'min_samples_per_cluster': 25,
+}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'verbose_trials': 8,
+    'verbose_policy_trials': 8,
+    'agent': agent,
+    'gui_on': True,
+    'random_seed': list(range(5)),
+}
diff --git a/experiments/icra_reacher/mdgps_cluster/hyperparams.py b/experiments/icra_reacher/mdgps_cluster/hyperparams.py
new file mode 100644
index 000000000..1625e0859
--- /dev/null
+++ b/experiments/icra_reacher/mdgps_cluster/hyperparams.py
@@ -0,0 +1,79 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 40
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+x0 = [np.zeros(6) for _ in range(TRAIN_CONDITIONS)]
+pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]
+
+# Add test positions.
+x0 += default.test_x0
+pos_body_offset += default.test_pos_body
+
+# Update the default config.
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'x0': x0,
+    'randomly_sample_x0': True,
+    'sampling_range_x0': [default.x0_lower, default.x0_upper],
+    'pos_body_idx': default.pos_body_idx,
+    'pos_body_offset': pos_body_offset,
+    'randomly_sample_bodypos': True,
+    'sampling_range_bodypos': [default.pos_body_lower, default.pos_body_upper],
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+#    'cluster_method': 'traj_em',
+    'cluster_method': 'kmeans',
+    'num_clusters': 8,
+    'kl_step': 1.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+algorithm['dynamics']['prior']['max_samples'] = TRAIN_CONDITIONS
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 1,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_reacher/mdgps_on/hyperparams.py b/experiments/icra_reacher/mdgps_on/hyperparams.py
new file mode 100644
index 000000000..66336ba4e
--- /dev/null
+++ b/experiments/icra_reacher/mdgps_on/hyperparams.py
@@ -0,0 +1,81 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 8
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+x0 = [np.zeros(6) for _ in range(TRAIN_CONDITIONS)]
+
+pos_body_offset = [
+        np.array([.4, .0, .7]),
+        np.array([.4, .0, -.7]),
+        np.array([-.4, .0, .7]),
+        np.array([-.4, .0, -.7]),
+        np.array([.7, .0, .4]),
+        np.array([.7, .0, -.4]),
+        np.array([-.7, .0, .4]),
+        np.array([-.7, .0, -.4]) ]
+
+# Add test positions.
+x0 += default.test_x0
+pos_body_offset += default.test_pos_body
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'x0': x0,
+    'pos_body_idx': default.pos_body_idx,
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'kl_step': 1.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_reacher_vision/badmm/hyperparams.py b/experiments/icra_reacher_vision/badmm/hyperparams.py
new file mode 100644
index 000000000..459e785dd
--- /dev/null
+++ b/experiments/icra_reacher_vision/badmm/hyperparams.py
@@ -0,0 +1,93 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_badmm import AlgorithmBADMM
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 12
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create body positions
+pos_body_offset = []
+
+# Outer square
+bound = 0.75
+xs = np.linspace(-bound, bound, 2)
+zs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs]
+
+# Inner square
+bound = 0.25
+xs = np.linspace(-bound, bound, 2)
+zs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs]
+
+# Middle diamond
+bound = 0.5
+xs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([x,0,0]) for x in xs]
+zs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([0,0,z]) for z in zs]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': default.pos_body_idx,
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmBADMM,
+    'inner_iterations': 2,
+    'sample_on_policy': False,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]),
+    'policy_dual_rate': 0.1,
+    'init_pol_wt': 0.002,
+    'fixed_lg_step': 3,
+    'kl_step': 1.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_reacher_vision/hyperparams.py b/experiments/icra_reacher_vision/hyperparams.py
new file mode 100644
index 000000000..4e2b47850
--- /dev/null
+++ b/experiments/icra_reacher_vision/hyperparams.py
@@ -0,0 +1,166 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_state import CostState
+from gps.algorithm.cost.cost_utils import evall1l2term
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.policy_opt.policy_opt_tf import PolicyOptTf
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.algorithm.policy.policy_prior import PolicyPrior
+from gps.algorithm.policy_opt.tf_model_example import multi_modal_network, multi_modal_network_fp
+
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, \
+        RGB_IMAGE, RGB_IMAGE_SIZE, ACTION, \
+        END_EFFECTOR_POINTS_NO_TARGET, END_EFFECTOR_POINT_VELOCITIES_NO_TARGET
+from gps.gui.config import generate_experiment_info
+
+from gps.utility.general_utils import sample_params
+
+IMAGE_WIDTH = 80
+IMAGE_HEIGHT = 64
+IMAGE_CHANNELS = 3
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 3,
+    JOINT_VELOCITIES: 3,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+#    END_EFFECTOR_POINTS_NO_TARGET: 3,
+#    END_EFFECTOR_POINT_VELOCITIES_NO_TARGET: 3,
+    ACTION: 3,
+    RGB_IMAGE: IMAGE_WIDTH*IMAGE_HEIGHT*IMAGE_CHANNELS,
+    RGB_IMAGE_SIZE: 3,
+}
+
+GAINS = np.ones(SENSOR_DIMS[ACTION])
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+}
+
+# Set up ranges.
+pos_body_idx = np.array([1]),
+pos_body_upper = np.array([1., 0., 1.])
+pos_body_lower = -pos_body_upper
+
+TEST_CONDITIONS = 40
+np.random.seed(47)
+test_pos_body = [sample_params([pos_body_lower, pos_body_upper], []) \
+        for _ in range(TEST_CONDITIONS)]
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/arm_3link_reach.xml',
+    'x0': np.zeros(6),
+    'dt': 0.05,
+    'substeps': 5,
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, RGB_IMAGE],
+    'meta_include': [RGB_IMAGE_SIZE],
+    'image_width': IMAGE_WIDTH,
+    'image_height': IMAGE_HEIGHT,
+    'image_channels': IMAGE_CHANNELS,
+    'camera_pos': np.array([0., 6., 0., 0., 0., 0.]),
+}
+
+algorithm = {
+    'iterations': 15,
+    'policy_sample_mode': 'replace',
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains': 100.0*GAINS,
+    'init_acc': np.zeros_like(GAINS),
+    'init_var': 10.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 1e-3 / GAINS,
+}
+
+# Diff between average of end_effectors and block.
+touch_cost = {
+    'type': CostState,
+    'data_type': END_EFFECTOR_POINTS,
+    'A' : np.c_[np.eye(3), -np.eye(3)],
+    'l1': 1.0,
+    'evalnorm': evall1l2term,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, touch_cost],
+    'weights': [1.0, 1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 40,
+        'min_samples_per_cluster': 25,
+#        'max_samples': 40,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptTf,
+    'network_params': {
+        'num_filters': [15, 15, 15],
+        'obs_include': agent['obs_include'],
+        'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES],
+        'obs_image_data': [RGB_IMAGE],
+        'image_width': IMAGE_WIDTH,
+        'image_height': IMAGE_HEIGHT,
+        'image_channels': IMAGE_CHANNELS,
+        'sensor_dims': SENSOR_DIMS,
+    },
+    'network_model': multi_modal_network_fp,
+    'fc_only_iterations': 5000,
+    'init_iterations': 1000,
+    'iterations': 1000,
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 40,
+    'min_samples_per_cluster': 25,
+}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'verbose_trials': np.inf,
+    'verbose_policy_trials': np.inf,
+    'agent': agent,
+    'gui_on': True,
+    'random_seed': list(range(5)),
+}
diff --git a/experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py b/experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py
new file mode 100644
index 000000000..abbabfc55
--- /dev/null
+++ b/experiments/icra_reacher_vision/mdgps_cluster/hyperparams.py
@@ -0,0 +1,76 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 60
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body
+
+# Update the default config.
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'randomly_sample_x0': True,
+    'sampling_range_x0': [default.x0_lower, default.x0_upper],
+    'pos_body_idx': default.pos_body_idx,
+    'pos_body_offset': pos_body_offset,
+    'randomly_sample_bodypos': True,
+    'sampling_range_bodypos': [default.pos_body_lower, default.pos_body_upper],
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+#    'cluster_method': 'traj_em',
+    'cluster_method': 'kmeans',
+    'num_clusters': 12,
+    'kl_step': 1.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.05,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+algorithm['dynamics']['prior']['max_samples'] = TRAIN_CONDITIONS
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 1,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_reacher_vision/mdgps_on/hyperparams.py b/experiments/icra_reacher_vision/mdgps_on/hyperparams.py
new file mode 100644
index 000000000..28234a7a0
--- /dev/null
+++ b/experiments/icra_reacher_vision/mdgps_on/hyperparams.py
@@ -0,0 +1,88 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 12
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create body positions
+pos_body_offset = []
+
+# Outer square
+bound = 0.75
+xs = np.linspace(-bound, bound, 2)
+zs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs]
+
+# Inner square
+bound = 0.25
+xs = np.linspace(-bound, bound, 2)
+zs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([x,0,z]) for x in xs for z in zs]
+
+# Middle diamond
+bound = 0.5
+xs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([x,0,0]) for x in xs]
+zs = np.linspace(-bound, bound, 2)
+pos_body_offset += [np.array([0,0,z]) for z in zs]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': default.pos_body_idx,
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'kl_step': 1.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+algorithm['dynamics']['prior']['max_samples'] = 5*TRAIN_CONDITIONS
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)