2 changes: 2 additions & 0 deletions .gitignore
@@ -7,3 +7,5 @@ rlkit/launchers/conf_private.py
 MANIFEST
 *.egg-info
 \.idea/
+.pytest_cache
+.coverage
7 changes: 7 additions & 0 deletions README.md
@@ -214,6 +214,13 @@ This requires some knowledge of AWS and/or GCP, which is beyond the scope of
 this README.
 To learn more about `doodad`, [go to the repository](https://github.com/vitchyr/doodad/), which is based on [this original repository](https://github.com/justinjfu/doodad/).
 
+### Testing
+A set of basic regression tests can be run with:
+```
+nose2 -v -B -s tests/regression/basic
+```
+Other directories in `tests/regression` have project-specific tests.
+
 # Requests for pull-requests
 - Implement policy-gradient algorithms.
 - Implement model-based algorithms.
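Note on the new testing section: nose2 discovers standard `unittest` cases, so a file under `tests/regression/basic` only needs to follow the usual discovery conventions to be picked up. A hypothetical minimal example (the actual tests added by this PR may look quite different):

```
import unittest


class TestImportSmoke(unittest.TestCase):
    """Hypothetical smoke test; not a file from this PR."""

    def test_rlkit_imports(self):
        # A minimal regression check: the package imports cleanly.
        import rlkit.torch.pytorch_util as ptu
        self.assertIsNotNone(ptu)


if __name__ == "__main__":
    unittest.main()
```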
Empty file added examples/__init__.py
Empty file.
Empty file added examples/awac/__init__.py
Empty file.
Empty file added examples/awac/hand/__init__.py
Empty file.
53 changes: 16 additions & 37 deletions examples/awac/hand/awac1.py
@@ -3,19 +3,22 @@
 
 import rlkit.util.hyperparameter as hyp
 from rlkit.launchers.launcher_util import run_experiment
+from rlkit.launchers.arglauncher import run_variants
 
 from rlkit.torch.sac.policies import GaussianPolicy
 from rlkit.torch.networks import Clamp
 
-if __name__ == "__main__":
+def main():
     variant = dict(
-        num_epochs=501,
-        num_eval_steps_per_epoch=1000,
-        num_trains_per_train_loop=1000,
-        num_expl_steps_per_train_loop=1000,
-        min_num_steps_before_training=1000,
+        algo_kwargs=dict(
+            num_epochs=501,
+            num_eval_steps_per_epoch=1000,
+            num_trains_per_train_loop=1000,
+            num_expl_steps_per_train_loop=1000,
+            min_num_steps_before_training=1000,
+            batch_size=1024,
+        ),
         max_path_length=1000,
-        batch_size=1024,
         algorithm="AWAC",
         replay_buffer_size=int(1E6),
@@ -99,35 +102,11 @@
         search_space, default_parameters=variant,
     )
 
-    # n_seeds = 1
-    # mode = 'local'
-    # exp_prefix = 'dev-{}'.format(
-    #     __file__.replace('/', '-').replace('_', '-').split('.')[0]
-    # )
-
-    n_seeds = 3
-    mode = 'ec2'
-    exp_prefix = 'hand-awac1'
-
-    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
-        for _ in range(n_seeds):
-            run_experiment(
-                experiment,
-                exp_prefix=exp_prefix,
-                mode=mode,
-                variant=variant,
-                use_gpu=True,
-                snapshot_gap=200,
-                snapshot_mode='gap_and_last',
-                num_exps_per_instance=3,
-                gcp_kwargs=dict(
-                    zone='us-west1-b',
-                ),
-            )
-
-    # variants = []
-    # for variant in sweeper.iterate_hyperparameters():
-    #     variants.append(variant)
-
-    # run_variants(experiment, variants, process_args)
+    variants = []
+    for variant in sweeper.iterate_hyperparameters():
+        variants.append(variant)
+
+    run_variants(experiment, variants, process_args)
+
+if __name__ == "__main__":
+    main()
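The change above replaces the per-variant `run_experiment` loop, with its hard-coded seeds, launch mode, and EC2/GCP settings, by a single `run_variants` call. As a rough sketch of the control flow being delegated, assuming `run_variants` and `process_args` behave like this stand-in (the real implementations live in `rlkit.launchers.arglauncher` and the example scripts, and may differ):

```
def run_variants_sketch(experiment_fn, variants, process_args_fn):
    # Assumed behavior only: normalize each variant, tag it, then launch.
    for exp_id, variant in enumerate(variants):
        processed = process_args_fn(variant)  # assumed normalization hook
        if processed is not None:
            variant = processed
        variant["exp_id"] = exp_id
        experiment_fn(variant)
```

One practical gain: launch details (seed counts, cloud zone, snapshot settings) move out of each example script and into the shared launcher.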
Empty file added examples/awac/mujoco/__init__.py
Empty file.
53 changes: 16 additions & 37 deletions examples/awac/mujoco/awac1.py
@@ -3,19 +3,22 @@
 
 import rlkit.util.hyperparameter as hyp
 from rlkit.launchers.launcher_util import run_experiment
+from rlkit.launchers.arglauncher import run_variants
 
 from rlkit.torch.sac.policies import GaussianPolicy
 from rlkit.torch.networks import Clamp
 
-if __name__ == "__main__":
+def main():
     variant = dict(
-        num_epochs=501,
-        num_eval_steps_per_epoch=1000,
-        num_trains_per_train_loop=1000,
-        num_expl_steps_per_train_loop=1000,
-        min_num_steps_before_training=1000,
+        algo_kwargs=dict(
+            num_epochs=501,
+            num_eval_steps_per_epoch=1000,
+            num_trains_per_train_loop=1000,
+            num_expl_steps_per_train_loop=1000,
+            min_num_steps_before_training=1000,
+            batch_size=1024,
+        ),
         max_path_length=1000,
-        batch_size=1024,
         replay_buffer_size=int(1E6),
         layer_size=256,
         num_layers=2,
@@ -87,35 +90,11 @@
         search_space, default_parameters=variant,
    )
 
-    n_seeds = 1
-    mode = 'local'
-    exp_prefix = 'dev-{}'.format(
-        __file__.replace('/', '-').replace('_', '-').split('.')[0]
-    )
-
-    # n_seeds = 3
-    # mode = 'gcp'
-    # exp_prefix = 'skew-fit-pickup-reference-post-refactor'
-
-    for exp_id, variant in enumerate(sweeper.iterate_hyperparameters()):
-        for _ in range(n_seeds):
-            run_experiment(
-                experiment,
-                exp_prefix=exp_prefix,
-                mode=mode,
-                variant=variant,
-                use_gpu=use_gpu,
-                snapshot_gap=200,
-                snapshot_mode='gap_and_last',
-                num_exps_per_instance=3,
-                gcp_kwargs=dict(
-                    zone='us-west1-b',
-                ),
-            )
-
-    # variants = []
-    # for variant in sweeper.iterate_hyperparameters():
-    #     variants.append(variant)
-
-    # run_variants(experiment, variants, process_args)
+    variants = []
+    for variant in sweeper.iterate_hyperparameters():
+        variants.append(variant)
+
+    run_variants(experiment, variants, process_args)
+
+if __name__ == "__main__":
+    main()
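Same refactor as the hand example; it also drops the removed `use_gpu=use_gpu` argument, which referenced a name defined nowhere in the visible hunk. For readers unfamiliar with the sweeper, `iterate_hyperparameters` plausibly yields one variant per point in the cartesian product of `search_space`, overlaid on the defaults. A flat-key sketch of that behavior (the real `DeterministicHyperparameterSweeper` may also support nested, dotted keys):

```
import itertools


def iterate_hyperparameters_sketch(search_space, default_parameters):
    # One variant per point in the cartesian product of the swept values.
    keys = list(search_space)
    for values in itertools.product(*(search_space[k] for k in keys)):
        variant = dict(default_parameters)  # shallow copy of the defaults
        variant.update(zip(keys, values))
        yield variant
```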
69 changes: 41 additions & 28 deletions examples/ddpg.py
@@ -18,13 +18,24 @@
 import rlkit.torch.pytorch_util as ptu
 from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
 
+import random
+import torch
+import numpy as np
+
 def experiment(variant):
     eval_env = NormalizedBoxEnv(HalfCheetahEnv())
     expl_env = NormalizedBoxEnv(HalfCheetahEnv())
     # Or for a specific version:
     # import gym
     # env = NormalizedBoxEnv(gym.make('HalfCheetah-v1'))
 
+    seed = variant["seed"]
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    eval_env.seed(seed)
+    expl_env.seed(seed)
+
     obs_dim = eval_env.observation_space.low.size
     action_dim = eval_env.action_space.low.size
     qf = ConcatMlp(
@@ -65,34 +76,36 @@ def experiment(variant):
     algorithm.to(ptu.device)
     algorithm.train()
 
+variant = dict(
+    algorithm_kwargs=dict(
+        num_epochs=1000,
+        num_eval_steps_per_epoch=1000,
+        num_trains_per_train_loop=1000,
+        num_expl_steps_per_train_loop=1000,
+        min_num_steps_before_training=10000,
+        max_path_length=1000,
+        batch_size=128,
+    ),
+    trainer_kwargs=dict(
+        use_soft_update=True,
+        tau=1e-2,
+        discount=0.99,
+        qf_learning_rate=1e-3,
+        policy_learning_rate=1e-4,
+    ),
+    qf_kwargs=dict(
+        hidden_sizes=[400, 300],
+    ),
+    policy_kwargs=dict(
+        hidden_sizes=[400, 300],
+    ),
+    replay_buffer_size=int(1E6),
+    seed=random.randint(0, 100000),
+)
+
 
-if __name__ == "__main__":
-    # noinspection PyTypeChecker
-    variant = dict(
-        algorithm_kwargs=dict(
-            num_epochs=1000,
-            num_eval_steps_per_epoch=1000,
-            num_trains_per_train_loop=1000,
-            num_expl_steps_per_train_loop=1000,
-            min_num_steps_before_training=10000,
-            max_path_length=1000,
-            batch_size=128,
-        ),
-        trainer_kwargs=dict(
-            use_soft_update=True,
-            tau=1e-2,
-            discount=0.99,
-            qf_learning_rate=1e-3,
-            policy_learning_rate=1e-4,
-        ),
-        qf_kwargs=dict(
-            hidden_sizes=[400, 300],
-        ),
-        policy_kwargs=dict(
-            hidden_sizes=[400, 300],
-        ),
-        replay_buffer_size=int(1E6),
-    )
-    # ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)
+def main():
     setup_logger('name-of-experiment', variant=variant)
     experiment(variant)
+
+if __name__ == "__main__":
+    main()
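The seeding block added to `experiment` seeds every RNG the run touches: Python's `random` (also used for the module-level seed draw), NumPy, torch, and both gym environments. The same six lines recur in the DQN example below, so they could be factored into a helper. A sketch under the same pre-gym-0.26 `env.seed` API the diff uses (`seed_everything` is a name introduced here, not part of rlkit):

```
import random

import numpy as np
import torch


def seed_everything(seed, *envs):
    """Seed Python, NumPy, and torch RNGs, plus any gym-style envs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    for env in envs:
        env.seed(seed)  # pre-gym-0.26 seeding API, as in this diff
```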
55 changes: 34 additions & 21 deletions examples/dqn_and_double_dqn.py
@@ -17,13 +17,23 @@
 from rlkit.samplers.data_collector import MdpPathCollector
 from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
 
+import random
+import torch
+import numpy as np
 
 def experiment(variant):
     expl_env = gym.make('CartPole-v0').env
     eval_env = gym.make('CartPole-v0').env
     obs_dim = expl_env.observation_space.low.size
     action_dim = eval_env.action_space.n
 
+    seed = variant["seed"]
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    eval_env.seed(seed)
+    expl_env.seed(seed)
+
     qf = Mlp(
         hidden_sizes=[32, 32],
         input_size=obs_dim,
@@ -70,28 +80,31 @@ def experiment(variant):
     algorithm.to(ptu.device)
     algorithm.train()
 
+variant = dict(
+    algorithm="DQN",
+    version="normal",
+    layer_size=256,
+    replay_buffer_size=int(1E6),
+    algorithm_kwargs=dict(
+        num_epochs=3000,
+        num_eval_steps_per_epoch=5000,
+        num_trains_per_train_loop=1000,
+        num_expl_steps_per_train_loop=1000,
+        min_num_steps_before_training=1000,
+        max_path_length=1000,
+        batch_size=256,
+    ),
+    trainer_kwargs=dict(
+        discount=0.99,
+        learning_rate=3E-4,
+    ),
+    seed=random.randint(0, 100000),
+)
+
 
-if __name__ == "__main__":
-    # noinspection PyTypeChecker
-    variant = dict(
-        algorithm="DQN",
-        version="normal",
-        layer_size=256,
-        replay_buffer_size=int(1E6),
-        algorithm_kwargs=dict(
-            num_epochs=3000,
-            num_eval_steps_per_epoch=5000,
-            num_trains_per_train_loop=1000,
-            num_expl_steps_per_train_loop=1000,
-            min_num_steps_before_training=1000,
-            max_path_length=1000,
-            batch_size=256,
-        ),
-        trainer_kwargs=dict(
-            discount=0.99,
-            learning_rate=3E-4,
-        ),
-    )
+def main():
     setup_logger('dqn-CartPole', variant=variant)
     # ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)
     experiment(variant)
+
+if __name__ == "__main__":
+    main()
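One subtlety in both refactored examples: `seed=random.randint(0, 100000)` is evaluated once at import time, and because the seed is stored in the variant, `setup_logger` should record it alongside the run. Reproducing a run would then be a matter of pinning that value; a sketch reusing the names from the script above (`12345` is a placeholder for a seed read back from an earlier run's logged variant):

```
# Sketch: pin the seed recovered from an earlier run's logged variant.
variant["seed"] = 12345  # placeholder value
setup_logger('dqn-CartPole-repro', variant=variant)
experiment(variant)
```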
Empty file added examples/her/__init__.py
Empty file.