config#


class SamplingConfig(num_epochs: int = 100, step_per_epoch: int = 30000, batch_size: int = 64, num_train_envs: int = - 1, num_test_envs: int = 1, buffer_size: int = 4096, step_per_collect: int = 2048, repeat_per_collect: int | None = 10, update_per_step: float = 1.0, start_timesteps: int = 0, start_timesteps_random: bool = False, replay_buffer_ignore_obs_next: bool = False, replay_buffer_save_only_last_obs: bool = False, replay_buffer_stack_num: int = 1)[source]#

Sampling, epochs, parallelization, buffers, collectors, and batching.

batch_size: int = 64#
buffer_size: int = 4096#
num_epochs: int = 100#
num_test_envs: int = 1#
num_train_envs: int = -1#

the number of training environments to use. If set to -1, use number of CPUs/threads.

repeat_per_collect: int | None = 10#
replay_buffer_ignore_obs_next: bool = False#
replay_buffer_save_only_last_obs: bool = False#
replay_buffer_stack_num: int = 1#
start_timesteps: int = 0#
start_timesteps_random: bool = False#
step_per_collect: int = 2048#
step_per_epoch: int = 30000#
update_per_step: float = 1.0#

Only used in off-policy algorithms. How many gradient steps to perform per step in the environment (i.e., per sample added to the buffer).