Hi all,
I’ve been able to train an agent with PPO without any problems. As the next stage in my research, I’m now attempting to train the same agent with SAC instead. However, I consistently run into an issue that crashes the PyTorch trainer with an IndexError. It doesn’t happen at the same point every run, but it always occurs after a few seconds of training (at 20x game speed, of course). I can run the Basic example project with SAC just fine, so I assume I’ve got something wrong in my config or agent setup, but I’m at a loss as to where to start, since all I’ve changed is the algorithm.
If anyone has any ideas on where to start troubleshooting, please let me know! I’m happy to provide more detail where needed.
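For reference, I launch training from the project root with something along these lines (the run id and flags vary between runs; nothing unusual beyond pointing at the config shown below):

mlagents-learn ./config.yaml --force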
Terminal Output
c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\torch\cuda\__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at ..\c10\cuda\CUDAFunctions.cpp:100.)
return torch._C._cuda_getDeviceCount() > 0
[Unity ML-Agents ASCII art banner]
Version information:
ml-agents: 0.29.0,
ml-agents-envs: 0.29.0,
Communicator API: 1.5.0,
PyTorch: 1.7.0+cu110
c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\torch\cuda\__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at ..\c10\cuda\CUDAFunctions.cpp:100.)
return torch._C._cuda_getDeviceCount() > 0
[INFO] Listening on port 5004. Start training by pressing the Play button in the Unity Editor.
[INFO] Connected to Unity environment with package version 2.0.1 and communication version 1.5.0
[INFO] Connected new brain: Racecar?team=0
[WARNING] Deleting TensorBoard data events.out.tfevents.1679229999.Sams-Desktop.37004.0 that was left over from a previous run.
[WARNING] Deleting TensorBoard data events.out.tfevents.1679229999.Sams-Desktop.37004.0.meta that was left over from a previous run.
[INFO] Hyperparameters for behavior name Racecar:
  trainer_type: sac
  hyperparameters:
    learning_rate: 0.0003
    learning_rate_schedule: constant
    batch_size: 1024
    buffer_size: 10240
    buffer_init_steps: 0
    tau: 0.005
    steps_per_update: 10.0
    save_replay_buffer: False
    init_entcoef: 0.01
    reward_signal_steps_per_update: 10.0
  network_settings:
    normalize: False
    hidden_units: 128
    num_layers: 2
    vis_encode_type: simple
    memory: None
    goal_conditioning_type: hyper
    deterministic: False
  reward_signals:
    extrinsic:
      gamma: 0.99
      strength: 1.0
      network_settings:
        normalize: False
        hidden_units: 128
        num_layers: 2
        vis_encode_type: simple
        memory: None
        goal_conditioning_type: hyper
        deterministic: False
  init_path: None
  keep_checkpoints: 5
  checkpoint_interval: 500000
  max_steps: 3000000
  time_horizon: 64
  summary_freq: 50000
  threaded: False
  self_play: None
  behavioral_cloning: None
[INFO] Exported ./Assets/Training-Results\test\Racecar\Racecar-1059.onnx
[INFO] Copied ./Assets/Training-Results\test\Racecar\Racecar-1059.onnx to ./Assets/Training-Results\test\Racecar.onnx.
Traceback (most recent call last):
File "c:\users\sam\appdata\local\programs\python\python37\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "c:\users\sam\appdata\local\programs\python\python37\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\Sam\AppData\Local\Programs\Python\Python37\Scripts\mlagents-learn.exe\__main__.py", line 7, in <module>
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\learn.py", line 260, in main
run_cli(parse_command_line())
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\learn.py", line 256, in run_cli
run_training(run_seed, options, num_areas)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\learn.py", line 132, in run_training
tc.start_learning(env_manager)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents_envs\timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\trainer_controller.py", line 176, in start_learning
n_steps = self.advance(env_manager)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents_envs\timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\trainer_controller.py", line 251, in advance
trainer.advance()
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\trainer\rl_trainer.py", line 315, in advance
if self._update_policy():
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents_envs\timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\sac\trainer.py", line 205, in _update_policy
policy_was_updated = self._update_sac_policy()
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\sac\trainer.py", line 272, in _update_sac_policy
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents_envs\timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\sac\optimizer_torch.py", line 552, in update
q1_stream = self._condense_q_streams(q1_out, disc_actions)
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\sac\optimizer_torch.py", line 448, in _condense_q_streams
item, self._action_spec.discrete_branches
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\torch\utils.py", line 269, in break_into_branches
for i in range(len(action_size))
File "c:\users\sam\appdata\local\programs\python\python37\lib\site-packages\mlagents\trainers\torch\utils.py", line 269, in <listcomp>
for i in range(len(action_size))
IndexError: too many indices for tensor of dimension 1
Press any key to continue . . .
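Looking at the end of the traceback, break_into_branches (mlagents/trainers/torch/utils.py) slices what it expects to be a 2-D (batch x logits) tensor into per-branch chunks, and "too many indices for tensor of dimension 1" is exactly what PyTorch raises when a two-index slice hits a 1-D tensor. A minimal sketch of the same failure outside ml-agents (the shape and branch sizes here are made up for illustration, not values from my run):

import torch

q_out = torch.zeros(1024)   # 1-D tensor, e.g. one Q-value per batch entry
branch_sizes = [2, 3]       # hypothetical discrete branch sizes
start = 0
for size in branch_sizes:
    chunk = q_out[:, start:start + size]  # two-index slice assumes a 2-D (batch, logits) tensor
    start += size
# IndexError: too many indices for tensor of dimension 1

So somewhere in _condense_q_streams a Q output seems to arrive with one dimension fewer than the code expects, but I can't see why that would happen in my project and not in the Basic example.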
config.yaml
behaviors:
  Racecar:
    trainer_type: sac
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      batch_size: 1024
      buffer_size: 10240
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 10.0
      save_replay_buffer: false
      init_entcoef: 0.01
      reward_signal_steps_per_update: 10.0
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 3000000
    time_horizon: 64
    summary_freq: 50000
checkpoint_settings:
  run_id: racecar
  initialize_from: null
  load_model: false
  resume: false
  force: false
  train_model: false
  inference: false
  results_dir: ./Assets/Training-Results
Agent Code
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using UnityEngine.InputSystem;
using System;
using UnityEditor.PackageManager.Requests;
using Unity.VisualScripting;
using Unity.MLAgents.Policies;
public class KartAgent : Agent
{
    // Config Params
    [SerializeField] KartController kartController;
    [SerializeField] TerrainColliderDetector[] terrainColliders;
    [SerializeField] GameObject checkpointParent;
    [SerializeField] bool handBreakEnabled = false;
    [SerializeField] bool reverseEnabled = false;
    [SerializeField] float steeringRange = 0.3f;
    [SerializeField] bool manualControl = false;

    [Header("Rewards")]
    [SerializeField] float stepReward = 0.001f;
    [SerializeField] float failReward = -1f;
    [SerializeField] float checkpointReward = 0.5f;
    [SerializeField] float timeOut = 30.0f;
    [SerializeField] [Range(1f, 20f)] float timeScale = 1f;

    // Cached Components

    // State
    bool failed = false;
    int checkpointIndex = 0;
    float elapsedTime = 0;
    RaceCheckpoint[] checkpoints;

    public override void Initialize()
    {
        // ResetScene();
        terrainColliders = FindObjectsOfType<TerrainColliderDetector>();
        checkpoints = checkpointParent.GetComponentsInChildren<RaceCheckpoint>(true);
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(kartController.GetRigidbody().velocity.magnitude);
        sensor.AddObservation(Vector3.Distance(transform.position, checkpoints[checkpointIndex].transform.position));
    }

    public override void OnActionReceived(ActionBuffers actions)
    {
        Time.timeScale = timeScale; // This shouldn't be needed, but is nice for demos
        if (!manualControl)
        {
            kartController.SetSpeed(Mathf.Abs(actions.ContinuousActions[0]));
            kartController.SetTurn(actions.ContinuousActions[1]);
        }
        elapsedTime += Time.deltaTime;
        foreach (TerrainColliderDetector terrainCollider in terrainColliders)
        {
            if (terrainCollider.GetAgentCollided())
            {
                failed = true;
                break;
            }
        }
        CheckCheckpoints();
        AddReward(kartController.GetRigidbody().velocity.magnitude * stepReward);
        AddReward(-Mathf.Abs(actions.ContinuousActions[1]) * stepReward);
        if (failed || Keyboard.current.rKey.isPressed)
        {
            Failure();
        }
        if (elapsedTime > timeOut)
        {
            ResetScene();
        }
        ShowReward();
    }

    void CheckCheckpoints()
    {
        if (checkpoints[checkpointIndex].KartHitCheckpoint())
        {
            Debug.Log($"Checkpoint {checkpointIndex + 1} hit!");
            AddReward(checkpointReward);
            checkpoints[checkpointIndex].Reset();
            checkpoints[checkpointIndex].gameObject.SetActive(false);
            checkpointIndex = (checkpointIndex + 1) % checkpoints.Length;
            checkpoints[checkpointIndex].gameObject.SetActive(true);
        }
    }

    void Failure()
    {
        AddReward(failReward);
        ShowReward();
        ResetScene();
    }

    public override void OnEpisodeBegin()
    {
        //ResetScene();
    }

    void ResetScene()
    {
        failed = false;
        elapsedTime = 0;
        foreach (RaceCheckpoint checkpoint in checkpoints)
        {
            checkpoint.gameObject.SetActive(false);
        }
        checkpointIndex = 0;
        checkpoints[checkpointIndex].gameObject.SetActive(true);
        kartController.Reset_();
        foreach (TerrainColliderDetector terrainColliderDetector in terrainColliders)
        {
            terrainColliderDetector.Reset_();
        }
        EndEpisode();
    }

    public override void Heuristic(in ActionBuffers actionsOut)
    {
        base.Heuristic(actionsOut);
    }

    private void ShowReward()
    {
        Debug.Log($"Current Reward: {GetCumulativeReward()}");
    }
}