Issue with ML Agents

Hi,

I am new to ML-Agents and have been trying to train a small game demo to understand how it works, but I just can't get training to work.

I have been at it for weeks now. Whenever I launch the mlagents-learn command, it just hangs and the agent's actions are never registered.
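
For reference, the command I run is roughly the following (the config file name and run-id here are just placeholders for what I actually pass):

mlagents-learn config.yaml --run-id=PlayerAgentTest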

My player agent code is:

using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;

public class PlayerAgent : Agent
{
    private Rigidbody playerRigidbody;
    private GameManager gameManager;
    private Player _playerScript;
    public GameObject playerPrefab;

    public override void Initialize()
    {
        playerRigidbody = GetComponent<Rigidbody>();
        gameManager = FindObjectOfType<GameManager>();
        _playerScript = GetComponent<Player>();

        Debug.Log("PlayerAgent initialized.");
    }

    public override void OnEpisodeBegin()
    {
        Debug.Log("Episode Begin");
        if (gameManager.currentPlayer != null)
        {
            Destroy(gameManager.currentPlayer);
            Debug.Log("Destroyed existing player.");
        }
        gameManager.newGame();
        Debug.Log("Called gameManager.newGame()");

        GameObject newPlayer = Instantiate(playerPrefab, Vector3.zero, Quaternion.identity);
        gameManager.currentPlayer = newPlayer;
        Debug.Log("Instantiated new player.");

        Debug.Log("New game started, player reset.");
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        Debug.Log("Collecting Observations");
        sensor.AddObservation(transform.localPosition);
        sensor.AddObservation(transform.localRotation.eulerAngles.z);
        sensor.AddObservation(gameManager.phase);

        Debug.Log($"Position: {transform.localPosition}, Rotation: {transform.localRotation.eulerAngles.z}, Phase: {gameManager.phase}");
    }

    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        int rotate = actionBuffers.DiscreteActions[0];
        int strafe = actionBuffers.DiscreteActions[1];
        Debug.Log($"Action Received - Rotate: {rotate}, Strafe: {strafe}");

        _playerScript.Movement_V1(rotate, strafe);

        // Rewards and penalties
        if (gameManager.spawnsDestroyed > 0)
        {
            SetReward(gameManager.spawnsDestroyed * 0.1f);
            Debug.Log($"Reward for spawns destroyed: {gameManager.spawnsDestroyed * 0.1f}");
        }
        if (gameManager.phase > 1)
        {
            SetReward(gameManager.phase * 0.5f);
            Debug.Log($"Reward for phase: {gameManager.phase * 0.5f}");
        }
        if (gameManager.currentPlayer == null)
        {
            SetReward(-1.0f);
            Debug.Log("Player destroyed, ending episode with penalty.");
            Academy.Instance.StatsRecorder.Add("PhaseReached", gameManager.phase);
            EndEpisode();
        }
    }

    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var discreteActionsOut = actionsOut.DiscreteActions;

        // Manual control for testing
        discreteActionsOut[0] = 0; // Default no rotation
        if (Input.GetKey(KeyCode.LeftArrow))
        {
            discreteActionsOut[0] = 1;
        }
        else if (Input.GetKey(KeyCode.RightArrow))
        {
            discreteActionsOut[0] = 2;
        }

        discreteActionsOut[1] = 0; // Default no strafe
        if (Input.GetKey(KeyCode.W))
        {
            discreteActionsOut[1] = 1;
        }
        else if (Input.GetKey(KeyCode.S))
        {
            discreteActionsOut[1] = 2;
        }
        else if (Input.GetKey(KeyCode.D))
        {
            discreteActionsOut[1] = 3;
        }
        else if (Input.GetKey(KeyCode.A))
        {
            discreteActionsOut[1] = 4;
        }

        Debug.Log($"Heuristic actions - Rotate: {discreteActionsOut[0]}, Strafe: {discreteActionsOut[1]}");
    }
}


My trainer config YAML:

behaviors:
  ControlStyle1:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 2048
      learning_rate: 3.0e-4
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 5
      shared_critic: false
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    max_steps: 50000
    time_horizon: 64
    summary_freq: 1000

I think my installation was on Python 3.18 (I'm not completely sure of the exact version).
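
I'll double-check the exact Python and mlagents versions with something like:

python --version
pip show mlagents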