Hey there,
I am working on my final year project for University, which involves creating an ML agent using reinforcement learning. I am a reasonably confident programmer; however, I am really struggling to implement ML-Agents successfully within Unity.
I have finally created a very simple agent script that I thought would work, but when I attempt to train it, training crashes with an error. The agent script should simply allow the agent to rotate to aim at an enemy and shoot when it is aiming at the enemy.
I am unsure if I have coded the agent script poorly or if the problem lies elsewhere (such as within the Unity editor and so on). Below is the code for the agent script (no observations are included as I am using the Ray Perception Sensor 3D to identify walls and the enemy):
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
public class MLAgent : Agent
{
    // Yaw speed in degrees/second (serialized so it can be tuned in the Inspector).
    public float rotationSpeed = 90f;

    // Maximum distance a shot can travel.
    public float shootingRange = 20f;

    private Vector3 startingPosition;     // spawn position restored each episode
    private Quaternion startingRotation;  // spawn orientation restored each episode
    private Transform shootingPoint;      // origin of the shooting ray (found by tag)

    /// <summary>
    /// Caches the agent's spawn pose and the shooting point once, before training starts.
    /// </summary>
    public override void Initialize()
    {
        startingPosition = transform.position;
        startingRotation = transform.rotation;
        // NOTE(review): FindWithTag returns null when no object carries the
        // "shootingPoint" tag, which would throw here — confirm the tag exists in the scene.
        shootingPoint = GameObject.FindWithTag("shootingPoint").transform;
    }

    /// <summary>
    /// Applies the three discrete actions: [0] rotate right, [1] rotate left, [2] shoot.
    /// Rewards +1 and ends the episode on a hit; -0.1 for a missed shot.
    /// </summary>
    public override void OnActionReceived(float[] vectorAction)
    {
        // Aiming at an enemy is a yaw rotation, so rotate around the Y axis.
        // (The original used Vector3.left/right, which pitches around X —
        // tilting the agent up/down instead of turning it toward the enemy.)
        if (Mathf.RoundToInt(vectorAction[0]) == 1)
            transform.Rotate(Vector3.up * rotationSpeed * Time.deltaTime);
        if (Mathf.RoundToInt(vectorAction[1]) == 1)
            transform.Rotate(Vector3.down * rotationSpeed * Time.deltaTime);

        if (Mathf.RoundToInt(vectorAction[2]) == 1)
        {
            // Draw the full shot for one second. The original passed 20f as the
            // duration while drawing a 1-unit ray (direction was not scaled).
            Debug.DrawRay(shootingPoint.position, transform.forward * shootingRange, Color.blue, 1f);

            // Single raycast: the original performed the physics query twice
            // per shot and discarded the first result.
            if (Physics.Raycast(shootingPoint.position, transform.forward, out var hit, shootingRange)
                && hit.transform.CompareTag("Enemy"))
            {
                AddReward(1.0f);
                EndEpisode();
            }
            else
            {
                // Penalize wasted shots so the agent learns to fire only when aimed.
                AddReward(-0.1f);
            }
        }
    }

    /// <summary>
    /// Manual control for debugging: arrow keys rotate, left mouse button shoots.
    /// </summary>
    public override void Heuristic(float[] actionsOut)
    {
        actionsOut[0] = Input.GetKey(KeyCode.RightArrow) ? 1f : 0f;
        actionsOut[1] = Input.GetKey(KeyCode.LeftArrow) ? 1f : 0f;
        // GetMouseButton (held), not GetMouseButtonDown: Heuristic runs on the
        // decision/FixedUpdate cycle and can miss single-frame "down" events.
        actionsOut[2] = Input.GetMouseButton(0) ? 1f : 0f;
    }

    /// <summary>
    /// Resets the agent to its spawn pose at the start of every episode.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        Debug.Log("Episode Begin");
        transform.position = startingPosition;
        // Assign the cached rotation. The original called transform.Rotate(),
        // which ADDS the starting angles on every reset instead of restoring them,
        // so the agent's orientation drifted further each episode.
        transform.rotation = startingRotation;
    }
}
Below is what I am shown in the command prompt when attempting to train the agent (first time posting here so I am unsure the best way to include text from the command prompt, sorry!):
Version information:
ml-agents: 0.26.0,
ml-agents-envs: 0.26.0,
Communicator API: 1.5.0,
PyTorch: 1.7.1+cu110
[INFO] Listening on port 5004. Start training by pressing the Play button in the Unity Editor.
[INFO] Connected to Unity environment with package version 1.0.7 and communication version 1.0.0
[INFO] Connected new brain: My Behavior?team=0
[WARNING] Behavior name My Behavior does not match any behaviors specified in the trainer configuration file. A default configuration will be used.
[INFO] Hyperparameters for behavior name My Behavior:
trainer_type: ppo
hyperparameters:
batch_size: 1024
buffer_size: 10240
learning_rate: 0.0003
beta: 0.005
epsilon: 0.2
lambd: 0.95
num_epoch: 3
learning_rate_schedule: linear
network_settings:
normalize: False
hidden_units: 128
num_layers: 2
vis_encode_type: simple
memory: None
goal_conditioning_type: hyper
reward_signals:
extrinsic:
gamma: 0.99
strength: 1.0
network_settings:
normalize: False
hidden_units: 128
num_layers: 2
vis_encode_type: simple
memory: None
goal_conditioning_type: hyper
init_path: None
keep_checkpoints: 5
checkpoint_interval: 500000
max_steps: 500000
time_horizon: 64
summary_freq: 50000
threaded: False
self_play: None
behavioral_cloning: None
[WARNING] Trainer has no policies, not saving anything.
Traceback (most recent call last):
File "C:\Users\Hari Prendergast\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\Hari Prendergast\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "E:\Unity_Projects\AgentShoot\venv\Scripts\mlagents-learn.exe\__main__.py", line 7, in <module>
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\learn.py", line 250, in main
run_cli(parse_command_line())
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\learn.py", line 246, in run_cli
run_training(run_seed, options)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\learn.py", line 125, in run_training
tc.start_learning(env_manager)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents_envs\timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\trainer_controller.py", line 173, in start_learning
self._reset_env(env_manager)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents_envs\timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\trainer_controller.py", line 107, in _reset_env
self._register_new_behaviors(env_manager, env_manager.first_step_infos)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\trainer_controller.py", line 268, in _register_new_behaviors
self._create_trainers_and_managers(env_manager, new_behavior_ids)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\trainer_controller.py", line 166, in _create_trainers_and_managers
self._create_trainer_and_manager(env_manager, behavior_id)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\trainer_controller.py", line 137, in _create_trainer_and_manager
policy = trainer.create_policy(
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\trainer\rl_trainer.py", line 119, in create_policy
return self.create_torch_policy(parsed_behavior_id, behavior_spec)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\ppo\trainer.py", line 226, in create_torch_policy
policy = TorchPolicy(
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\policy\torch_policy.py", line 65, in __init__
self.actor = SimpleActor(
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\torch\networks.py", line 592, in __init__
self.network_body = NetworkBody(observation_specs, network_settings)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\torch\networks.py", line 212, in __init__
self._body_endoder = LinearEncoder(
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\torch\layers.py", line 148, in __init__
linear_layer(
File "e:\unity_projects\agentshoot\venv\lib\site-packages\mlagents\trainers\torch\layers.py", line 49, in linear_layer
layer = torch.nn.Linear(input_size, output_size)
File "e:\unity_projects\agentshoot\venv\lib\site-packages\torch\nn\modules\linear.py", line 83, in __init__
self.reset_parameters()
File "e:\unity_projects\agentshoot\venv\lib\site-packages\torch\nn\modules\linear.py", line 86, in reset_parameters
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
File "e:\unity_projects\agentshoot\venv\lib\site-packages\torch\nn\init.py", line 381, in kaiming_uniform_
std = gain / math.sqrt(fan)
ZeroDivisionError: float division by zero
Any advice is greatly appreciated, thanks!