[Full code included at end] The ML-Agent in Unity gets worse the longer it trains. In my game the player grabs a key and brings it to a door and starts again. The position of the key and door is randomized. Here is a video of me playing the game:
The observations for the agent are; its position, the key’s position, the door’s position, and if it has the key. I have included the code segment below:
public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(transform.localPosition);
sensor.AddObservation(key.transform.localPosition);
sensor.AddObservation(hasKey);
sensor.AddObservation(door.transform.localPosition);
}
The rewards for the agent are; -1 at start, 0 when key is obtained, 1 when door opened. There is also a -.0000001 for each action and -.0001 for hitting a wall. I have included the code segments below:
void Update()
{
if (transform.position.x > upperXLimit)
{
gameObject.transform.Translate(-1, 0, 0, Space.World);
AddReward(-.0001f);
}
//Same for other edges
}
public override void OnEpisodeBegin()
{
SetReward(-1f);
}
public override void OnActionReceived(ActionBuffers actions)
{
AddReward(-.0000001f);
//.......
}
void OnTriggerEnter2D(Collider2D col)
{
if (col.gameObject == key)
{
SetReward(0);
}
if (col.gameObject == door)
{
SetReward(+1f);
}
}
The actions the agent can take are; idle, move up, move down, move right, move left. I have included the code segment below:
public override void OnActionReceived(ActionBuffers actions)
{
AddReward(-.0000001f);
switch (direction)
{
case 0: // idle
moveTo = Vector2.zero;
break;
case 1: // left
moveTo = new Vector2(-1, 0);
break;
case 2: // right
moveTo = new Vector2(1, 0);
break;
case 3: // up
moveTo = new Vector2(0, 1);
break;
case 4: // down
moveTo = new Vector2(0, -1);
break;
}
}
I have included the graphs from TensorFlow after training the agent for 2,000,000 steps. As you can see the cumulative reward goes down, while episode time goes up:
Here is my config file for training the agent:
behaviors:
My Behavior:
trainer_type: ppo
hyperparameters:
batch_size: 128
buffer_size: 2048
learning_rate: 0.00005
beta: 0.01
epsilon: 0.2
lambd: 0.95
num_epoch: 3
learning_rate_schedule: linear
network_settings:
normalize: false
hidden_units: 256
num_layers: 2
vis_encode_type: simple
reward_signals:
extrinsic:
gamma: 0.8
strength: 1.0
keep_checkpoints: 5
max_steps: 2000000
time_horizon: 64
summary_freq: 60000
Lastly I have included my complete code:
Game.cs. Runs and resets game.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
public class Game : MonoBehaviour
{
public float upperXLimit;
public float lowerXLimit;
public float upperYLimit;
public float lowerYLimit;
public GameObject Key;
public GameObject Door;
public GameObject CurrentKey;
public GameObject CurrentDoor;
public AIPlayer AIPlayerScript;
public player PlayerScript;
// Start is called before the first frame update
void Start()
{
resetBoard();
}
public void resetBoard()
{
if (CurrentKey != null)
{
Destroy(CurrentKey);
}
if (CurrentDoor != null)
{
Destroy(CurrentDoor);
}
CurrentKey = Instantiate(Key, new Vector3(Mathf.Round(Random.Range(lowerXLimit, upperXLimit)), Mathf.Round(Random.Range(lowerYLimit, upperYLimit)), 0), Quaternion.identity);
CurrentDoor = Instantiate(Door, new Vector3(Mathf.Round(Random.Range(lowerXLimit, upperXLimit)), Mathf.Round(Random.Range(lowerYLimit, upperYLimit)), 0), Quaternion.identity);
AIPlayerScript.key = CurrentKey;
AIPlayerScript.door = CurrentDoor;
PlayerScript.key = CurrentKey;
PlayerScript.door = CurrentDoor;
}
}
AIPlayer.cs. Runs the mlagent.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using static UnityEngine.RuleTile.TilingRuleOutput;
using static AIPlayer;
using UnityEditor.Tilemaps;
public class AIPlayer : Agent
{
Collider2D col;
public double upperXLimit;
public double lowerXLimit;
public double upperYLimit;
public double lowerYLimit;
public bool hasKey = false;
private Vector2 moveTo = Vector2.zero;
private int direction;
public GameObject key;
public GameObject door;
private Game game;
public enum MoveToDirection
{
Idle,
Left,
Right,
Up,
Down
}
void Start()
{
game = GameObject.FindGameObjectWithTag("Game").GetComponent<Game>();
}
// Update is called once per frame
void Update()
{
transform.Translate(moveTo, Space.World);
if (transform.position.x > upperXLimit)
{
gameObject.transform.Translate(-1, 0, 0, Space.World);
AddReward(-.0001f);
//Debug.Log("Wall");
}
if (transform.position.x < lowerXLimit)
{
gameObject.transform.Translate(1, 0, 0, Space.World);
AddReward(-.0001f);
//Debug.Log("Wall");
}
if (transform.position.y > upperYLimit)
{
gameObject.transform.Translate(0, -1, 0, Space.World);
AddReward(-.0001f);
//Debug.Log("Wall");
}
if (transform.position.y < lowerYLimit)
{
gameObject.transform.Translate(0, 1, 0, Space.World);
AddReward(-.0001f);
//Debug.Log("Wall");
}
}
public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(transform.localPosition);
sensor.AddObservation(key.transform.localPosition);
sensor.AddObservation(hasKey);
sensor.AddObservation(door.transform.localPosition);
//Debug.Log("Self: " + transform.localPosition + " | Key: " + key.transform.localPosition + " | hasKey: " + hasKey + " | Door: " + door.transform.localPosition);
}
public override void OnEpisodeBegin()
{
SetReward(-1f);
}
private MoveToDirection moveToDirection = MoveToDirection.Idle;
public override void OnActionReceived(ActionBuffers actions)
{
AddReward(-.0000001f);
direction = actions.DiscreteActions[0];
//Debug.Log("Direction: " + direction);
switch (direction)
{
case 0: // idle
moveTo = Vector2.zero;
moveToDirection = MoveToDirection.Idle;
break;
case 1: // left
moveTo = new Vector2(-1, 0);
moveToDirection = MoveToDirection.Left;
//interactor.localRotation = Quaternion.Euler(0, 0, 90);
//animator.SetFloat("LastHorizontal", moveTo.x);
break;
case 2: // right
moveTo = new Vector2(1, 0);
moveToDirection = MoveToDirection.Right;
//interactor.localRotation = Quaternion.Euler(0, 0, -90);
//animator.SetFloat("LastHorizontal", moveTo.x);
break;
case 3: // up
moveTo = new Vector2(0, 1);
moveToDirection = MoveToDirection.Up;
//interactor.localRotation = Quaternion.Euler(0, 0, 0);
//animator.SetFloat("LastVertical", moveTo.y);
break;
case 4: // down
moveTo = new Vector2(0, -1);
moveToDirection = MoveToDirection.Down;
//interactor.localRotation = Quaternion.Euler(0, 0, 180);
//animator.SetFloat("LastVertical", moveTo.y);
break;
}
}
void OnTriggerEnter2D(Collider2D col)
{
if (col.gameObject == key)
{
hasKey = true;
col.gameObject.SetActive(false);
SetReward(0);
}
if (col.gameObject == door)
{
if (hasKey == true)
{
SetReward(+1f);
hasKey = false;
EndEpisode();
game.resetBoard();
}
}
}
}