Hi! Can someone help me figure out why my Unity ML-Agents project improves, but only very slowly? The numbers go up and down while slowly increasing overall. I’m trying to make a Tic-Tac-Toe ML-Agent. I have a board made up of 4 UI Images. I then use 9 UI Text objects each for “X” and “O”, and I enable/disable them as I need the pieces.
/*
Copyright (c) 2020 CompuGenius Programs
https://cgprograms.com
*/
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using UnityEngine;
using UnityEngine.UI;
/// <summary>
/// Tic-Tac-Toe agent that plays the "O" pieces. "X" is played either by a
/// uniformly random opponent or, when <see cref="againstAHuman"/> is set, by
/// UI button clicks. A piece is "on the board" when its GameObject in
/// <see cref="Xs"/> / <see cref="Os"/> is active; index i in both lists refers
/// to the same cell (0 = top-left ... 8 = bottom-right, row by row).
/// </summary>
/// <remarks>
/// Moves are queued in <see cref="OnActionReceived"/> and applied in
/// <see cref="Update"/>; <see cref="went"/> is the handshake between the two.
/// </remarks>
public class SinglePlayer : Agent
{
    /// <summary>One "X" piece object per cell; active means the cell holds an X.</summary>
    public List<GameObject> Xs;

    /// <summary>One "O" piece object per cell; active means the cell holds an O.</summary>
    public List<GameObject> Os;

    /// <summary>Board buttons; in human mode each click hands the turn to O.</summary>
    public Button[] buttons;

    /// <summary>True while it is X's turn, false while it is O's (the agent's) turn.</summary>
    public bool xsTurn;

    /// <summary>Game outcome: N = no winner (yet), X = X has a line, O = O has a line.</summary>
    public enum WhoWon
    {
        N,
        X,
        O
    }

    /// <summary>Winner as of the last board scan.</summary>
    public WhoWon whoWon;

    /// <summary>Cell the agent has queued for its next O move.</summary>
    public int oTurn;

    /// <summary>Cell the random opponent has queued for its next X move.</summary>
    public int xTurn;

    /// <summary>
    /// True when there is no queued move waiting to be applied. Cleared by
    /// <see cref="OnActionReceived"/> when it queues a move, set again once
    /// <see cref="Update"/> has placed the piece.
    /// </summary>
    public bool went;

    /// <summary>When true, X is played through the UI instead of at random.</summary>
    public bool againstAHuman;

    // Number of values in WhoWon; used as the one-hot width in CollectObservations.
    const int OutcomeCount = 3;

    // The eight winning lines: three rows, three columns, two diagonals.
    static readonly int[][] WinLines =
    {
        new[] { 0, 1, 2 },
        new[] { 3, 4, 5 },
        new[] { 6, 7, 8 },
        new[] { 0, 3, 6 },
        new[] { 1, 4, 7 },
        new[] { 2, 5, 8 },
        new[] { 0, 4, 8 },
        new[] { 2, 4, 6 },
    };

    /// <summary>Sets up the first board and, in human mode, wires the buttons.</summary>
    void Start()
    {
        ResetBoard();

        if (againstAHuman)
        {
            foreach (Button button in buttons)
            {
                button.onClick.AddListener(OsTurn);
            }
        }
    }

    /// <summary>Button callback (human mode): passes the turn to O.</summary>
    void OsTurn()
    {
        xsTurn = false;
    }

    /// <summary>Clears the board for a new episode.</summary>
    public override void OnEpisodeBegin()
    {
        ResetBoard();
    }

    /// <summary>
    /// Writes 9 X flags, 9 O flags and a 3-wide one-hot of <see cref="whoWon"/>:
    /// 21 values in total.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the vector observation size configured on the agent's
    /// Behavior Parameters is not visible in this file; it has to match the
    /// 21 values written here.
    /// </remarks>
    public override void CollectObservations(VectorSensor sensor)
    {
        foreach (GameObject x in Xs)
        {
            sensor.AddObservation(x.activeInHierarchy);
        }

        foreach (GameObject o in Os)
        {
            sensor.AddObservation(o.activeInHierarchy);
        }

        // The range must cover every enum value. The previous range of 2 left
        // no slot for WhoWon.O (value 2).
        sensor.AddOneHotObservation((int)whoWon, OutcomeCount);
    }

    // Click handlers for the human player, one per cell.
    // NOTE(review): these do not check whether the cell is already occupied,
    // and the OsTurn listener passes the turn regardless.
    public void TopLeft() { PlaceX(0); }
    public void TopCenter() { PlaceX(1); }
    public void TopRight() { PlaceX(2); }
    public void MiddleLeft() { PlaceX(3); }
    public void MiddleCenter() { PlaceX(4); }
    public void MiddleRight() { PlaceX(5); }
    public void BottomLeft() { PlaceX(6); }
    public void BottomCenter() { PlaceX(7); }
    public void BottomRight() { PlaceX(8); }

    /// <summary>
    /// Rewards a finished game (+1 O wins, -1 X wins, +0.25 draw) and ends the
    /// episode; otherwise queues the next move for <see cref="Update"/>.
    /// </summary>
    /// <param name="vectorAction">
    /// Element 0 is read as the cell index for O. It is clamped to the valid
    /// cell range before use.
    /// </param>
    public override void OnActionReceived(float[] vectorAction)
    {
        // Refresh the outcome first so a line completed since the last Update
        // (e.g. by a UI click) is seen before another move is queued.
        if (whoWon == WhoWon.N)
        {
            whoWon = FindWinner();
        }

        // Each terminal case returns immediately: nothing below should run
        // against a board that EndEpisode may already have reset.
        if (whoWon == WhoWon.O)
        {
            SetReward(1.0f);
            EndEpisode();
            return;
        }

        if (whoWon == WhoWon.X)
        {
            SetReward(-1.0f);
            EndEpisode();
            return;
        }

        if (IsBoardFull())
        {
            SetReward(0.25f);
            EndEpisode();
            return;
        }

        if (xsTurn)
        {
            // In human mode X is placed by the click handlers, not queued here.
            if (!againstAHuman)
            {
                xTurn = UnityEngine.Random.Range(0, Xs.Count);
                went = false;
            }
        }
        else
        {
            oTurn = ClampCell((int)vectorAction[0], Os.Count);
            went = false;
        }
    }

    /// <summary>
    /// Applies at most one queued move per frame, then rescans for a winner.
    /// </summary>
    private void Update()
    {
        if (whoWon != WhoWon.N)
        {
            return;
        }

        if (!went)
        {
            // Exactly one side may move per frame. Previously, placing X
            // flipped xsTurn and the O branch then ran in the same frame with
            // whatever oTurn was left over from an earlier action.
            if (xsTurn)
            {
                if (!againstAHuman)
                {
                    PlayQueuedX();
                }
            }
            else
            {
                PlayQueuedO();
            }
        }

        whoWon = FindWinner();
    }

    // Clears every piece and starts a new game. Against the random opponent,
    // X opens with a random cell and the turn passes to O.
    void ResetBoard()
    {
        whoWon = WhoWon.N;

        foreach (GameObject x in Xs)
        {
            x.SetActive(false);
        }

        foreach (GameObject o in Os)
        {
            o.SetActive(false);
        }

        // Nothing is queued for the fresh board: without this, a leftover
        // went == false let Update play a stale oTurn before any action for
        // the new board had been received.
        went = true;

        if (againstAHuman)
        {
            xsTurn = true;
        }
        else
        {
            Xs[UnityEngine.Random.Range(0, Xs.Count)].SetActive(true);
            xsTurn = false;
        }
    }

    // Shows the X piece in the given cell.
    void PlaceX(int cell)
    {
        Xs[cell].SetActive(true);
    }

    // Places the queued random X if its cell is free; otherwise re-rolls the
    // cell and tries again on a later frame.
    void PlayQueuedX()
    {
        if (IsOccupied(xTurn))
        {
            xTurn = UnityEngine.Random.Range(0, Xs.Count);
            return;
        }

        Xs[xTurn].SetActive(true);
        xsTurn = false;
        went = true;
    }

    // Places the queued O if its cell is free; otherwise swaps in a random
    // cell to be tried on a later frame.
    // NOTE(review): the random fallback means an illegal choice by the policy
    // is silently replaced, so the agent can be rewarded for a move it did not
    // pick. Masking occupied cells (ML-Agents discrete action masking) or
    // penalising illegal picks would give a cleaner learning signal - confirm
    // against the ML-Agents version in use.
    void PlayQueuedO()
    {
        if (IsOccupied(oTurn))
        {
            oTurn = UnityEngine.Random.Range(0, Os.Count);
            return;
        }

        Os[oTurn].SetActive(true);
        xsTurn = true;
        went = true;
    }

    // True when the cell holds an X or an O.
    bool IsOccupied(int cell)
    {
        return Xs[cell].activeInHierarchy || Os[cell].activeInHierarchy;
    }

    // True when every cell holds a piece. Counts cells rather than active
    // objects so a cell showing both pieces is not counted twice.
    bool IsBoardFull()
    {
        for (int cell = 0; cell < Xs.Count; cell++)
        {
            if (!IsOccupied(cell))
            {
                return false;
            }
        }

        return true;
    }

    // Returns the side owning a complete line, or WhoWon.N if there is none.
    // Lines are checked in WinLines order; within a line X is checked first.
    WhoWon FindWinner()
    {
        foreach (int[] line in WinLines)
        {
            int countX = 0;
            int countO = 0;

            foreach (int cell in line)
            {
                if (Xs[cell].activeInHierarchy)
                {
                    countX++;
                }

                if (Os[cell].activeInHierarchy)
                {
                    countO++;
                }
            }

            if (countX == line.Length)
            {
                return WhoWon.X;
            }

            if (countO == line.Length)
            {
                return WhoWon.O;
            }
        }

        return WhoWon.N;
    }

    // Forces an action value into [0, cellCount - 1] so it is always a valid
    // list index.
    static int ClampCell(int cell, int cellCount)
    {
        if (cell < 0)
        {
            return 0;
        }

        if (cell >= cellCount)
        {
            return cellCount - 1;
        }

        return cell;
    }
}
This is my Agent script. If someone sees something that seems wrong with it, please let me know!