Mean Reward Goes Up and Down

Hi! Can someone help me figure out why my Unity ML-Agents project goes up, but very slowly? It goes up and down with the the numbers slowly increasing. I’m trying to make a Tic-Tac-Toe ML-Agent. I have a board made up of 4 UI Images. I then use 9 UI Text object for both “X” and “O”. I enable/disable when I need the pieces.

/*
Copyright (c) 2020 CompuGenius Programs
https://cgprograms.com
*/

using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using UnityEngine;
using UnityEngine.UI;

public class SinglePlayer : Agent
{
    public List<GameObject> Xs;
    public List<GameObject> Os;

    public Button[] buttons;

    public bool xsTurn;

    public enum WhoWon
    {
        N,
        X,
        O
    };

    public WhoWon whoWon;

    static int[] win1 = new int[3] { 0, 1, 2 };
    static int[] win2 = new int[3] { 3, 4, 5 };
    static int[] win3 = new int[3] { 6, 7, 8 };
    static int[] win4 = new int[3] { 0, 3, 6 };
    static int[] win5 = new int[3] { 1, 4, 7 };
    static int[] win6 = new int[3] { 2, 5, 8 };
    static int[] win7 = new int[3] { 0, 4, 8 };
    static int[] win8 = new int[3] { 2, 4, 6 };
    List<int[]> wins = new List<int[]>() { win1, win2, win3, win4, win5, win6, win7, win8 };

    public int oTurn;
    public int xTurn;

    public bool went;

    public bool againstAHuman;

    void Start()
    {
        whoWon = WhoWon.N;

        xsTurn = true;

        foreach (GameObject x in Xs)
        {
            x.SetActive(false);
        }
        foreach (GameObject o in Os)
        {
            o.SetActive(false);
        }

        if (!againstAHuman)
        {
            if (xsTurn)
                Xs[UnityEngine.Random.Range(0, Xs.Count)].SetActive(true);

            xsTurn = false;
        }
        else
        {
            foreach (Button button in buttons)
            {
                button.onClick.AddListener(OsTurn);
            }
        }
    }

    void OsTurn()
    {
        if (xsTurn)
        {
            xsTurn = false;
        }
    }

    public override void OnEpisodeBegin()
    {
        whoWon = WhoWon.N;

        foreach (GameObject x in Xs)
        {
            x.SetActive(false);
        }
        foreach (GameObject o in Os)
        {
            o.SetActive(false);
        }
        xsTurn = true;

        if (!againstAHuman)
        {
            if (xsTurn)
                Xs[UnityEngine.Random.Range(0, Xs.Count)].SetActive(true);

            xsTurn = false;
        }
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        foreach (GameObject x in Xs)
        {
            sensor.AddObservation(x.activeInHierarchy);
        }
        foreach (GameObject o in Os)
        {
            sensor.AddObservation(o.activeInHierarchy);
        }

        sensor.AddOneHotObservation((int)whoWon, (int)WhoWon.O);
    }

    public void TopLeft()
    {
        Xs[0].SetActive(true);
    }
    public void TopCenter()
    {
        Xs[1].SetActive(true);
    }
    public void TopRight()
    {
        Xs[2].SetActive(true);
    }
    public void MiddleLeft()
    {
        Xs[3].SetActive(true);
    }
    public void MiddleCenter()
    {
        Xs[4].SetActive(true);
    }
    public void MiddleRight()
    {
        Xs[5].SetActive(true);
    }
    public void BottomLeft()
    {
        Xs[6].SetActive(true);
    }
    public void BottomCenter()
    {
        Xs[7].SetActive(true);
    }
    public void BottomRight()
    {
        Xs[8].SetActive(true);
    }

    public override void OnActionReceived(float[] vectorAction)
    {
        if (!againstAHuman)
        {
            if (xsTurn)
            {
                xTurn = UnityEngine.Random.Range(0, Xs.Count);
                went = false;
            }
        }

        if (!xsTurn)
        {
            oTurn = (int)vectorAction[0];
            went = false;
        }

        if (whoWon == WhoWon.O)
        {
            SetReward(1.0f);
            EndEpisode();
        }

        if (whoWon == WhoWon.X)
        {
            SetReward(-1.0f);
            EndEpisode();
        }
        if (whoWon == WhoWon.N)
        {
            int numPiecesOnBoard = 0;

            foreach (GameObject piece in Xs.Concat(Os))
            {
                if (piece.activeInHierarchy)
                    numPiecesOnBoard++;
            }
            if (numPiecesOnBoard == 9)
            {
                SetReward(0.25f);
                EndEpisode();
            }
        }
    }

    private void Update()
    {
        if (whoWon == WhoWon.N)
        {
            if (!went)
            {
                if (!againstAHuman)
                {
                    if (xsTurn)
                    {
                        if (Xs[xTurn].activeInHierarchy || Os[xTurn].activeInHierarchy)
                        {
                            xTurn = UnityEngine.Random.Range(0, Xs.Count);
                        }
                        else
                        {
                            Xs[xTurn].SetActive(true);
                            xsTurn = false;
                            went = true;
                        }
                    }
                }
                if (!xsTurn)
                {
                    if (!Xs[oTurn].activeInHierarchy && !Os[oTurn].activeInHierarchy)
                    {
                        Os[oTurn].SetActive(true);
                        xsTurn = true;
                        went = true;
                    }
                    else
                    {
                        oTurn = UnityEngine.Random.Range(0, Os.Count);
                    }
                }
            }

            foreach (int[] win in wins)
            {
                int countX = 0;
                int countO = 0;
                int countN = 0;

                foreach (int place in win)
                {
                    if (Xs[place].activeInHierarchy)
                    {
                        countX++;
                    }
                    if (Os[place].activeInHierarchy)
                    {
                        countO++;
                    }
                    else
                    {
                        countN++;
                    }
                }

                if (countX == 3)
                {
                    whoWon = WhoWon.X;
                    break;
                }
                if (countO == 3)
                {
                    whoWon = WhoWon.O;
                    break;
                }
                else
                {
                    whoWon = WhoWon.N;
                }
            }
        }
    }
}

This is my Agent script. If someone sees something that seems wrong with it, please let me know!

Hi,

Can you share the output of your tensorboard logs? This will provide extra context as to what is happening during training. ml-agents/docs/Using-Tensorboard.md at main · Unity-Technologies/ml-agents · GitHub

@awjuliani I gave up on this project, yet can’t delete the forums. I will try again soon.