Agent rapidly gets worse after doing well

I have an AI for a 2D platformer that I have been training for a while now. On every training run I hit the same problem: the agent succeeds for an episode, then completely reverts to a much worse version of itself. I have added the code, hyperparameters, and TensorBoard graph here. If you need any other information, just let me know.

8990536--1237840--image_2023-05-03_205755040.png

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using UnityEngine.Tilemaps;
/// <summary>
/// ML-Agents agent for a colour-matching 2D platformer.
/// The agent moves horizontally, jumps, and is rewarded for reaching
/// <see cref="endLevel"/> while its sprite colour matches the goal's sprite colour.
/// Touching a tilemap on layer 6 or 7 whose colour differs from the player's
/// ends the episode with a small penalty.
/// </summary>
public class PlayerAgent : Agent
{
    // PlayerPrefs key used to count how many times the goal was reached.
    private const string EndReachedKey = "EndReached";

    // Horizontal speed applied for the left/right actions.
    private const float MoveSpeed = 11f;
    // Upward impulse applied when a grounded jump is requested.
    private const float JumpImpulse = 7.5f;
    // Extra gravity multipliers applied on top of the physics engine's own gravity.
    private const float FallGravityBoost = 3f;
    private const float ReleasedJumpGravityBoost = 4f;
    // Distance to the goal at which the level counts as finished.
    private const float GoalRadius = 1.25f;
    private const float GoalReward = 3f;
    private const float WrongColorPenalty = -0.15f;
    // How far below the collider the ground check box is cast.
    private const float GroundCheckDistance = .08f;

    Rigidbody2D rb;
    CircleCollider2D cc;
    SpriteRenderer ps;

    GameObject[] coins;
    List<Vector3> coinPos = new List<Vector3>();
    GameObject[] changers;
    List<Vector3> changerPos = new List<Vector3>();

    public Transform endLevel;
    public Transform startPos;

    bool playerDied;

    // Last computed distance from the agent to the goal (also fed to the policy as an observation).
    float distanceToEnd;
    float t;
    float oldPos;
    float newPos;
    // Distance from the start marker to the goal, measured at episode begin.
    float dis;

    // Smallest distance to the goal reached so far in the current episode.
    float lastDisToEnd;

    bool check1;

    // Colour pushed onto the sprite every physics step; public so other scripts can change it.
    public Color playerColor;

    public int endReached;

    // Current horizontal direction: -1 left, 0 idle, 1 right. Persists between steps.
    int dirX;

    /// <summary>
    /// Caches components and records the local positions of every "Coin" and "Changer" object in the scene.
    /// </summary>
    private void Awake()
    {
        rb = GetComponent<Rigidbody2D>();
        cc = GetComponent<CircleCollider2D>();
        ps = GetComponent<SpriteRenderer>();

        coins = GameObject.FindGameObjectsWithTag("Coin");
        foreach (GameObject coin in coins)
        {
            coinPos.Add(coin.transform.localPosition);
        }

        changers = GameObject.FindGameObjectsWithTag("Changer");
        foreach (GameObject changer in changers)
        {
            changerPos.Add(changer.transform.localPosition);
        }
    }

    /// <summary>
    /// Resets coins, colour, position, velocity and all per-episode tracking state.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        foreach (GameObject coin in coins)
        {
            coin.SetActive(true);
            coin.GetComponent<CoinScirpt>().activelyCollected = false;
        }

        ps.color = Color.white;
        playerColor = ps.color;
        t = 0;
        oldPos = 0;
        playerDied = false;
        check1 = false;

        // Spawn at a random horizontal offset from the start marker.
        transform.position = startPos.position + new Vector3(Random.Range(-1.5f, 10f), 0f);
        rb.velocity = Vector2.zero;

        // Clear the movement direction so action 0 ("keep direction") does not
        // inherit motion from the previous episode.
        dirX = 0;

        dis = Vector3.Distance(startPos.position, endLevel.transform.position);
        lastDisToEnd = dis;

        // Refresh the observed distance so the first observation of the episode
        // reflects the new spawn point instead of the previous episode's last step.
        distanceToEnd = Vector2.Distance(transform.position, endLevel.position);

        endReached = PlayerPrefs.GetInt(EndReachedKey);
    }

    /// <summary>
    /// Keeps the sprite colour in sync with <see cref="playerColor"/>.
    /// </summary>
    private void FixedUpdate()
    {
        ps.color = playerColor;
    }

    /// <summary>
    /// Adds velocity, position, grounded flag, goal position, player colour,
    /// changer positions and distance-to-goal to the observation vector.
    /// </summary>
    /// <param name="sensor">Vector sensor receiving the observations.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        // NOTE(review): all values below are raw world-space numbers, not scaled to [-1, 1].
        // Consider enabling `normalize` in the trainer config or scaling them here.
        sensor.AddObservation(rb.velocity.x);
        sensor.AddObservation(rb.velocity.y);

        sensor.AddObservation(transform.position.x);
        sensor.AddObservation(transform.position.y);

        sensor.AddObservation(IsGrounded());

        sensor.AddObservation(endLevel.position.x);
        sensor.AddObservation(endLevel.position.y);

        sensor.AddObservation(ps.color.r);
        sensor.AddObservation(ps.color.g);
        sensor.AddObservation(ps.color.b);

        // Observation count depends on how many "Changer" objects were found in Awake.
        foreach (Vector3 changer in changerPos)
        {
            sensor.AddObservation(changer.x);
            sensor.AddObservation(changer.y);
        }

        sensor.AddObservation(distanceToEnd);
    }

    /// <summary>
    /// Applies the chosen movement/jump actions, the custom gravity tweaks,
    /// and the goal reward.
    /// </summary>
    /// <param name="actions">
    /// Discrete branch 0: 1 = left, 2 = right, 3 = stop, anything else keeps the current direction.
    /// Discrete branch 1: 1 = jump / hold jump, anything else = no jump.
    /// </param>
    public override void OnActionReceived(ActionBuffers actions)
    {
        int movement = actions.DiscreteActions[0];
        int jump = actions.DiscreteActions[1];

        switch (movement)
        {
            case 1: dirX = -1; break;
            case 2: dirX = 1; break;
            case 3: dirX = 0; break;
            // Any other value intentionally leaves dirX unchanged.
        }
        rb.velocity = new Vector2(dirX * MoveSpeed, rb.velocity.y);

        if (jump == 1 && IsGrounded())
        {
            // Zero the vertical speed first so every jump has the same height.
            rb.velocity = new Vector2(rb.velocity.x, 0);
            rb.AddForce(new Vector2(0, JumpImpulse), ForceMode2D.Impulse);
        }

        if (rb.velocity.y < 0)
        {
            // Falling: add extra downward acceleration for a snappier descent.
            rb.velocity += Vector2.up * Physics2D.gravity.y * FallGravityBoost * Time.fixedDeltaTime;
        }
        else if (rb.velocity.y > 0 && jump != 1)
        {
            // Rising without holding jump: cut the jump short.
            rb.velocity += Vector2.up * Physics2D.gravity.y * ReleasedJumpGravityBoost * Time.fixedDeltaTime;
        }

        distanceToEnd = Vector2.Distance(transform.position, endLevel.position);

        if (distanceToEnd <= GoalRadius && ps.color == endLevel.GetComponent<SpriteRenderer>().color)
        {
            AddReward(GoalReward);
            PlayerPrefs.SetInt(EndReachedKey, PlayerPrefs.GetInt(EndReachedKey) + 1);
            EndEpisode();
            // Stop here: the episode is over, and the progress tracking below must
            // not run with the finished episode's distance.
            return;
        }

        // Track the closest approach to the goal. The shaping reward that used
        // this value is currently disabled, so no reward is added here.
        if (distanceToEnd < lastDisToEnd)
        {
            lastDisToEnd = distanceToEnd;
        }
    }

    /// <summary>
    /// Keyboard control for manual testing: A/D to move, Space to jump.
    /// </summary>
    /// <param name="actionsOut">Action buffer to fill with the player's input.</param>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var discreteActionsOut = actionsOut.DiscreteActions;

        int movement;
        if (Input.GetKey(KeyCode.A)) { movement = 1; }
        else if (Input.GetKey(KeyCode.D)) { movement = 2; }
        else { movement = 3; }
        discreteActionsOut[0] = movement;

        // Jump counts as "held" while Space is down and the player is either
        // grounded or still travelling upwards; otherwise report "no jump" (2).
        bool jumpHeld = Input.GetKey(KeyCode.Space) && (IsGrounded() || rb.velocity.y > 0.01f);
        discreteActionsOut[1] = jumpHeld ? 1 : 2;
    }

    /// <summary>
    /// Records the current horizontal position for stuck detection.
    /// The penalty for standing still is currently disabled.
    /// </summary>
    public void PosCheck()
    {
        newPos = transform.position.x;
        oldPos = newPos;
    }

    /// <summary>
    /// Box-casts a short distance below the collider and reports whether the
    /// player is standing on a tilemap of its own colour.
    /// </summary>
    /// <returns>
    /// True if any hit is a tilemap whose RGB matches the player's sprite and
    /// which is not tagged "TransTilemaps"; otherwise false.
    /// </returns>
    public bool IsGrounded()
    {
        RaycastHit2D[] hits = Physics2D.BoxCastAll(cc.bounds.center, cc.bounds.extents, 0f, Vector2.down, GroundCheckDistance);

        foreach (RaycastHit2D hit in hits)
        {
            if (hit.collider == null)
            {
                continue;
            }

            Tilemap tm = hit.collider.GetComponent<Tilemap>();
            if (tm == null)
            {
                continue;
            }

            bool sameColor = tm.color.r == ps.color.r && tm.color.g == ps.color.g && tm.color.b == ps.color.b;

            // Return on the first valid surface so a later, unrelated hit
            // cannot overwrite the result.
            if (sameColor && !tm.gameObject.CompareTag("TransTilemaps"))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Ends the episode with a penalty when the player touches a tilemap on
    /// layer 6 or 7 whose colour differs from the player's sprite colour.
    /// </summary>
    /// <param name="collision">Collision data supplied by the physics engine.</param>
    private void OnCollisionEnter2D(Collision2D collision)
    {
        GameObject other = collision.gameObject;
        if (other.layer != 6 && other.layer != 7)
        {
            return;
        }

        Tilemap tm = other.GetComponent<Tilemap>();
        if (tm == null)
        {
            return;
        }

        if (tm.color != ps.color)
        {
            AddReward(WrongColorPenalty);
            EndEpisode();
        }
    }
}

Try increasing the hidden units to 256 and the number of layers to 3, and set normalize to true.

Also, I see you're only looking at 2–3 million steps. It's going to do that… it will explore all the possible things you're giving it the ability to observe, to make sure it covers all possible outcomes and learns them all, so that it also knows what not to do. I think…

Seconding @All_American on normalizing. Large drawdowns in reward are usually caused by exploding gradients (observation values outside [-1, 1]), so you need to either normalize all observations yourself or set normalize to true in the config. The learning rate may also be too aggressive; you can try lowering it and training for longer.