My Agent Keeps Doing A Discrete Action Even Though I Gave It A Negative Reward

Hello everyone i am new to using Unity ML-Agents, I have created an environment for my agent to train but the agent keeps jumping even when i give the jump action -100 reward, can someone help me please i can provide more data if needed.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;

public class MovetoGoal : Agent
{
    [SerializeField] private Transform targetTransform;
    [SerializeField] private Transform restart;
    [SerializeField] private Rigidbody2D rb;
    public float JumpPower = 0.1f;
    private bool isGrounded = true;
    private float previousDistanceToPlayer;
    private float totalReward = 0;
    public override void OnEpisodeBegin()
    {
        transform.position = restart.transform.position;
    }
    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(transform.localPosition);
        sensor.AddObservation(targetTransform.localPosition);
        sensor.AddObservation(rb.velocity);
    }
    public override void OnActionReceived(ActionBuffers actions)
    {
        float moveX = actions.ContinuousActions[0];
        int jump = actions.DiscreteActions[0];
        rb.velocity = new Vector2(moveX * 8f, rb.velocity.y);
        if (jump != 0 && isGrounded)
        {
            rb.velocity = new Vector2(rb.velocity.x, JumpPower);
            isGrounded = false;
            SetReward(-1f);
            totalReward = totalReward - 1f;
        }
        float distanceToPlayer = Vector2.Distance(transform.position, targetTransform.position);

        // If the agent gets too far away from the player, give a negative reward
        if (distanceToPlayer >= previousDistanceToPlayer)
        {
            SetReward(-0.1f);
            totalReward = totalReward - 0.1f;      
        }

        // If the agent gets closer to the player, give a positive reward
        if (distanceToPlayer < previousDistanceToPlayer)
        {
            SetReward(0.1f);
            totalReward = totalReward + 0.1f;
        }

        // Update the previous distance to the player for the next time step
        previousDistanceToPlayer = distanceToPlayer;
    }
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        ActionSegment<float> continousActions = actionsOut.ContinuousActions;
        continousActions[0] = Input.GetAxisRaw("Horizontal");
    }
    private void OnTriggerEnter2D(Collider2D other)
    {
        if (other.gameObject.tag == "Goal")
        {
            //Debug.Log(totalReward);
            SetReward(+1f);
            totalReward = totalReward + 1f;
            EndEpisode();
        }
        if (other.gameObject.tag == "Wall")
        {
            //Debug.Log(totalReward);
            SetReward(-1f);
            totalReward = totalReward - 1f;
            EndEpisode();
        }
    }
    void OnCollisionEnter2D(Collision2D col)
    {
        if (col.gameObject.tag == "Ground")
        {
            isGrounded = true;
        }
    }
}

Hi, I think here you should use AddReward() instead of SetReward(). AddReward() increments reward by the provided value, while SetReward() overrides the current reward.
You can confirm the difference of those API here. I also found a discussion on the difference here . I hope this helps.

1 Like

That fixed it thank you so much!

1 Like