I'm working on a project using MA-POCA and struggling with it — can anyone help me?

I have been training this project for two weeks, and I have learned a lot from other people's settings, but it still cannot get a good result.
During training, I changed the reward, the observations, and trainer.yaml... Now I am using a reward that others have used.

The goal of my project is for the agents to surround a target at 120-degree intervals.
Agent group: 3 boats, 18 vector observations plus a 3D ray sensor; the individual reward contains distance and angle terms; discrete actions: 2 branches (forward/back, turn left/right).
environment: ocean simulated wind, wave ....
The specific info is as follows:
trainer.yaml:
9146311--1271068--upload_2023-7-14_16-28-10.png
agent control:

 public override void Initialize()
    {
        agentRb = GetComponent<Rigidbody>();
        usv = GameObject.FindGameObjectsWithTag("agent");
        target = GameObject.FindGameObjectWithTag("target");
        ec = GetComponentInParent<EnvControl>();
        m_ResetParams = Academy.Instance.EnvironmentParameters;



    }



    public override void CollectObservations(VectorSensor sensor)
    {
        var aP = transform.position - target.transform.position;

        posiDiff = new Vector2(aP.x, aP.z);
        //Debug.Log("posidiff:" + posiDiff);
        sensor.AddObservation(-posiDiff);//2
        //Debug.Log(-posiDiff / 400 + new Vector2(1, 1));
        sensor.AddObservation((posiDiff.magnitude - range));//1
        var v = transform.InverseTransformDirection(agentRb.velocity);
        sensor.AddObservation(v.z/10);//1
        var av = transform.InverseTransformDirection(agentRb.angularVelocity);
        sensor.AddObservation(av.y/10);//1

        Vector2 control = new Vector2(forward, rotate);
        sensor.AddObservation(control);//2
        sensor.AddObservation(transform.InverseTransformVector(force));//3
        sensor.AddObservation(transform.InverseTransformVector(torque));//3
        sensor.AddObservation(transform.position);//3


//the angle of adjacent agent and this
        if (transform.position == usv[0].transform.position)
        {
             aTot1 = target.transform.position - transform.position;
            var d1 = new Vector2(aTot1.x, aTot1.z);
             dis1 = d1.magnitude;
             aTot2 = target.transform.position - usv[1].transform.position;
            var d2 = new Vector2(aTot2.x, aTot2.z);
             dis2 = d2.magnitude;
             aTot3 = target.transform.position - usv[2].transform.position;
            var d3 = new Vector2(aTot3.x, aTot3.z);
             dis3 = d3.magnitude;

        }
        if (transform.position == usv[1].transform.position)
        {
             aTot1 = target.transform.position - transform.position;
            var d1 = new Vector2(aTot1.x, aTot1.z);
             dis1 = d1.magnitude;
             aTot2 = target.transform.position - usv[0].transform.position;
            var d2 = new Vector2(aTot2.x, aTot2.z);
             dis2 = d2.magnitude;
             aTot3 = target.transform.position - usv[2].transform.position;
            var d3 = new Vector2(aTot3.x, aTot3.z);
             dis3 = d3.magnitude;

        }
        if (transform.position == usv[2].transform.position)
        {
             aTot1 = target.transform.position - transform.position;
            var d1 = new Vector2(aTot1.x, aTot1.z);
             dis1 = d1.magnitude;
             aTot2 = target.transform.position - usv[0].transform.position;
            var d2 = new Vector2(aTot2.x, aTot2.z);
             dis2 = d2.magnitude;
             aTot3 = target.transform.position - usv[1].transform.position;
            var d3 = new Vector2(aTot3.x, aTot3.z);
             dis3 = d3.magnitude;

        }
        averDis = dis1 + dis2 + dis3 - 3 * range;
        sensor.AddObservation(averDis);//1
        float a1 = Vector3.SignedAngle(aTot1, aTot2, transform.up);
            float a2 = Vector3.SignedAngle(aTot1, aTot3, transform.up);
            if (a1 > 0&&a2>0)
            {
            if (a1 <= a2)
            {
                thet1 = a1;
                thet2 = a2;
            }
            else
            {
                thet1 = a2;
                thet2 = a1;
            }
            }
            if(a1>0&&a2<0)
            {
                thet1 = a1;
                thet2 = a2;
            }
            if (a1 < 0&&a2>0)
            {
                thet1 = a2;
                thet2 = a1;
            }
        if (a1 < 0 && a2 < 0)
        {
            a1 = 360 + a1;
            a2 = 360 + a2;
            {
                if (a1 <= a2)
                {
                    thet1 = a1;
                    thet2 = a2;
                }
                else
                {
                    thet1 = a2;
                    thet2 = a1;
                }
            }
        }
        if (a1 == 0 && a2 != 0)
        {
            thet1 = a1;
            thet2 = a2;
        }
        if (a2 == 0 && a1 != 0)
        { thet1 = a2; thet2 = a1; }
        if (a1 == 0 && a2 == 0)
        { thet1 = a1; thet2 = a2; }


        //Debug.Log(thet);
        sensor.AddObservation(thet1);//1
           //18
    }

    public void MoveAgent(ActionBuffers actionBuffers)
    {
        var Action = actionBuffers.DiscreteActions;



        forward = Action[0];
        rotate = Action[1];

        switch (forward)
        {
            case 0:
                force = new Vector3(0, 0, 0);
                break;
            case 1:
                force = transform.InverseTransformDirection(transform.forward);
                break;
            case 2:
                force = -transform.InverseTransformDirection(transform.forward);
                break;
        }
        agentRb.AddRelativeForce(force * moveSpeed);//vel:moveSpeed=1,turnSpeed=0.6
        switch (rotate)
        {
            case 0:
                torque = new Vector3(0, 0, 0);
                break;
            case 1:
                torque = transform.InverseTransformDirection(transform.up) * turnSpeed;
                break;
            case 2:
                torque = -transform.InverseTransformDirection(transform.up) * turnSpeed;
                break;
        }
        agentRb.AddRelativeTorque(torque);

        if (agentRb.velocity.z > 12)
        {

            agentRb.velocity = 0.85f * agentRb.velocity;

        }

        if (agentRb.angularVelocity.y > 0.4)
        {

            agentRb.angularVelocity = 0.85f * agentRb.angularVelocity;
        }

        DisReward();
        AngleReward();
        CheckIfOutbound();
        rewadSingle = 0.6f*Rd + 0.4f*Rthet;
        AddReward(rewadSingle);
// encourge forward
        if(transform.InverseTransformDirection(agentRb.velocity).z>0 )
        {
            AddReward(0.005f);
        }


    }
    public override void OnActionReceived(ActionBuffers actions)
    {
        MoveAgent(actions);
    }
    public override void Heuristic(in ActionBuffers actionsOut)
    {


        var discreteActionsOut = actionsOut.DiscreteActions;
        //forward
        if (Input.GetKey(KeyCode.W))
        {
            discreteActionsOut[0] = 1;
        }
        if (Input.GetKey(KeyCode.S))
        {
            discreteActionsOut[0] = 2;
        }
        //rotate
        if (Input.GetKey(KeyCode.A))
        {
            discreteActionsOut[1] = 1;
        }
        if (Input.GetKey(KeyCode.D))
        {
            discreteActionsOut[1] = 2;
        }
    }


    public override void OnEpisodeBegin()
    {

        ec.ResetScene();
    }
    void DisReward()
    {
        float meanAverDis = Mathf.Pow(((dis1 - averDis) * (dis1 - averDis) + (dis2 - averDis) * (dis2 - averDis) + (dis3 - averDis) * (dis3 - averDis)) / 3, 0.5f);

        Rd = 1 - 0.05f * (posiDiff.magnitude - range) - 0.2f * Mathf.Exp((posiDiff.magnitude - range) / meanAverDis);
    }
    void AngleReward()
    {
        float a1 = Vector3.SignedAngle(aTot1, aTot2, transform.up);
        float a2 = Vector3.SignedAngle(aTot1, aTot3, transform.up);
        Rthet = 0.3f*Mathf.Exp(-Mathf.Abs((thet1 - 120) * Mathf.PI / 180)) - 1+ Mathf.Exp(-Mathf.Abs((thet2 - 240) * Mathf.PI / 180)) - 1+
            0.4f*Mathf.Exp(-(Mathf.Abs(a1 * Mathf.PI / 180)- (Mathf.Abs(a2 * Mathf.PI / 180))))-1;
    }
    private void OnCollisionEnter(Collision collision)
    {

        if (collision.gameObject.CompareTag("obstacle")||collision.gameObject.CompareTag("agent")|| collision.gameObject.CompareTag("target"))
        {
            AddReward(-0.05f);
            ec.ResetScene();
        }
    }

    void CheckIfOutbound()
    {
        var bound = 1.5f;
        if (transform.position.x < -bound * ec.areaBounds.extents.x || transform.position.x > bound * ec.areaBounds.extents.x
            || transform.position.z < -bound * ec.areaBounds.extents.z || transform.position.z > bound *ec. areaBounds.extents.z
            )

        {
            AddReward(-0.03f);
            ec.ResetScene();
        }
    }


}

environmen control(contain group reward):

  private void Start()
    {
        areaBounds = ground.GetComponent<Collider>().bounds;
        //Debug.Log(areaBounds);

        m_purseAgent = FindObjectOfType<Cooperate>();
        m_AgentGroup = new SimpleMultiAgentGroup();
        usv = GameObject.FindGameObjectsWithTag("agent");
        target = GameObject.FindGameObjectWithTag("target");
        //Debug.Log("awake");

        foreach (var item in TargetsList)
        {
            item.StartingPos = item.target.transform.position;
            item.StartingRot = item.target.transform.rotation;
            item.T = item.target.transform;
            item.Rb = item.target.GetComponent<Rigidbody>();
            item.Col = item.target.GetComponent<Collider>();
        }



        foreach (var item in AgentsList)
        {
            item.StartingPos = item.agent.transform.position;
            item.StartingRot = item.agent.transform.rotation;
            item.Rb = item.agent.GetComponent<Rigidbody>();
            item.Col = item.agent.GetComponent<Collider>();
            m_AgentGroup.RegisterAgent(item.agent);
        }
        ResetScene();
    }

    public void ResetScene()
    {
        m_ResetTimer = 0;

        //Random platform rotation
        var rotation = Random.Range(0, 4);
        var rotationAngle = rotation * 90f;
        transform.Rotate(new Vector3(0f, rotationAngle, 0f));

        //Reset Agents
        foreach (var item in AgentsList)
        {
            var pos = UseRandomAgentPosition ? GetRandomSpawnPos() : item.StartingPos;
            var rot = UseRandomAgentRotation ? GetRandomRot() : item.StartingRot;

            item.agent.transform.SetPositionAndRotation(pos, rot);
            item.Rb.velocity = Vector3.zero;
            item.Rb.angularVelocity = Vector3.zero;
            m_AgentGroup.RegisterAgent(item.agent);
        }


        foreach (var item in TargetsList)
        {
            var pos = UseRandomTargetPosition ? GetRandomSpawnPos() : item.StartingPos;
            var rot = UseRandomTargetRotation ? GetRandomRot() : item.StartingRot;

            item.T.transform.SetPositionAndRotation(pos, rot);
            item.Rb.velocity = Vector3.zero;
            item.Rb.angularVelocity = Vector3.zero;
            item.T.gameObject.SetActive(true);
        }


    }

    public Vector3 GetRandomSpawnPos()
    {
        var foundNewSpawnLocation = false;
        var randomSpawnPos = Vector3.zero;
        while (foundNewSpawnLocation == false)
        {
            var randomPosX = Random.Range(-areaBounds.extents.x , areaBounds.extents.x  );

            var randomPosZ = Random.Range(-areaBounds.extents.z , areaBounds.extents.z );
            randomSpawnPos = ground.transform.position + new Vector3(randomPosX, 3.27f, randomPosZ);
            //randomSpawnPos =  new Vector3(randomPosX, ground.transform.position.y+86.8f, randomPosZ);
            var spawnPosTt = randomSpawnPos - target.transform.position;
            if (Physics.CheckBox(randomSpawnPos, new Vector3(1.75f, 1f, 4.25f)) == false
                && (randomSpawnPos.x > -areaBounds.extents.x && randomSpawnPos.x < areaBounds.extents.x)
                && (randomSpawnPos.z > -areaBounds.extents.z && randomSpawnPos.z < areaBounds.extents.z)
                && (Mathf.Pow(spawnPosTt.x * spawnPosTt.x + spawnPosTt.z * spawnPosTt.z, 0.5f) > m_purseAgent.range))
            {
                foundNewSpawnLocation = true;
            }
        }
        return randomSpawnPos;
    }


    Quaternion GetRandomRot()
    {
        return Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
    }


    public void FixedUpdate()
    {
        m_ResetTimer += 1;
        if(m_ResetTimer>=MaxEnvironmentSteps&&MaxEnvironmentSteps>0)
        {
            m_AgentGroup.GroupEpisodeInterrupted();
            ResetScene();
        }
        SuccessPurse();


        m_AgentGroup.AddGroupReward(-20f / MaxEnvironmentSteps);

    }


    void SuccessPurse()
    {
        int flag = 0;
        int f = 0;
        Vector2 u1 = new Vector2(target.transform.position.x - usv[0].transform.position.x, target.transform.position.z - usv[0].transform.position.z);
        Vector2 u2 = new Vector2(target.transform.position.x - usv[1].transform.position.x, target.transform.position.z - usv[1].transform.position.z);
        Vector2 u3 = new Vector2(target.transform.position.x - usv[2].transform.position.x, target.transform.position.z - usv[2].transform.position.z);
        if(u1.magnitude<=m_purseAgent.range)
        {
            flag++;
        }
        if(u2.magnitude<=m_purseAgent.range)
        {
            flag++;
        }
        if (u3.magnitude <= m_purseAgent.range)
        {
            flag++;
        }
        if(Vector2.Dot(u1.normalized,u2.normalized)>=-0.5&& Vector2.Dot(u1.normalized, u2.normalized) <= -0.1)

        {
            f++;
        }
        if (Vector2.Dot(u1.normalized, u3.normalized) >= -0.5 && Vector2.Dot(u1.normalized, u3.normalized) <= -0.1)
        {
            f++;
        }
        if(flag==3&&f==2)
        {
            m_AgentGroup.AddGroupReward(100);
            m_AgentGroup.EndGroupEpisode();
            ResetScene();
        }
    }

}

Very hard to tell. A single bug in your code can mess up training completely (fiddling around with angles is always a source of errors for me).
Try to debug your code and verify that every value is what it should be.

Now I’m trying to put angle part into group reward. I will show the result later.

Hi, I have a question about reward normalization. Sometimes the mean reward sharply drops to -1e7. Is this related to normalization? I normalize observations in trainer.yaml. If so, where is it wrong? Should I normalize the total reward rather than the individual rewards?
Appreciate!

The POCA team reward should be either 1 for a win or 0 for a loss. Having it slide between 0 and 1 can be suboptimal.
I would try to give the individuals points for being close to each other and let them figure out what to do. I don't know — playing around with those reward functions is the difficult part, and it really depends on your training hardware, bugs in the code, and the individual project.


Are you using multiple environments to learn faster?
My observations are similar to yours:

if (transform.position == agent[0].transform.position)
        {
...
         }

The agents could fully learn in one environment, but in multiple environments, the agents couldn't.
I think this is because of the observation mentioned above.