In mlagents, I want to make decisions on demand, that is, after waiting for the agent to complete the action, set a series of operations such as rewards, and then conduct observation space. I used the requestdecision method and didn’t use the automatic decision component. It seemed to work, but I found that I used a coroutine while waiting for the action to execute, which kept calling the environment.step function in fixedupdate. Causes the turn to end without the specified number of moves. What should be done? Or how to set it according to such requirements?
Here’s my code
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Random = UnityEngine.Random;
using System.Collections;
using Unity.Mathematics;
using System.Reflection;
public class robotAgent : Agent
{
[Header(“Specific to Ball3D”)]
public GameObject valve;
[Tooltip("Whether to use vector observation. This option should be checked " +
"in 3DBall scene, and unchecked in Visual3DBall scene. ")]
EnvironmentParameters m_ResetParams;
public ArmController armController;
public GameObject markValve;
public int steps;
public float preAngle;
public bool isComplete;
public GameObject markLeft;
public GameObject markRight;
public GameObject desireGoal;
public override void Initialize()
{
isComplete = true;
armController.ResetAll();
armController.ResetFlags();
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
preAngle = 0;
steps = 0;
}
public override void CollectObservations(VectorSensor sensor)
{
float[ ] currentState = armController.GetObservation();
for (int i = 0; i < 6; i++)
{
sensor.AddObservation(currentState / 180);
}
sensor.AddObservation(armController.GetValvePosition() / 90);
sensor.AddObservation(markLeft.transform.position - markValve.transform.position);
sensor.AddObservation(markRight.transform.position - markValve.transform.position);
sensor.AddObservation(markValve.transform.position);
sensor.AddObservation((markLeft.transform.position + markRight.transform.position) / 2.0f);
sensor.AddObservation(desireGoal.transform.position);
sensor.AddObservation(markValve.transform.position);
isComplete = false;
}
public override void OnActionReceived(ActionBuffers actionBuffers)
{
steps++;
float[ ] targetAngle = {
actionBuffers.ContinuousActions[0], actionBuffers.ContinuousActions[1],
actionBuffers.ContinuousActions[2], actionBuffers.ContinuousActions[3],
actionBuffers.ContinuousActions[4], 180.0f
};
//float[ ] currentState = armController.GetCurrentJointState();
for (int i = 0; i < 5; i++)
{
targetAngle = (targetAngle + 1) * 90;
targetAngle = math.clamp(targetAngle*, 0, 180);*
}
armController.SetJointPositions(targetAngle);
StartCoroutine(WaitComplete());
}
public override void OnEpisodeBegin()
{
Debug.Log(“回合开始了!!!”);
isComplete = true;
steps = 0;
armController.ResetAll();
armController.ResetFlags();
RequestDecision();
}
public override void Heuristic(in ActionBuffers actionsOut)
{
}
public void SetResetParameters()
{
steps = 0;
preAngle = 0;
armController.ResetAll();
armController.ResetFlags();
}
public float ComputeReward()
{
float distenceReward = (Vector3.Distance(markLeft.transform.position, markValve.transform.position) +
Vector3.Distance(markRight.transform.position, markValve.transform.position)) / 2;
float transformReward = Vector3.Distance(desireGoal.transform.position, markValve.transform.position);
var reward = -0.3f * distenceReward / 4.5f - 0.7f * transformReward / 0.7829f;
reward = reward * 2 + 1;
if (armController.collisionHandler.isErrorCollision == true)
reward -= 20.0f;
if (armController.GetValvePosition() >= 85)
reward += 20.0f;
return reward;
}
IEnumerator WaitComplete()
{
float initstartTime = Time.time;
float initTimeout = 20.0f;
while (!armController.GetIsStuck() && !armController.AreJointsAtTargetAngle()
&& !armController.collisionHandler.isErrorCollision) //等待条件:没有卡住 且 没有到达 且 没有碰撞
{
if (Time.time - initstartTime >= initTimeout)
{
//Debug.LogWarning(“等待超时”);
break;
}
//Debug.Log(“等待”);
yield return null;
}
isComplete = true;
if (isComplete)
{
//if (armController.AreJointsAtTargetAngle() || armController.collisionHandler.isErrorCollision)
// Debug.Log(“动作执行完毕!!!”);
var reward = ComputeReward();
SetReward(reward);
Debug.Log(“第”+steps + “步” + reward);
if (armController.collisionHandler.isErrorCollision)
{
Debug.Log(“回合结束了!!!”);
EndEpisode();
}
if (armController.GetValvePosition() >= 85)
{
Debug.Log(“回合结束了!!!”);
EndEpisode();
}
if (steps >= 50)
{
Debug.Log(“回合结束了!!!”);
EndEpisode();
}
RequestDecision();
}
}
}