I am trying to create a multi-agent reinforcement learning environment where agents need to stand on a pressure plate to open a door. The door only stays open as long as someone is standing on the pressure plate, so the agents need to cooperate: one of them stands on the pressure plate while the other goes through the door and then stands on the pressure plate on the other side, so that the first one can also go through.
Right now my reward function looks something like this:
/// <summary>
/// Per-physics-step reward shaping for the two-agent pressure-plate task.
/// Advances the episode timer, interrupts and resets the episode when the step
/// limit is reached, then hands out individual and group rewards based on
/// plate proximity and on which agents have left the first room.
/// </summary>
void FixedUpdate()
{
    // An agent closer than this to a plate is treated as standing on it.
    const float plateRadius = 2.25f;

    resetTimer += 1;
    if (resetTimer >= MaxEnvironmentSteps && MaxEnvironmentSteps > 0)
    {
        agentGroup.GroupEpisodeInterrupted();
        ResetScene();
    }

    // NOTE(review): every shaping term below divides by MaxEnvironmentSteps, while the
    // guard above tolerates a value of 0; with 0 the rewards become infinite.
    // Confirm that 0 is never configured.
    // NOTE(review): the penalties previously used int literals (-2, -4, -1). If
    // MaxEnvironmentSteps is an int (declaration not visible here), that is integer
    // division and truncates to 0, so the penalties were never applied. Float
    // literals are used throughout to force floating-point division.
    for (int i = 0; i < agents.Count; i++)
    {
        var self = agents[i].agent;

        agents[i].distanceToPlate0 = Vector3.Distance(self.transform.position, self.pressurePlates[0].transform.position);
        agents[i].distanceToPlate1 = Vector3.Distance(self.transform.position, self.pressurePlates[1].transform.position);
        bool onPlate = agents[i].distanceToPlate0 < plateRadius || agents[i].distanceToPlate1 < plateRadius;

        // If the agent is on either plate, add a reward.
        if (onPlate)
        {
            self.AddReward(0.25f / MaxEnvironmentSteps);
        }

        // If the agent left the room, add a reward.
        if (self.thisAgentLeft)
        {
            self.AddReward(0.5f / MaxEnvironmentSteps);
        }

        // Indexing with 1 - i is only valid for exactly two agents (indices 0 and 1).
        var other = agents[1 - i].agent;

        if (!other.thisAgentLeft && onPlate)
        {
            // The other agent is still in the first room while this agent is on the plate.
            agentGroup.AddGroupReward(-2f / MaxEnvironmentSteps);
            other.AddReward(-0.5f / MaxEnvironmentSteps);
            Debug.Log("Other agent still in the room while this agent is on the plate");
        }
        else if (other.thisAgentLeft && !self.thisAgentLeft)
        {
            // The other agent left and this one is still in the room.
            agentGroup.AddGroupReward(-4f / MaxEnvironmentSteps);
            self.AddReward(-1f / MaxEnvironmentSteps);
            Debug.Log("Other agent left the room and this one is still in the room");
        }
    }

    if (agents[0].agent.thisAgentLeft && agents[1].agent.thisAgentLeft)
    {
        agentGroup.AddGroupReward(0.5f / MaxEnvironmentSteps);
        Debug.Log("Both agents left the room");
    }

    // Hurry-up penalty: a small constant cost every step.
    agentGroup.AddGroupReward(-0.25f / MaxEnvironmentSteps);
}
And the agents will get a reward once they both find the checkpoint:
/// <summary>
/// Grants the checkpoint group reward once every entry in <c>agents</c> has its
/// <c>FoundCheckpoint</c> flag set; does nothing otherwise.
/// </summary>
/// <param name="cpCol">Checkpoint collider; its GameObject is deactivated when the reward is granted.</param>
/// <param name="reward">Value passed to the group's <c>AddGroupReward</c>.</param>
public void FoundCheckpoint(Collider cpCol, float reward)
{
    // Bail out as soon as one agent has not reached the checkpoint yet.
    for (int idx = 0; idx < agents.Count; idx++)
    {
        if (!agents[idx].agent.FoundCheckpoint)
        {
            return;
        }
    }

    Debug.Log("All agents found checkpoint");
    cpCol.gameObject.SetActive(false);
    agentGroup.AddGroupReward(reward);
}