Hello!
I am a university student currently working on my thesis, which is creating a volleyball-esque game and adding an opponent AI using Unity's ML-Agents. At the moment I have all of the basic functionality in the game - the player can move, jump, dash in any of the cardinal directions, interact with the ball, and score points. I've set up an environment for training a model to play the game, but so far I have had no luck getting even a halfway decent AI opponent - the mean reward never meaningfully increases! So here I am, asking for assistance.
I've added my current code below. Right now my reward function gives a small number of points for every step the ball stays in play, plus an increasing amount the closer the agent is to the ball, to incentivize interacting with it. The reward structure will probably change once I get one decent result, but right now the agent isn't even able to keep the ball in play for any reasonable amount of time. Considering the goal is an AI opponent that can actually play the game and score points, this isn't a very good result.
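To make the shaping concrete, this is the per-step reward in compact form (my own summary of the code below; dist is the agent-to-ball distance):

// Reward per decision step, as currently implemented in PlayerAgent.cs:
//   +0.01                                      flat reward for the ball staying in play
//   +0.2f * Time.deltaTime * (1 - dist / 5f)   proximity shaping, applied only while dist < 5
// The proximity term scales from 0 at 5 units away up to 0.2 * deltaTime right at the ball.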
PlayerAgent.cs
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using UnityEngine.InputSystem;

public class PlayerAgent : Agent
{
    public Ball ball;
    public Player otherPlayer;
    public Player agentPlayer;
    public bool XFlipped;

    // Mirrors x-axis observations and actions so one policy can play either side of the net.
    private float xFlipMul;
    // Score already credited to this agent, so each new point is only counted once.
    private float accountedPoints;

    public override void CollectObservations(VectorSensor sensor)
    {
        // Agent: 2D position + velocity (4 floats)
        var pos = agentPlayer.transform.localPosition;
        sensor.AddObservation(pos.x * xFlipMul);
        sensor.AddObservation(pos.y);
        var vel = agentPlayer.velocity.current;
        sensor.AddObservation(vel.x * xFlipMul);
        sensor.AddObservation(vel.y);

        // Opponent: 2D position + velocity (4 floats)
        pos = otherPlayer.transform.localPosition;
        sensor.AddObservation(pos.x * xFlipMul);
        sensor.AddObservation(pos.y);
        vel = otherPlayer.velocity.current;
        sensor.AddObservation(vel.x * xFlipMul);
        sensor.AddObservation(vel.y);

        // Ball: 2D position + velocity (4 floats)
        pos = ball.transform.localPosition;
        sensor.AddObservation(pos.x * xFlipMul);
        sensor.AddObservation(pos.y);
        vel = ball.velocity.current;
        sensor.AddObservation(vel.x * xFlipMul);
        sensor.AddObservation(vel.y);
    }

    public override void OnActionReceived(float[] vectorAction)
    {
        // A 0.5 dead zone turns the continuous outputs into 8-way movement input.
        Vector2 movement = new Vector2();
        movement.x = Mathf.Abs(vectorAction[0]) > .5f ? vectorAction[0] * xFlipMul : 0;
        movement.y = Mathf.Abs(vectorAction[1]) > .5f ? vectorAction[1] : 0;
        agentPlayer.inputManager.SetMovementKey(movement);
        agentPlayer.inputManager.SetJumpKey(vectorAction[2]);
        agentPlayer.inputManager.SetDashKey(vectorAction[3]);

        // Pick the score that belongs to this agent's side of the court.
        float checkPoints = ball.Game.leftPoint;
        float otherPoints = ball.Game.rightPoint; // currently unused
        if (agentPlayer.Game.Player2.Equals(agentPlayer))
        {
            checkPoints = ball.Game.rightPoint;
            otherPoints = ball.Game.leftPoint;
        }

        // Reached target: the agent scored, so the episode ends.
        // accountedPoints must be updated *before* EndEpisode(), because EndEpisode()
        // triggers OnEpisodeBegin() immediately; writing it afterwards would leave a
        // stale nonzero value and stop later episodes from ever ending on a point.
        if (checkPoints > accountedPoints)
        {
            accountedPoints = checkPoints;
            EndEpisode();
        }

        // Small flat reward per decision step for keeping the ball in play.
        AddReward(0.01f);

        // Shaping reward that grows linearly as the agent gets closer to the ball.
        float dist = (agentPlayer.transform.position - ball.transform.position).magnitude;
        float threshold = 5f;
        if (dist < threshold)
        {
            AddReward(0.2f * Time.deltaTime * (1 - dist / threshold));
        }
    }

    public override void Heuristic(float[] actionsOut)
    {
        // Keyboard fallback for manual play/testing: arrow keys to move, Z to jump, X to dash.
        actionsOut[0] = (Keyboard.current.rightArrowKey.isPressed ? 1 : 0) - (Keyboard.current.leftArrowKey.isPressed ? 1 : 0);
        actionsOut[1] = (Keyboard.current.upArrowKey.isPressed ? 1 : 0) - (Keyboard.current.downArrowKey.isPressed ? 1 : 0);
        actionsOut[2] = Keyboard.current.zKey.isPressed ? 1 : 0;
        actionsOut[3] = Keyboard.current.xKey.isPressed ? 1 : 0;
    }

    public override void OnEpisodeBegin()
    {
        xFlipMul = XFlipped ? -1f : 1f;
        ball.Game.Reset(xFlipMul);
        ball.Game.leftPoint = 0;
        ball.Game.rightPoint = 0;
        accountedPoints = 0;
    }
}
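For reference, these are the sizes the script implies (they need to match the Behavior Parameters component on the agent in the Inspector; I'm listing them here in case a mismatch is hiding there):

// Behavior Parameters implied by PlayerAgent (configured in the Inspector, not in code):
//   Behavior Name       : PlayerBehaviour   (must match the behaviors key in the YAML below)
//   Vector Observations : 12 floats (agent, opponent, ball -- 2D position + velocity each)
//   Continuous Actions  : 4  (move x, move y, jump, dash)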
configuration.yaml
default_settings: null
behaviors:
  PlayerBehaviour:
    trainer_type: ppo
    hyperparameters:
      batch_size: 32
      buffer_size: 512
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 500
      learning_rate_schedule: constant
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
      memory: null
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
      curiosity:
        gamma: 0.99
        strength: 0.02
        encoding_size: 256
        learning_rate: 0.0003
    init_path: null
    keep_checkpoints: 5
    checkpoint_interval: 500000
    max_steps: 10000000
    time_horizon: 32
    summary_freq: 10000
    threaded: true
    self_play:
      save_steps: 10000
      team_change: 20000
      swap_steps: 2000
      window: 20
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0
    behavioral_cloning: null
    framework: tensorflow
env_settings:
  env_path: null
  env_args: null
  base_port: 5005
  num_envs: 1
  seed: -1
engine_settings:
  width: 84
  height: 84
  quality_level: 5
  time_scale: 20
  target_frame_rate: -1
  capture_frame_rate: 60
  no_graphics: false
environment_parameters: null
checkpoint_settings:
  run_id: 22Nov
  initialize_from: null
  load_model: false
  resume: true
  force: false
  train_model: false
  inference: false
  debug: false
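For completeness, this is how I launch training from the venv listed further down (assuming the file above is saved as configuration.yaml; since env_path is null, it waits for me to press Play in the editor):

mlagents-learn configuration.yaml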
TensorBoard: [screenshot omitted - the mean reward curve stays essentially flat for the whole run]
Here is a link to the repo of the project, if you wish to test things out on your own or see how the game works: GitHub - TanelMarran/Voll-AI: 2 player volleyball versus game with machine learning AI
I am using Unity Version 2019.4.15f1.
Here are all of the dependencies in the Python venv I use to train my models:
Package Version
---------------------- ---------
absl-py 0.11.0
astunparse 1.6.3
attrs 20.3.0
cachetools 4.1.1
cattrs 1.0.0
certifi 2020.11.8
chardet 3.0.4
cloudpickle 1.6.0
future 0.18.2
gast 0.3.3
google-auth 1.23.0
google-auth-oauthlib 0.4.2
google-pasta 0.2.0
grpcio 1.33.2
gym 0.17.3
gym-unity 0.21.1
h5py 2.10.0
idna 2.10
Keras-Preprocessing 1.1.2
Markdown 3.3.3
mlagents 0.21.1
mlagents-envs 0.21.1
numpy 1.18.0
oauthlib 3.1.0
opt-einsum 3.3.0
Pillow 8.0.1
pip 20.2.4
protobuf 3.14.0
pyasn1 0.4.8
pyasn1-modules 0.2.8
pyglet 1.5.0
pypiwin32 223
pywin32 300
PyYAML 5.3.1
requests 2.25.0
requests-oauthlib 1.3.0
rsa 4.6
scipy 1.5.4
setuptools 49.2.1
six 1.15.0
tensorboard 2.4.0
tensorboard-plugin-wit 1.7.0
tensorflow 2.3.1
tensorflow-estimator 2.3.0
termcolor 1.1.0
urllib3 1.26.2
Werkzeug 1.0.1
wheel 0.35.1
wrapt 1.12.1
If there is any other info you would like me to share, please let me know. Thank you in advance!