Hello!
After managing to install ML-Agents, I was able to train a model. However, whenever training ends, I get an error and the model is not saved. Can someone help me?
My setup:
macOS Sonoma 14.1 / M1
onnx = 1.12.0
protobuf = 3.19.6
torch = 2.1.0
python = 3.10.12
mlagents = 1.0.0
Unity = 2022.3.11f1
Here is the error:
[INFO] MoveToGoal. Step: 10000. Time Elapsed: 22.075 s. Mean Reward: -2.000. Std of Reward: 2.714. Training.
[WARNING] Restarting worker[0] after 'Communicator has exited.'
/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/torch/__init__.py:614: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/tensor/python_tensor.cpp:453.)
_C._set_default_tensor_type(t)
[INFO] Listening on port 5004. Start training by pressing the Play button in the Unity Editor.
Traceback (most recent call last):
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/trainer_controller.py", line 175, in start_learning
n_steps = self.advance(env_manager)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents_envs/timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/trainer_controller.py", line 233, in advance
new_step_infos = env_manager.get_steps()
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/env_manager.py", line 124, in get_steps
new_step_infos = self._step()
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/subprocess_env_manager.py", line 420, in _step
self._restart_failed_workers(step)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/subprocess_env_manager.py", line 328, in _restart_failed_workers
self.reset(self.env_parameters)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/env_manager.py", line 68, in reset
self.first_step_infos = self._reset_env(config)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/subprocess_env_manager.py", line 446, in _reset_env
ew.previous_step = EnvironmentStep(ew.recv().payload, ew.worker_id, {}, {})
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/subprocess_env_manager.py", line 101, in recv
raise env_exception
mlagents_envs.exception.UnityTimeOutException: The Unity environment took too long to respond. Make sure that :
The environment does not need user interaction to launch
The Agents' Behavior Parameters > Behavior Type is set to "Default"
The environment and the Python interface have compatible versions.
If you're running on a headless server without graphics support, turn off display by either passing --no-graphics option or build your Unity executable as server build.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/torch/onnx/_internal/onnx_proto_utils.py", line 221, in _add_onnxscript_fn
import onnx
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/onnx/__init__.py", line 5, in <module>
from .onnx_cpp2py_export import ONNX_ML
ImportError: dlopen(/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/onnx/onnx_cpp2py_export.cpython-310-darwin.so, 0x0002): Library not loaded: @rpath/libprotobuf.31.dylib
Referenced from: <4A0F8F41-B487-3758-8FD3-BD8580182670> /Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/onnx/onnx_cpp2py_export.cpython-310-darwin.so
Reason: tried: '/Users/.../opt/anaconda3/envs/ml-agents/lib/libprotobuf.31.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/.../opt/anaconda3/envs/ml-agents/lib/libprotobuf.31.dylib' (no such file), '/Users/.../opt/anaconda3/envs/ml-agents/lib/libprotobuf.31.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/.../opt/anaconda3/envs/ml-agents/lib/libprotobuf.31.dylib' (no such file), '/Users/.../opt/anaconda3/envs/mlagents/bin/../lib/libprotobuf.31.dylib' (no such file), '/Users/.../opt/anaconda3/envs/mlagents/bin/../lib/libprotobuf.31.dylib' (no such file)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/.../opt/anaconda3/envs/mlagents/bin/mlagents-learn", line 8, in <module>
sys.exit(main())
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/learn.py", line 267, in main
run_cli(parse_command_line())
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/learn.py", line 263, in run_cli
run_training(run_seed, options, num_areas)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/learn.py", line 137, in run_training
tc.start_learning(env_manager)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents_envs/timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/trainer_controller.py", line 200, in start_learning
self._save_models()
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents_envs/timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/trainer_controller.py", line 80, in _save_models
self.trainers[brain_name].save_model()
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/trainer/rl_trainer.py", line 172, in save_model
model_checkpoint = self._checkpoint()
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents_envs/timers.py", line 305, in wrapped
return func(*args, **kwargs)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/trainer/rl_trainer.py", line 144, in _checkpoint
export_path, auxillary_paths = self.model_saver.save_checkpoint(
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/model_saver/torch_model_saver.py", line 60, in save_checkpoint
self.export(checkpoint_path, behavior_name)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/model_saver/torch_model_saver.py", line 65, in export
self.exporter.export_policy_model(output_filepath)
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/mlagents/trainers/torch_entities/model_serialization.py", line 164, in export_policy_model
torch.onnx.export(
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/torch/onnx/utils.py", line 516, in export
_export(
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/torch/onnx/utils.py", line 1670, in _export
proto = onnx_proto_utils._add_onnxscript_fn(
File "/Users/.../opt/anaconda3/envs/mlagents/lib/python3.10/site-packages/torch/onnx/_internal/onnx_proto_utils.py", line 223, in _add_onnxscript_fn
raise errors.OnnxExporterError("Module onnx is not installed!") from e
torch.onnx.errors.OnnxExporterError: Module onnx is not installed!