Hi,
I am currently using ml-agents for my thesis and trying to implement the MAML and Reptile meta-learning algorithms. See: https://arxiv.org/pdf/1703.03400.pdf and https://openai.com/blog/reptile/
For that I want to read out the weights of the neural networks (and possibly the computed gradients) and update them in a meta-learning fashion. I am using the release-3 version.
Where would be the best place to implement such a meta-learning algorithm? Do you have any experience with this?
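For context, the outer update I eventually want to apply is the Reptile step theta <- theta + epsilon * (theta' - theta). Here is a minimal sketch of what I have in mind, assuming the weights come back as a list of numpy arrays (reptile_update and epsilon are my own names, not ml-agents API):

import numpy as np

def reptile_update(weights_before, weights_after, epsilon=0.1):
    # Move the meta-weights a small step toward the task-adapted weights:
    # theta <- theta + epsilon * (theta' - theta)
    return [
        before + epsilon * (after - before)
        for before, after in zip(weights_before, weights_after)
    ]

# quick sanity check on dummy weights:
w0 = [np.zeros(3)]
w1 = [np.ones(3)]
print(reptile_update(w0, w1, epsilon=0.1))  # [array([0.1, 0.1, 0.1])]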
For now, I am trying to access the weights in the RLTrainer class (trainer/rl_trainer.py).
There I am calling
weights_before_update = deepcopy(self.get_policy(self.brain_name).get_weights())
directly before this block:
with hierarchical_timer("_update_policy"):
    if self._update_policy():
        for q in self.policy_queues:
            # Get policies that correspond to the policy queue in question
            q.put(self.get_policy(q.behavior_id))
and after the update I want to set the weights back to the old initial weights with:
self.get_policy(self.brain_name).init_load_weights()
self.get_policy(self.brain_name).load_weights(weights_before_update)
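Put together, the relevant part of advance() currently looks roughly like this on my side (a sketch; for now I just restore the old weights, and later the Reptile step from above would produce the weights to load instead):

# inside RLTrainer.advance() in trainer/rl_trainer.py (release-3);
# deepcopy comes from "from copy import deepcopy" at the top of the file
policy = self.get_policy(self.brain_name)
weights_before_update = deepcopy(policy.get_weights())

with hierarchical_timer("_update_policy"):
    if self._update_policy():
        for q in self.policy_queues:
            # Get policies that correspond to the policy queue in question
            q.put(self.get_policy(q.behavior_id))

# build the assign placeholders/ops, then push the saved weights back
policy.init_load_weights()
policy.load_weights(weights_before_update)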
Unfortunately, I get the following error when trying to set the weights back to the old weights:
Exception in thread Thread-2:
Traceback (most recent call last):
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\client\session.py", line 1365, in _do_call
    return fn(*args)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\client\session.py", line 1350, in _run_fn
    target_list, run_metadata)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\client\session.py", line 1443, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: You must feed a value for placeholder tensor 'Placeholder_61' with dtype float and shape [128]
     [[{{node Placeholder_61}}]]
     [[Assign_90/_677]]
  (1) Invalid argument: You must feed a value for placeholder tensor 'Placeholder_61' with dtype float and shape [128]
     [[{{node Placeholder_61}}]]
0 successful operations.
0 derived errors ignored.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "c:\users\\desktop\rlunity\ml-agents-release_3\ml-agents\mlagents\trainers\trainer_controller.py", line 340, in trainer_update_func
    trainer.advance()
  File "c:\users\\desktop\rlunity\ml-agents-release_3\ml-agents\mlagents\trainers\trainer\rl_trainer.py", line 211, in advance
    self.get_policy(self.brain_name).load_weights(weights_before_update)
  File "c:\users\\desktop\rlunity\ml-agents-release_3\ml-agents\mlagents\trainers\policy\tf_policy.py", line 235, in load_weights
    self.sess.run(self.assign_ops, feed_dict=feed_dict)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\client\session.py", line 956, in run
    run_metadata_ptr)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\client\session.py", line 1180, in _run
    feed_dict_tensor, options, run_metadata)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\client\session.py", line 1359, in _do_run
    run_metadata)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\client\session.py", line 1384, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: You must feed a value for placeholder tensor 'Placeholder_61' with dtype float and shape [128]
     [[node Placeholder_61 (defined at C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\framework\ops.py:1751) ]]
     [[Assign_90/_677]]
  (1) Invalid argument: You must feed a value for placeholder tensor 'Placeholder_61' with dtype float and shape [128]
     [[node Placeholder_61 (defined at C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\framework\ops.py:1751) ]]
0 successful operations.
0 derived errors ignored.

Original stack trace for 'Placeholder_61':
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\threading.py", line 890, in _bootstrap
    self._bootstrap_inner()
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "c:\users\\desktop\rlunity\ml-agents-release_3\ml-agents\mlagents\trainers\trainer_controller.py", line 340, in trainer_update_func
    trainer.advance()
  File "c:\users\\desktop\rlunity\ml-agents-release_3\ml-agents\mlagents\trainers\trainer\rl_trainer.py", line 210, in advance
    self.get_policy(self.brain_name).init_load_weights()
  File "c:\users\\desktop\rlunity\ml-agents-release_3\ml-agents\mlagents\trainers\policy\tf_policy.py", line 222, in init_load_weights
    assign_ph = tf.placeholder(var.dtype, shape=value.shape)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 2630, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 8041, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 793, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3360, in create_op
    attrs, op_def, compute_device)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3429, in _create_op_internal
    op_def=op_def)
  File "C:\Users\\.conda\envs\tf_2_torch_mlagents_3\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1751, in __init__
    self._traceback = tf_stack.extract_stack()
Why does updating the policy change the weights in a way that I can no longer set them? Do you have any ideas?
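In case it helps to narrow it down, one thing I could check is whether the update changes the number or shapes of the variables returned by get_weights() (just a quick diagnostic sketch around the update):

w_before = self.get_policy(self.brain_name).get_weights()
# ... the _update_policy() block runs here ...
w_after = self.get_policy(self.brain_name).get_weights()
print(len(w_before), len(w_after))  # does the variable count change?
for i, (b, a) in enumerate(zip(w_before, w_after)):
    if b.shape != a.shape:
        print(i, b.shape, a.shape)  # a shape mismatch would break load_weights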