# sequence_agent_test_set_up.py
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for sequence_agent."""
import os
from typing import Type

import numpy as np
from robotics_transformer import sequence_agent
from tensor2robot.utils import tensorspec_utils
import tensorflow as tf
from tf_agents.networks import network
from tf_agents.policies import policy_saver
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import time_step as ts
class DummyActorNet(network.Network):
  """Minimal stand-in actor network for testing SequenceAgent and subclasses.

  The network ignores its observations and always emits the same constant
  action struct, so agent construction and policy export can be exercised
  without a real model.
  """

  def __init__(self,
               output_tensor_spec=None,
               train_step_counter=None,
               policy_info_spec=None,
               time_sequence_length=1,
               use_tcl=False,
               **kwargs):
    """Initializes the dummy network.

    Args:
      output_tensor_spec: Unused by this dummy.
      train_step_counter: Unused by this dummy.
      policy_info_spec: Unused by this dummy.
      time_sequence_length: Unused by this dummy.
      use_tcl: Unused by this dummy.
      **kwargs: Forwarded unchanged to `network.Network.__init__`.
    """
    # NOTE(review): the unused parameters presumably mirror the constructor of
    # the real actor network so the agent can build either one -- confirm.
    super().__init__(**kwargs)
    # Give both accessors a defined initial state so that reading them on a
    # fresh instance returns None instead of raising AttributeError.
    self._actions = None
    self._actor_loss = None

  @property
  def tokens_per_action(self):
    """Number of tokens per action reported by this dummy (fixed at 8)."""
    return 8

  def set_actions(self, actions):
    """Stores `actions` on the network; `call` does not read them."""
    self._actions = actions

  def get_actor_loss(self):
    """Returns the stored actor loss.

    This dummy never computes a loss, so the value stays `None` unless it is
    assigned from outside the class.
    """
    return self._actor_loss

  def call(self,
           observations,
           step_type,
           network_state,
           actions=None,
           training=False):
    """Returns a fixed action struct together with the unchanged state.

    Args:
      observations: Ignored.
      step_type: Ignored.
      network_state: Passed through to the output untouched.
      actions: Ignored.
      training: Ignored.

    Returns:
      A `(actions, network_state)` tuple, where `actions` is a
      `TensorSpecStruct` of constant tensors with a leading dimension of 1.
    """
    # The output never depends on the inputs, so nothing is computed from them.
    del observations, step_type, actions, training
    constant_actions = tensorspec_utils.TensorSpecStruct(
        world_vector=tf.constant(1., shape=[1, 3]),
        rotation_delta=tf.constant(1., shape=[1, 3]),
        terminate_episode=tf.constant(1, shape=[1, 2]),
        gripper_closedness_action=tf.constant(1., shape=[1, 1]),
    )
    return constant_actions, network_state

  @property
  def trainable_weights(self):
    """Returns a one-element list holding a scalar variable.

    Note that a new `tf.Variable` is constructed on every access.
    """
    return [tf.Variable(1.0)]
class SequenceAgentTestSetUp(tf.test.TestCase):
  """Defines spec for testing SequenceAgent and its subclass, tests create."""

  def setUp(self):
    """Builds the action spec, the time-step spec and the agent class to test.

    Action spec fields:
      world_vector: float32, shape (3,), bounded to [-1, 1].
      rotation_delta: float32, shape (3,), bounded to [-pi/2, pi/2].
      gripper_closedness_action: float32, shape (1,), bounded to [-1, 1].
      terminate_episode: int32, shape (2,), bounded to [0, 1].

    Observation spec fields:
      image: float32, shape [256, 320, 3], bounded to [0, 1].
      natural_language_embedding: float32, shape [512].
    """
    super().setUp()
    self._action_spec = tensorspec_utils.TensorSpecStruct()
    self._action_spec.world_vector = tensor_spec.BoundedTensorSpec(
        (3,), dtype=tf.float32, minimum=-1., maximum=1., name='world_vector')
    self._action_spec.rotation_delta = tensor_spec.BoundedTensorSpec(
        (3,),
        dtype=tf.float32,
        minimum=-np.pi / 2,
        maximum=np.pi / 2,
        name='rotation_delta')
    self._action_spec.gripper_closedness_action = tensor_spec.BoundedTensorSpec(
        (1,),
        dtype=tf.float32,
        minimum=-1.,
        maximum=1.,
        name='gripper_closedness_action')
    self._action_spec.terminate_episode = tensor_spec.BoundedTensorSpec(
        (2,), dtype=tf.int32, minimum=0, maximum=1, name='terminate_episode')

    state_spec = tensorspec_utils.TensorSpecStruct()
    state_spec.image = tensor_spec.BoundedTensorSpec([256, 320, 3],
                                                     dtype=tf.float32,
                                                     name='image',
                                                     minimum=0.,
                                                     maximum=1.)
    state_spec.natural_language_embedding = tensor_spec.TensorSpec(
        shape=[512], dtype=tf.float32, name='natural_language_embedding')
    self._time_step_spec = ts.time_step_spec(observation_spec=state_spec)

    # Stored as an attribute so create_agent_and_initialize() builds whichever
    # agent class is assigned here.
    self.sequence_agent_cls = sequence_agent.SequenceAgent

  def create_agent_and_initialize(self,
                                  actor_network: Type[
                                      network.Network] = DummyActorNet,
                                  **kwargs):
    """Creates the agent and initializes it.

    Args:
      actor_network: Network class handed to the agent; defaults to
        `DummyActorNet`.
      **kwargs: Extra keyword arguments forwarded to the agent constructor.

    Returns:
      The agent instance, after `initialize()` has been called on it.
    """
    agent = self.sequence_agent_cls(
        time_step_spec=self._time_step_spec,
        action_spec=self._action_spec,
        actor_network=actor_network,
        actor_optimizer=tf.keras.optimizers.Adam(),
        train_step_counter=tf.compat.v1.train.get_or_create_global_step(),
        **kwargs)
    agent.initialize()
    return agent

  def testCreateAgent(self):
    """Creates the agent and saves `agent.policy` under the test temp dir."""
    agent = self.create_agent_and_initialize()
    self.assertIsNotNone(agent.policy)
    policy_model_saver = policy_saver.PolicySaver(
        agent.policy,
        train_step=tf.compat.v2.Variable(
            0,
            trainable=False,
            dtype=tf.int64,
            aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
            shape=()),
        input_fn_and_spec=None)
    save_options = tf.saved_model.SaveOptions(
        experimental_io_device='/job:localhost',
        experimental_custom_gradients=False)
    # Write below the framework-managed temp directory rather than a fixed
    # /tmp path, so concurrent or repeated runs cannot collide and the test
    # does not depend on a POSIX filesystem layout.
    export_dir = os.path.join(self.get_temp_dir(), 'policy', '0')
    policy_model_saver.save(export_dir, options=save_options)
if __name__ == '__main__':
  # Run the test cases in this module only when it is executed as a script.
  tf.test.main()