Skip to content

Commit

Permalink
add more starting codes
Browse files Browse the repository at this point in the history
  • Loading branch information
KzZheng committed Mar 5, 2023
1 parent c0bc31e commit dfc3e53
Show file tree
Hide file tree
Showing 10 changed files with 240 additions and 25 deletions.
19 changes: 14 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ This repo include the implementaions of AMSolver, VLMbench, and 6D-CLIPort.

## News

### 03/04/2023

- More starting codes have been added under the [examples](examples) folder!

### 09/16/2022

- The work has been accepted by NeurIPS 2022 (Datasets and Benchmarks) !
Expand Down Expand Up @@ -64,6 +68,9 @@ To train new 6D-CLIPort models:
```bash
python vlm/scripts/train_baselines.py --data_dir /Path/to/VLMbench/Dataset --train_tasks TASK_NEED_TO_TRAIN
```
## Examples
We provided several example codes for getting start. Please check the code under ```examples``` folder. The ```gym_test.py``` shows how to run the vlmbench as ```gym``` environment.
Ensure you have gym installed (`pip install gymnasium`)

## Generate Customized Demonstrations

Expand All @@ -85,10 +92,12 @@ All tasks templates in the current vlmbench can be found in *vlm/tasks*. To gene
## Citation

```
@inproceedings{zheng2022vlmbench,
author = {Zheng, Kaizhi and Chen, Xiaotong and Jenkins, Odest Chadwicke and Wang, Xin Eric},
booktitle = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks},
title = {VLMbench: A Compositional Benchmark for Vision-and-Language Manipulation},
year = {2022}
@inproceedings{
zheng2022vlmbench,
title={{VLM}bench: A Compositional Benchmark for Vision-and-Language Manipulation},
author={Kaizhi Zheng and Xiaotong Chen and Odest Jenkins and Xin Eric Wang},
booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
year={2022},
url={https://openreview.net/forum?id=NAYoSV3tk9}
}
```
2 changes: 1 addition & 1 deletion amsolver/backend/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from amsolver.backend.utils import ReadCustomDataBlock, WriteCustomDataBlock
from amsolver.backend.waypoints import Point, PredefinedPath, Waypoint

TASKS_PATH = join(dirname(abspath(__file__)), '../tasks')
TASKS_PATH = join(dirname(abspath(__file__)), '../../vlm/tasks')


class Task(object):
Expand Down
4 changes: 2 additions & 2 deletions amsolver/backend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,8 @@ def task_file_to_task_class(task_file, parent_folder = 'amsolver'):
def rgb_handles_to_mask(rgb_coded_handles):
# rgb_coded_handles should be (w, h, c)
# Handle encoded as : handle = R + G * 256 + B * 256 * 256
# rgb_coded_handles *= 255 # takes rgb range to 0 -> 255
rgb_coded_handles.astype(int)
rgb_coded_handles *= 255 # takes rgb range to 0 -> 255
rgb_coded_handles = rgb_coded_handles.astype(int)
return (rgb_coded_handles[:, :, 0] +
rgb_coded_handles[:, :, 1] * 256 +
rgb_coded_handles[:, :, 2] * 256 * 256)
Expand Down
12 changes: 6 additions & 6 deletions amsolver/gym/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from gym.envs.registration import register
import rlbench.backend.task as task
from gymnasium.envs.registration import register
import amsolver.backend.task as task
import os
from rlbench.utils import name_to_task_class
from rlbench.gym.rlbench_env import RLBenchEnv
from amsolver.utils import name_to_task_class
from amsolver.gym.vlmbench_env import VLMBenchEnv

TASKS = [t for t in os.listdir(task.TASKS_PATH)
if t != '__init__.py' and t.endswith('.py')]
Expand All @@ -12,15 +12,15 @@
task_class = name_to_task_class(task_name)
register(
id='%s-state-v0' % task_name,
entry_point='rlbench.gym:RLBenchEnv',
entry_point='amsolver.gym:VLMBenchEnv',
kwargs={
'task_class': task_class,
'observation_mode': 'state'
}
)
register(
id='%s-vision-v0' % task_name,
entry_point='rlbench.gym:RLBenchEnv',
entry_point='amsolver.gym:VLMBenchEnv',
kwargs={
'task_class': task_class,
'observation_mode': 'vision'
Expand Down
21 changes: 11 additions & 10 deletions amsolver/gym/rlbench_env.py → amsolver/gym/vlmbench_env.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from typing import Union, Dict, Tuple

import gym
from gym import spaces
import gymnasium as gym
from gymnasium import spaces
from pyrep.const import RenderMode
from pyrep.objects.dummy import Dummy
from pyrep.objects.vision_sensor import VisionSensor
from rlbench.environment import Environment
from rlbench.action_modes import ArmActionMode, ActionMode
from rlbench.observation_config import ObservationConfig
from amsolver.environment import Environment
from amsolver.action_modes import ArmActionMode, ActionMode
from amsolver.observation_config import ObservationConfig
import numpy as np


class RLBenchEnv(gym.Env):
"""An gym wrapper for RLBench."""
class VLMBenchEnv(gym.Env):
"""An gym wrapper for VLMbench. Copy from RLbenchEnv."""

metadata = {'render.modes': ['human', 'rgb_array']}

Expand All @@ -35,7 +35,7 @@ def __init__(self, task_class, observation_mode='state',
self.env.launch()
self.task = self.env.get_task(task_class)

_, obs = self.task.reset()
desc, obs = self.task.reset()

self.action_space = spaces.Box(
low=-1.0, high=1.0, shape=(self.env.action_size,))
Expand Down Expand Up @@ -94,8 +94,9 @@ def render(self, mode='human') -> Union[None, np.ndarray]:

def reset(self) -> Dict[str, np.ndarray]:
descriptions, obs = self.task.reset()
del descriptions # Not used.
return self._extract_obs(obs)
obs_info = self._extract_obs(obs)
obs_info['descriptions'] = descriptions
return obs_info

def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
obs, reward, terminate = self.task.step(action)
Expand Down
14 changes: 14 additions & 0 deletions amsolver/observation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,20 @@ def set_all_low_dim(self, value: bool):
self.wrist_camera_matrix = value
self.task_low_dim_state = value

def set_image_size(self, image_size):
self.left_shoulder_camera.image_size = image_size
self.right_shoulder_camera.image_size = image_size
self.overhead_camera.image_size = image_size
self.wrist_camera.image_size = image_size
self.front_camera.image_size = image_size

def set_render_mode(self, render_mode):
self.left_shoulder_camera.render_mode = render_mode
self.right_shoulder_camera.render_mode = render_mode
self.overhead_camera.render_mode = render_mode
self.wrist_camera.render_mode = render_mode
self.front_camera.render_mode = render_mode

def __eq__(self, o: object) -> bool:
is_equal = True
for attribute in self.__dict__:
Expand Down
1 change: 1 addition & 0 deletions amsolver/task_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ def step(self, action, collision_checking=None, use_auto_move=True, recorder = N
elif ee_action < 0.5:
ee_action = 0.0

success_in_path= []
if self._action_mode.arm == ArmActionMode.ABS_JOINT_VELOCITY:

self._assert_action_space(arm_action,
Expand Down
2 changes: 1 addition & 1 deletion amsolver/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class InvalidTaskName(Exception):
pass


def name_to_task_class(task_file: str, parent_folder = "amsolver"):
def name_to_task_class(task_file: str, parent_folder = "vlm"):
name = task_file.replace('.py', '')
class_name = ''.join([w[0].upper() + w[1:] for w in name.split('_')])
try:
Expand Down
55 changes: 55 additions & 0 deletions examples/gym_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import gymnasium as gym
import amsolver.gym

"""
Data structure of Observation:
{
'front_rgb': np.array,
'front_depth': np.array,
'front_mask': np.array,
'front_point_cloud': np.array,
'left_shoulder_rgb': np.array,
'left_shoulder_depth': np.array,
'left_shoulder_mask': np.array,
'left_shoulder_point_cloud': np.array,
'right_shoulder_rgb': np.array,
'right_shoulder_depth': np.array,
'right_shoulder_mask': np.array,
'right_shoulder_point_cloud': np.array,
'wrist_rgb': np.array,
'wrist_depth': np.array,
'wrist_mask': np.array,
'wrist_point_cloud': np.array,
'overhead_rgb': np.array,
'overhead_depth': np.array,
'overhead_mask': np.array,
'overhead_point_cloud': np.array,
'gripper_joint_positions': np.array,
'gripper_touch_forces': np.array,
'gripper_pose': np.array,
'gripper_matrix': np.array,
'gripper_open': np.array,
'joint_positions': np.array,
'joint_velocities': np.array,
'joint_forces': np.array,
'misc': dict, # contains camera extrinsics, camera intrinsics, near/far clipping planes, etc.
'object_informations': dict, # contains two types of information: object information and waypoint information \
for each object, the dict includes the convex hull of the object and the visual hull of the object e.g. 'pencil1_0' and 'pencil1_visual0'\
for each waypoint, the dict includes the pose, type, target object (this waypoint is used for moving/getting close to which object), and low-level description of the waypoint.
}
"""

env = gym.make('drop_pen_color-vision-v0', render_mode='human')

training_steps = 120
episode_length = 40
for i in range(training_steps):
if i % episode_length == 0:
print('Reset Episode')
obs = env.reset()
descriptions = obs['descriptions']
obs, reward, terminate, _ = env.step(env.action_space.sample())
env.render() # Note: rendering increases step time.

print('Done')
env.close()
135 changes: 135 additions & 0 deletions examples/multi_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import argparse
from distutils.util import strtobool
from pathlib import Path
import os
from amsolver.environment import Environment
from amsolver.action_modes import ArmActionMode, ActionMode
from amsolver.observation_config import ObservationConfig
import numpy as np
from amsolver.backend.utils import task_file_to_task_class

"""
Data structure of Observation:
{
'front_rgb': np.array,
'front_depth': np.array,
'front_mask': np.array,
'front_point_cloud': np.array,
'left_shoulder_rgb': np.array,
'left_shoulder_depth': np.array,
'left_shoulder_mask': np.array,
'left_shoulder_point_cloud': np.array,
'right_shoulder_rgb': np.array,
'right_shoulder_depth': np.array,
'right_shoulder_mask': np.array,
'right_shoulder_point_cloud': np.array,
'wrist_rgb': np.array,
'wrist_depth': np.array,
'wrist_mask': np.array,
'wrist_point_cloud': np.array,
'overhead_rgb': np.array,
'overhead_depth': np.array,
'overhead_mask': np.array,
'overhead_point_cloud': np.array,
'gripper_joint_positions': np.array,
'gripper_touch_forces': np.array,
'gripper_pose': np.array,
'gripper_matrix': np.array,
'gripper_open': np.array,
'joint_positions': np.array,
'joint_velocities': np.array,
'joint_forces': np.array,
'misc': dict, # contains camera extrinsics, camera intrinsics, near/far clipping planes, etc.
'object_informations': dict, # contains two types of information: object information and waypoint information \
for each object, the dict includes the convex hull of the object and the visual hull of the object e.g. 'pencil1_0' and 'pencil1_visual0'\
for each waypoint, the dict includes the pose, type, target object (this waypoint is used for moving/getting close to which object), and low-level description of the waypoint.
}
"""

task_dict = {
'drop': ['drop_pen_color', 'drop_pen_relative', 'drop_pen_size'],
'pick': ['pick_cube_shape', 'pick_cube_relative', 'pick_cube_color', 'pick_cube_size'],
'stack': ['stack_cubes_color', 'stack_cubes_relative', 'stack_cubes_shape', 'stack_cubes_size'],
'shape_sorter': ['place_into_shape_sorter_color', 'place_into_shape_sorter_relative', 'place_into_shape_sorter_shape'],
'wipe': ['wipe_table_shape', 'wipe_table_color', 'wipe_table_relative', 'wipe_table_size', 'wipe_table_direction'],
'pour': ['pour_demo_color', 'pour_demo_relative', 'pour_demo_size'],
'drawer': ['open_drawer'],
'door': ['open_door'], # untested
'door_complex': ['open_door_complex'],
}

class Agent(object):

def __init__(self, action_shape):
self.action_shape = action_shape

def act(self, obs, descriptions):
arm = np.random.normal(0.0, 0.1, size=(self.action_shape-1,))
gripper = [1.0] # Always open
return np.concatenate([arm, gripper], axis=-1)

def add_argments():
parser = argparse.ArgumentParser(description='')
#dataset
parser.add_argument('--data_folder', type=str)
parser.add_argument('--setd', type=str, default="seen")
parser.add_argument('--img_size',nargs='+', type=int, default=[360,360])
parser.add_argument('--task', type=str, default=None, help="select a task type from drop, pick, stack, shape_sorter, wipe, pour, drawer, door_complex")
parser.add_argument('--use_collect_data', type=lambda x:bool(strtobool(x)), default=True)
args = parser.parse_args()
return args

def load_test_config(data_folder: Path, task_name):
episode_list = []
for path in data_folder.rglob('configs*'):
t_name = path.parents[3].name
if t_name == task_name:
episode_list.append(path.parent)
return episode_list

if __name__=="__main__":
args = add_argments()
obs_config = ObservationConfig()
obs_config.set_all(True)
obs_config.set_image_size(args.img_size)

if args.task in task_dict:
task_files = task_dict[args.task]
else:
task_files = [args.task]

eval_tasks = [task_file_to_task_class(t, parent_folder = 'vlm') for t in task_files]
data_folder = Path(os.path.join(args.data_folder, args.setd))

action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode, obs_config=obs_config, headless=False) # set headless=False, if user want to visualize the simulator
env.launch()

agent = Agent(env.action_size)
need_test_numbers = 10
action_steps = 10
for i, task_to_use in enumerate(eval_tasks):
task = env.get_task(task_to_use)

if args.use_collect_data:
e_path = load_test_config(data_folder, task_files[i])
for num, e in enumerate(e_path):
if num >= need_test_numbers:
break
task_base = str(e/"task_base.ttm")
waypoint_sets = str(e/"waypoint_sets.ttm")
config = str(e/"configs.pkl")
descriptions, obs = task.load_config(task_base, waypoint_sets, config)
print(descriptions)
for _ in range(action_steps):
action = agent.act(obs, descriptions)
obs, reward, terminate = task.step(action)
else:
for _ in range(need_test_numbers):
descriptions, obs = task.reset()
print(descriptions)
for _ in range(action_steps):
action = agent.act(obs, descriptions)
obs, reward, terminate = task.step(action)

env.shutdown()

0 comments on commit dfc3e53

Please sign in to comment.