From dfc3e531bddd933f97d2cdc16f7194d46a608edf Mon Sep 17 00:00:00 2001
From: kz
Date: Sat, 4 Mar 2023 20:59:10 -0800
Subject: [PATCH] add more starter code

---
 README.md                                   |  19 ++-
 amsolver/backend/task.py                    |   2 +-
 amsolver/backend/utils.py                   |   4 +-
 amsolver/gym/__init__.py                    |  12 +-
 .../gym/{rlbench_env.py => vlmbench_env.py} |  21 +--
 amsolver/observation_config.py              |  14 ++
 amsolver/task_environment.py                |   1 +
 amsolver/utils.py                           |   2 +-
 examples/gym_test.py                        |  55 +++++++
 examples/multi_task.py                      | 135 ++++++++++++++++++
 10 files changed, 240 insertions(+), 25 deletions(-)
 rename amsolver/gym/{rlbench_env.py => vlmbench_env.py} (89%)
 create mode 100644 examples/gym_test.py
 create mode 100644 examples/multi_task.py

diff --git a/README.md b/README.md
index 882fbc5..3729ccc 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,10 @@ This repo include the implementaions of AMSolver, VLMbench, and 6D-CLIPort.
 
 ## News
 
+### 03/04/2023
+
+- More starter code has been added under the [examples](examples) folder!
+
 ### 09/16/2022
 
 - The work has been accepted by NeurIPS 2022 (Datasets and Benchmarks) !
@@ -64,6 +68,9 @@ To train new 6D-CLIPort models:
 ```bash
 python vlm/scripts/train_baselines.py --data_dir /Path/to/VLMbench/Dataset --train_tasks TASK_NEED_TO_TRAIN
 ```
+## Examples
+We provide several examples to help you get started; see the code under the `examples` folder. `gym_test.py` shows how to run VLMbench as a `gym` environment.
+Make sure you have gymnasium installed (`pip install gymnasium`).
 
 ## Generate Customized Demonstrations
 
@@ -85,10 +92,12 @@ All tasks templates in the current vlmbench can be found in *vlm/tasks*. To gene
 ## Citation
 
 ```
-@inproceedings{zheng2022vlmbench,
-    author = {Zheng, Kaizhi and Chen, Xiaotong and Jenkins, Odest Chadwicke and Wang, Xin Eric},
-    booktitle = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks},
-    title = {VLMbench: A Compositional Benchmark for Vision-and-Language Manipulation},
-    year = {2022}
+@inproceedings{
+zheng2022vlmbench,
+title={{VLM}bench: A Compositional Benchmark for Vision-and-Language Manipulation},
+author={Kaizhi Zheng and Xiaotong Chen and Odest Jenkins and Xin Eric Wang},
+booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
+year={2022},
+url={https://openreview.net/forum?id=NAYoSV3tk9}
 }
 ```
\ No newline at end of file
diff --git a/amsolver/backend/task.py b/amsolver/backend/task.py
index e18d21f..37a57b5 100644
--- a/amsolver/backend/task.py
+++ b/amsolver/backend/task.py
@@ -22,7 +22,7 @@ from amsolver.backend.utils import ReadCustomDataBlock, WriteCustomDataBlock
 from amsolver.backend.waypoints import Point, PredefinedPath, Waypoint
 
-TASKS_PATH = join(dirname(abspath(__file__)), '../tasks')
+TASKS_PATH = join(dirname(abspath(__file__)), '../../vlm/tasks')
 
 
 class Task(object):
diff --git a/amsolver/backend/utils.py b/amsolver/backend/utils.py
index 64490a2..cb226e0 100644
--- a/amsolver/backend/utils.py
+++ b/amsolver/backend/utils.py
@@ -207,8 +207,8 @@ def task_file_to_task_class(task_file, parent_folder = 'amsolver'):
 def rgb_handles_to_mask(rgb_coded_handles):
     # rgb_coded_handles should be (w, h, c)
     # Handle encoded as : handle = R + G * 256 + B * 256 * 256
-    # rgb_coded_handles *= 255 # takes rgb range to 0 -> 255
-    rgb_coded_handles.astype(int)
+    rgb_coded_handles *= 255 # takes rgb range to 0 -> 255
+    rgb_coded_handles = rgb_coded_handles.astype(int)
     return (rgb_coded_handles[:, :, 0] +
             rgb_coded_handles[:, :, 1] * 256 +
             rgb_coded_handles[:, :, 2] * 256 * 256)
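The fixed `rgb_handles_to_mask` above can be sanity-checked in isolation. A minimal sketch (the pixel values are made up; it assumes normalized float RGB in [0, 1], as the comment in the function implies):

```python
import numpy as np
from amsolver.backend.utils import rgb_handles_to_mask

# Two pixels encoding object handles 255 (pure red) and 65280 (pure green),
# given as normalized floats the way a simulator render returns them.
# Note the function rescales its input in place, so pass a float array
# you do not mind mutating.
rgb = np.array([[[1.0, 0.0, 0.0],
                 [0.0, 1.0, 0.0]]])
print(rgb_handles_to_mask(rgb))  # [[  255 65280]]
```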
diff --git a/amsolver/gym/__init__.py b/amsolver/gym/__init__.py
index 26ff7f4..97866d7 100644
--- a/amsolver/gym/__init__.py
+++ b/amsolver/gym/__init__.py
@@ -1,8 +1,8 @@
-from gym.envs.registration import register
-import rlbench.backend.task as task
+from gymnasium.envs.registration import register
+import amsolver.backend.task as task
 import os
-from rlbench.utils import name_to_task_class
-from rlbench.gym.rlbench_env import RLBenchEnv
+from amsolver.utils import name_to_task_class
+from amsolver.gym.vlmbench_env import VLMBenchEnv
 
 TASKS = [t for t in os.listdir(task.TASKS_PATH)
          if t != '__init__.py' and t.endswith('.py')]
@@ -12,7 +12,7 @@
     task_class = name_to_task_class(task_name)
     register(
         id='%s-state-v0' % task_name,
-        entry_point='rlbench.gym:RLBenchEnv',
+        entry_point='amsolver.gym:VLMBenchEnv',
         kwargs={
             'task_class': task_class,
             'observation_mode': 'state'
@@ -20,7 +20,7 @@
     )
     register(
         id='%s-vision-v0' % task_name,
-        entry_point='rlbench.gym:RLBenchEnv',
+        entry_point='amsolver.gym:VLMBenchEnv',
         kwargs={
             'task_class': task_class,
             'observation_mode': 'vision'
diff --git a/amsolver/gym/rlbench_env.py b/amsolver/gym/vlmbench_env.py
similarity index 89%
rename from amsolver/gym/rlbench_env.py
rename to amsolver/gym/vlmbench_env.py
index 081a2cf..ac508c0 100644
--- a/amsolver/gym/rlbench_env.py
+++ b/amsolver/gym/vlmbench_env.py
@@ -1,18 +1,18 @@
 from typing import Union, Dict, Tuple
 
-import gym
-from gym import spaces
+import gymnasium as gym
+from gymnasium import spaces
 from pyrep.const import RenderMode
 from pyrep.objects.dummy import Dummy
 from pyrep.objects.vision_sensor import VisionSensor
-from rlbench.environment import Environment
-from rlbench.action_modes import ArmActionMode, ActionMode
-from rlbench.observation_config import ObservationConfig
+from amsolver.environment import Environment
+from amsolver.action_modes import ArmActionMode, ActionMode
+from amsolver.observation_config import ObservationConfig
 import numpy as np
 
 
-class RLBenchEnv(gym.Env):
-    """An gym wrapper for RLBench."""
+class VLMBenchEnv(gym.Env):
+    """A gym wrapper for VLMbench, adapted from RLBench's RLBenchEnv."""
 
     metadata = {'render.modes': ['human', 'rgb_array']}
 
@@ -35,7 +35,7 @@ def __init__(self, task_class, observation_mode='state',
         self.env.launch()
         self.task = self.env.get_task(task_class)
 
-        _, obs = self.task.reset()
+        desc, obs = self.task.reset()
 
         self.action_space = spaces.Box(
             low=-1.0, high=1.0, shape=(self.env.action_size,))
@@ -94,8 +94,9 @@ def render(self, mode='human') -> Union[None, np.ndarray]:
 
     def reset(self) -> Dict[str, np.ndarray]:
         descriptions, obs = self.task.reset()
-        del descriptions # Not used.
-        return self._extract_obs(obs)
+        obs_info = self._extract_obs(obs)
+        obs_info['descriptions'] = descriptions
+        return obs_info
 
     def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
         obs, reward, terminate = self.task.step(action)
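The loop in `amsolver/gym/__init__.py` above registers two environment ids per task file, both backed by `VLMBenchEnv`. A quick, hedged illustration (it assumes `pick_cube_color.py` is among the task files found in `vlm/tasks`, as the task list in `examples/multi_task.py` below suggests):

```python
import gymnasium as gym
import amsolver.gym  # importing the package runs the register() calls above

# Each task file yields a '-state-v0' and a '-vision-v0' id.
print(gym.spec('pick_cube_color-state-v0').entry_point)   # amsolver.gym:VLMBenchEnv
print(gym.spec('pick_cube_color-vision-v0').entry_point)  # amsolver.gym:VLMBenchEnv
```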
diff --git a/amsolver/observation_config.py b/amsolver/observation_config.py
index d9df413..10a5076 100644
--- a/amsolver/observation_config.py
+++ b/amsolver/observation_config.py
@@ -116,6 +116,20 @@ def set_all_low_dim(self, value: bool):
         self.wrist_camera_matrix = value
         self.task_low_dim_state = value
 
+    def set_image_size(self, image_size):
+        self.left_shoulder_camera.image_size = image_size
+        self.right_shoulder_camera.image_size = image_size
+        self.overhead_camera.image_size = image_size
+        self.wrist_camera.image_size = image_size
+        self.front_camera.image_size = image_size
+
+    def set_render_mode(self, render_mode):
+        self.left_shoulder_camera.render_mode = render_mode
+        self.right_shoulder_camera.render_mode = render_mode
+        self.overhead_camera.render_mode = render_mode
+        self.wrist_camera.render_mode = render_mode
+        self.front_camera.render_mode = render_mode
+
     def __eq__(self, o: object) -> bool:
         is_equal = True
         for attribute in self.__dict__:
diff --git a/amsolver/task_environment.py b/amsolver/task_environment.py
index 7bea41d..ee236e6 100644
--- a/amsolver/task_environment.py
+++ b/amsolver/task_environment.py
@@ -253,6 +253,7 @@ def step(self, action, collision_checking=None, use_auto_move=True, recorder = N
         elif ee_action < 0.5:
             ee_action = 0.0
 
+        success_in_path = []
         if self._action_mode.arm == ArmActionMode.ABS_JOINT_VELOCITY:
 
             self._assert_action_space(arm_action,
diff --git a/amsolver/utils.py b/amsolver/utils.py
index 6ed48ff..3dee026 100644
--- a/amsolver/utils.py
+++ b/amsolver/utils.py
@@ -20,7 +20,7 @@ class InvalidTaskName(Exception):
     pass
 
 
-def name_to_task_class(task_file: str, parent_folder = "amsolver"):
+def name_to_task_class(task_file: str, parent_folder = "vlm"):
     name = task_file.replace('.py', '')
     class_name = ''.join([w[0].upper() + w[1:] for w in name.split('_')])
     try:
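`name_to_task_class` above camel-cases the task file name to find the task class, now looking under `vlm` by default. The rule in isolation (a standalone restatement of the two lines shown, for illustration only):

```python
# 'pick_cube_color.py' resolves to the class name 'PickCubeColor'.
name = 'pick_cube_color.py'.replace('.py', '')
class_name = ''.join([w[0].upper() + w[1:] for w in name.split('_')])
print(class_name)  # PickCubeColor
```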
diff --git a/examples/gym_test.py b/examples/gym_test.py
new file mode 100644
index 0000000..136387d
--- /dev/null
+++ b/examples/gym_test.py
@@ -0,0 +1,55 @@
+import gymnasium as gym
+import amsolver.gym
+
+"""
+Data structure of Observation:
+{
+    'front_rgb': np.array,
+    'front_depth': np.array,
+    'front_mask': np.array,
+    'front_point_cloud': np.array,
+    'left_shoulder_rgb': np.array,
+    'left_shoulder_depth': np.array,
+    'left_shoulder_mask': np.array,
+    'left_shoulder_point_cloud': np.array,
+    'right_shoulder_rgb': np.array,
+    'right_shoulder_depth': np.array,
+    'right_shoulder_mask': np.array,
+    'right_shoulder_point_cloud': np.array,
+    'wrist_rgb': np.array,
+    'wrist_depth': np.array,
+    'wrist_mask': np.array,
+    'wrist_point_cloud': np.array,
+    'overhead_rgb': np.array,
+    'overhead_depth': np.array,
+    'overhead_mask': np.array,
+    'overhead_point_cloud': np.array,
+    'gripper_joint_positions': np.array,
+    'gripper_touch_forces': np.array,
+    'gripper_pose': np.array,
+    'gripper_matrix': np.array,
+    'gripper_open': np.array,
+    'joint_positions': np.array,
+    'joint_velocities': np.array,
+    'joint_forces': np.array,
+    'misc': dict,  # contains camera extrinsics, camera intrinsics, near/far clipping planes, etc.
+    'object_informations': dict,  # contains two types of entries.
+        # For each object, the dict includes its convex hull and its visual hull,
+        # e.g. 'pencil1_0' and 'pencil1_visual0'.
+        # For each waypoint, the dict includes the pose, type, target object (the
+        # object this waypoint moves to or approaches), and a low-level description
+        # of the waypoint.
+}
+"""
+
+env = gym.make('drop_pen_color-vision-v0', render_mode='human')
+
+training_steps = 120
+episode_length = 40
+for i in range(training_steps):
+    if i % episode_length == 0:
+        print('Reset Episode')
+        obs = env.reset()
+        descriptions = obs['descriptions']
+    obs, reward, terminate, _ = env.step(env.action_space.sample())
+    env.render()  # Note: rendering increases step time.
+
+print('Done')
+env.close()
\ No newline at end of file
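Run it with `python examples/gym_test.py`. For reference, a short sketch of pulling individual entries out of the observation dict documented above (only top-level keys from the docstring are used; image shapes depend on the configured image size):

```python
import gymnasium as gym
import amsolver.gym

env = gym.make('drop_pen_color-vision-v0', render_mode='human')
obs = env.reset()

front_rgb = obs['front_rgb']            # (H, W, 3) front camera image
point_cloud = obs['front_point_cloud']  # per-pixel 3D points for the same view
instructions = obs['descriptions']      # language instructions merged in by reset()
print(instructions)

env.close()
```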
diff --git a/examples/multi_task.py b/examples/multi_task.py
new file mode 100644
index 0000000..b39ee03
--- /dev/null
+++ b/examples/multi_task.py
@@ -0,0 +1,135 @@
+import argparse
+from distutils.util import strtobool
+from pathlib import Path
+import os
+from amsolver.environment import Environment
+from amsolver.action_modes import ArmActionMode, ActionMode
+from amsolver.observation_config import ObservationConfig
+import numpy as np
+from amsolver.backend.utils import task_file_to_task_class
+
+"""
+Data structure of Observation:
+{
+    'front_rgb': np.array,
+    'front_depth': np.array,
+    'front_mask': np.array,
+    'front_point_cloud': np.array,
+    'left_shoulder_rgb': np.array,
+    'left_shoulder_depth': np.array,
+    'left_shoulder_mask': np.array,
+    'left_shoulder_point_cloud': np.array,
+    'right_shoulder_rgb': np.array,
+    'right_shoulder_depth': np.array,
+    'right_shoulder_mask': np.array,
+    'right_shoulder_point_cloud': np.array,
+    'wrist_rgb': np.array,
+    'wrist_depth': np.array,
+    'wrist_mask': np.array,
+    'wrist_point_cloud': np.array,
+    'overhead_rgb': np.array,
+    'overhead_depth': np.array,
+    'overhead_mask': np.array,
+    'overhead_point_cloud': np.array,
+    'gripper_joint_positions': np.array,
+    'gripper_touch_forces': np.array,
+    'gripper_pose': np.array,
+    'gripper_matrix': np.array,
+    'gripper_open': np.array,
+    'joint_positions': np.array,
+    'joint_velocities': np.array,
+    'joint_forces': np.array,
+    'misc': dict,  # contains camera extrinsics, camera intrinsics, near/far clipping planes, etc.
+    'object_informations': dict,  # contains two types of entries.
+        # For each object, the dict includes its convex hull and its visual hull,
+        # e.g. 'pencil1_0' and 'pencil1_visual0'.
+        # For each waypoint, the dict includes the pose, type, target object (the
+        # object this waypoint moves to or approaches), and a low-level description
+        # of the waypoint.
+}
+"""
+
+task_dict = {
+    'drop': ['drop_pen_color', 'drop_pen_relative', 'drop_pen_size'],
+    'pick': ['pick_cube_shape', 'pick_cube_relative', 'pick_cube_color', 'pick_cube_size'],
+    'stack': ['stack_cubes_color', 'stack_cubes_relative', 'stack_cubes_shape', 'stack_cubes_size'],
+    'shape_sorter': ['place_into_shape_sorter_color', 'place_into_shape_sorter_relative', 'place_into_shape_sorter_shape'],
+    'wipe': ['wipe_table_shape', 'wipe_table_color', 'wipe_table_relative', 'wipe_table_size', 'wipe_table_direction'],
+    'pour': ['pour_demo_color', 'pour_demo_relative', 'pour_demo_size'],
+    'drawer': ['open_drawer'],
+    'door': ['open_door'],  # untested
+    'door_complex': ['open_door_complex'],
+}
+
+class Agent(object):
+
+    def __init__(self, action_shape):
+        self.action_shape = action_shape
+
+    def act(self, obs, descriptions):
+        arm = np.random.normal(0.0, 0.1, size=(self.action_shape - 1,))
+        gripper = [1.0]  # Always open
+        return np.concatenate([arm, gripper], axis=-1)
+
+def add_arguments():
+    parser = argparse.ArgumentParser(description='')
+    # dataset
+    parser.add_argument('--data_folder', type=str)
+    parser.add_argument('--setd', type=str, default="seen")
+    parser.add_argument('--img_size', nargs='+', type=int, default=[360, 360])
+    parser.add_argument('--task', type=str, default=None, help="select a task type from: drop, pick, stack, shape_sorter, wipe, pour, drawer, door_complex")
+    parser.add_argument('--use_collect_data', type=lambda x: bool(strtobool(x)), default=True)
+    args = parser.parse_args()
+    return args
+
+def load_test_config(data_folder: Path, task_name):
+    episode_list = []
+    for path in data_folder.rglob('configs*'):
+        t_name = path.parents[3].name
+        if t_name == task_name:
+            episode_list.append(path.parent)
+    return episode_list
+
+if __name__ == "__main__":
+    args = add_arguments()
+    obs_config = ObservationConfig()
+    obs_config.set_all(True)
+    obs_config.set_image_size(args.img_size)
+
+    if args.task in task_dict:
+        task_files = task_dict[args.task]
+    else:
+        task_files = [args.task]
+
+    eval_tasks = [task_file_to_task_class(t, parent_folder='vlm') for t in task_files]
+    data_folder = Path(os.path.join(args.data_folder, args.setd))
+
+    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
+    env = Environment(action_mode, obs_config=obs_config, headless=False)  # headless=False shows the simulator; set it to True to run without the GUI
+    env.launch()
+
+    agent = Agent(env.action_size)
+    need_test_numbers = 10
+    action_steps = 10
+    for i, task_to_use in enumerate(eval_tasks):
+        task = env.get_task(task_to_use)
+
+        if args.use_collect_data:
+            e_path = load_test_config(data_folder, task_files[i])
+            for num, e in enumerate(e_path):
+                if num >= need_test_numbers:
+                    break
+                task_base = str(e / "task_base.ttm")
+                waypoint_sets = str(e / "waypoint_sets.ttm")
+                config = str(e / "configs.pkl")
+                descriptions, obs = task.load_config(task_base, waypoint_sets, config)
+                print(descriptions)
+                for _ in range(action_steps):
+                    action = agent.act(obs, descriptions)
+                    obs, reward, terminate = task.step(action)
+        else:
+            for _ in range(need_test_numbers):
+                descriptions, obs = task.reset()
+                print(descriptions)
+                for _ in range(action_steps):
+                    action = agent.act(obs, descriptions)
+                    obs, reward, terminate = task.step(action)
+
+    env.shutdown()
\ No newline at end of file
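An example invocation of the script above would be `python examples/multi_task.py --data_folder /Path/to/VLMbench/Dataset --task pick` (the dataset path is a placeholder, following the README's convention). `load_test_config` locates episodes by walking up from each saved `configs` file; a sketch of the directory depth it implies (the folder names here are illustrative assumptions; only the `parents[3]` relationship is fixed by the code):

```python
from pathlib import Path

# load_test_config matches path.parents[3].name against the task name, so a
# configs file is expected four path components below the task-name folder:
#   <data_folder>/<setd>/<task_name>/<variation>/episodes/<episode>/configs.pkl
p = Path('dataset/seen/drop_pen_color/variation0/episodes/episode0/configs.pkl')
print(p.parents[3].name)  # drop_pen_color -> compared against task_name
print(p.parent)           # the episode directory that load_test_config returns
```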