add more starting codes

eric-ai-lab · Mar 5, 2023 · dfc3e53 · dfc3e53
1 parent c0bc31e
commit dfc3e53
Show file tree

Hide file tree

Showing 10 changed files with 240 additions and 25 deletions.
diff --git a/README.md b/README.md
@@ -8,6 +8,10 @@ This repo include the implementaions of AMSolver, VLMbench, and 6D-CLIPort.
 
 ## News
 
+### 03/04/2023
+
+- More starting codes have been added under the [examples](examples) folder!
+
 ### 09/16/2022
 
 - The work has been accepted by NeurIPS 2022 (Datasets and Benchmarks) !
@@ -64,6 +68,9 @@ To train new 6D-CLIPort models:
 ```bash
 python vlm/scripts/train_baselines.py --data_dir /Path/to/VLMbench/Dataset --train_tasks TASK_NEED_TO_TRAIN
 ```
+## Examples
+We provided several example codes for getting start. Please check the code under ```examples``` folder. The ```gym_test.py``` shows how to run the vlmbench as ```gym``` environment.
+Ensure you have gym installed (`pip install gymnasium`)
 
 ## Generate Customized Demonstrations
 
@@ -85,10 +92,12 @@ All tasks templates in the current vlmbench can be found in *vlm/tasks*. To gene
 ## Citation
 
 ```
-@inproceedings{zheng2022vlmbench,
- author = {Zheng, Kaizhi and Chen, Xiaotong and Jenkins, Odest Chadwicke and Wang, Xin Eric},
- booktitle = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks},
- title = {VLMbench: A Compositional Benchmark for Vision-and-Language Manipulation},
- year = {2022}
+@inproceedings{
+zheng2022vlmbench,
+title={{VLM}bench: A Compositional Benchmark for Vision-and-Language Manipulation},
+author={Kaizhi Zheng and Xiaotong Chen and Odest Jenkins and Xin Eric Wang},
+booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
+year={2022},
+url={https://openreview.net/forum?id=NAYoSV3tk9}
 }
 ```
diff --git a/amsolver/backend/task.py b/amsolver/backend/task.py
@@ -22,7 +22,7 @@
 from amsolver.backend.utils import ReadCustomDataBlock, WriteCustomDataBlock
 from amsolver.backend.waypoints import Point, PredefinedPath, Waypoint
 
-TASKS_PATH = join(dirname(abspath(__file__)), '../tasks')
+TASKS_PATH = join(dirname(abspath(__file__)), '../../vlm/tasks')
 
 
 class Task(object):

diff --git a/amsolver/backend/utils.py b/amsolver/backend/utils.py
@@ -207,8 +207,8 @@ def task_file_to_task_class(task_file, parent_folder = 'amsolver'):
 def rgb_handles_to_mask(rgb_coded_handles):
   # rgb_coded_handles should be (w, h, c)
   # Handle encoded as : handle = R + G * 256 + B * 256 * 256
-  # rgb_coded_handles *= 255  # takes rgb range to 0 -> 255
-  rgb_coded_handles.astype(int)
+  rgb_coded_handles *= 255  # takes rgb range to 0 -> 255
+  rgb_coded_handles = rgb_coded_handles.astype(int)
   return (rgb_coded_handles[:, :, 0] +
           rgb_coded_handles[:, :, 1] * 256 +
           rgb_coded_handles[:, :, 2] * 256 * 256)

diff --git a/amsolver/gym/__init__.py b/amsolver/gym/__init__.py
@@ -1,8 +1,8 @@
-from gym.envs.registration import register
-import rlbench.backend.task as task
+from gymnasium.envs.registration import register
+import amsolver.backend.task as task
 import os
-from rlbench.utils import name_to_task_class
-from rlbench.gym.rlbench_env import RLBenchEnv
+from amsolver.utils import name_to_task_class
+from amsolver.gym.vlmbench_env import VLMBenchEnv
 
 TASKS = [t for t in os.listdir(task.TASKS_PATH)
          if t != '__init__.py' and t.endswith('.py')]
@@ -12,15 +12,15 @@
     task_class = name_to_task_class(task_name)
     register(
         id='%s-state-v0' % task_name,
-        entry_point='rlbench.gym:RLBenchEnv',
+        entry_point='amsolver.gym:VLMBenchEnv',
         kwargs={
             'task_class': task_class,
             'observation_mode': 'state'
         }
     )
     register(
         id='%s-vision-v0' % task_name,
-        entry_point='rlbench.gym:RLBenchEnv',
+        entry_point='amsolver.gym:VLMBenchEnv',
         kwargs={
             'task_class': task_class,
             'observation_mode': 'vision'

diff --git a/amsolver/gym/rlbench_env.py → amsolver/gym/vlmbench_env.py b/amsolver/gym/rlbench_env.py → amsolver/gym/vlmbench_env.py
@@ -1,18 +1,18 @@
 from typing import Union, Dict, Tuple
 
-import gym
-from gym import spaces
+import gymnasium as gym
+from gymnasium import spaces
 from pyrep.const import RenderMode
 from pyrep.objects.dummy import Dummy
 from pyrep.objects.vision_sensor import VisionSensor
-from rlbench.environment import Environment
-from rlbench.action_modes import ArmActionMode, ActionMode
-from rlbench.observation_config import ObservationConfig
+from amsolver.environment import Environment
+from amsolver.action_modes import ArmActionMode, ActionMode
+from amsolver.observation_config import ObservationConfig
 import numpy as np
 
 
-class RLBenchEnv(gym.Env):
-    """An gym wrapper for RLBench."""
+class VLMBenchEnv(gym.Env):
+    """An gym wrapper for VLMbench. Copy from RLbenchEnv."""
 
     metadata = {'render.modes': ['human', 'rgb_array']}
 
@@ -35,7 +35,7 @@ def __init__(self, task_class, observation_mode='state',
         self.env.launch()
         self.task = self.env.get_task(task_class)
 
-        _, obs = self.task.reset()
+        desc, obs = self.task.reset()
 
         self.action_space = spaces.Box(
             low=-1.0, high=1.0, shape=(self.env.action_size,))
@@ -94,8 +94,9 @@ def render(self, mode='human') -> Union[None, np.ndarray]:
 
     def reset(self) -> Dict[str, np.ndarray]:
         descriptions, obs = self.task.reset()
-        del descriptions  # Not used.
-        return self._extract_obs(obs)
+        obs_info = self._extract_obs(obs)
+        obs_info['descriptions'] = descriptions
+        return obs_info
 
     def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
         obs, reward, terminate = self.task.step(action)

diff --git a/amsolver/observation_config.py b/amsolver/observation_config.py
@@ -116,6 +116,20 @@ def set_all_low_dim(self, value: bool):
         self.wrist_camera_matrix = value
         self.task_low_dim_state = value
 
+    def set_image_size(self, image_size):
+        self.left_shoulder_camera.image_size = image_size
+        self.right_shoulder_camera.image_size = image_size
+        self.overhead_camera.image_size = image_size
+        self.wrist_camera.image_size = image_size
+        self.front_camera.image_size = image_size
+
+    def set_render_mode(self, render_mode):
+        self.left_shoulder_camera.render_mode = render_mode
+        self.right_shoulder_camera.render_mode = render_mode
+        self.overhead_camera.render_mode = render_mode
+        self.wrist_camera.render_mode = render_mode
+        self.front_camera.render_mode = render_mode
+
     def __eq__(self, o: object) -> bool:
         is_equal = True
         for attribute in self.__dict__:

diff --git a/amsolver/task_environment.py b/amsolver/task_environment.py
@@ -253,6 +253,7 @@ def step(self, action, collision_checking=None, use_auto_move=True, recorder = N
         elif ee_action < 0.5:
             ee_action = 0.0
 
+        success_in_path= []
         if self._action_mode.arm == ArmActionMode.ABS_JOINT_VELOCITY:
 
             self._assert_action_space(arm_action,

diff --git a/amsolver/utils.py b/amsolver/utils.py
@@ -20,7 +20,7 @@ class InvalidTaskName(Exception):
     pass
 
 
-def name_to_task_class(task_file: str, parent_folder = "amsolver"):
+def name_to_task_class(task_file: str, parent_folder = "vlm"):
     name = task_file.replace('.py', '')
     class_name = ''.join([w[0].upper() + w[1:] for w in name.split('_')])
     try:

diff --git a/examples/gym_test.py b/examples/gym_test.py
@@ -0,0 +1,55 @@
+import gymnasium as gym
+import amsolver.gym
+
+"""
+Data structure of Observation:
+{
+    'front_rgb': np.array,
+    'front_depth': np.array,
+    'front_mask': np.array,
+    'front_point_cloud': np.array,
+    'left_shoulder_rgb': np.array,
+    'left_shoulder_depth': np.array,
+    'left_shoulder_mask': np.array,
+    'left_shoulder_point_cloud': np.array,
+    'right_shoulder_rgb': np.array,
+    'right_shoulder_depth': np.array,
+    'right_shoulder_mask': np.array,
+    'right_shoulder_point_cloud': np.array,
+    'wrist_rgb': np.array,
+    'wrist_depth': np.array,
+    'wrist_mask': np.array,
+    'wrist_point_cloud': np.array,
+    'overhead_rgb': np.array,
+    'overhead_depth': np.array,
+    'overhead_mask': np.array,
+    'overhead_point_cloud': np.array,
+    'gripper_joint_positions': np.array,
+    'gripper_touch_forces': np.array,
+    'gripper_pose': np.array,
+    'gripper_matrix': np.array,
+    'gripper_open': np.array,
+    'joint_positions': np.array,
+    'joint_velocities': np.array,
+    'joint_forces': np.array,
+    'misc': dict, # contains camera extrinsics, camera intrinsics, near/far clipping planes, etc.
+    'object_informations': dict, # contains two types of information: object information and waypoint information \
+        for each object, the dict includes the convex hull of the object and the visual hull of the object e.g. 'pencil1_0' and 'pencil1_visual0'\
+        for each waypoint, the dict includes the pose, type, target object (this waypoint is used for moving/getting close to which object), and low-level description of the waypoint.
+}
+"""
+
+env = gym.make('drop_pen_color-vision-v0', render_mode='human')
+
+training_steps = 120
+episode_length = 40
+for i in range(training_steps):
+    if i % episode_length == 0:
+        print('Reset Episode')
+        obs = env.reset()
+        descriptions = obs['descriptions']
+    obs, reward, terminate, _ = env.step(env.action_space.sample())
+    env.render()  # Note: rendering increases step time.
+
+print('Done')
+env.close()
diff --git a/examples/multi_task.py b/examples/multi_task.py
@@ -0,0 +1,135 @@
+import argparse
+from distutils.util import strtobool
+from pathlib import Path
+import os
+from amsolver.environment import Environment
+from amsolver.action_modes import ArmActionMode, ActionMode
+from amsolver.observation_config import ObservationConfig
+import numpy as np
+from amsolver.backend.utils import task_file_to_task_class
+
+"""
+Data structure of Observation:
+{
+    'front_rgb': np.array,
+    'front_depth': np.array,
+    'front_mask': np.array,
+    'front_point_cloud': np.array,
+    'left_shoulder_rgb': np.array,
+    'left_shoulder_depth': np.array,
+    'left_shoulder_mask': np.array,
+    'left_shoulder_point_cloud': np.array,
+    'right_shoulder_rgb': np.array,
+    'right_shoulder_depth': np.array,
+    'right_shoulder_mask': np.array,
+    'right_shoulder_point_cloud': np.array,
+    'wrist_rgb': np.array,
+    'wrist_depth': np.array,
+    'wrist_mask': np.array,
+    'wrist_point_cloud': np.array,
+    'overhead_rgb': np.array,
+    'overhead_depth': np.array,
+    'overhead_mask': np.array,
+    'overhead_point_cloud': np.array,
+    'gripper_joint_positions': np.array,
+    'gripper_touch_forces': np.array,
+    'gripper_pose': np.array,
+    'gripper_matrix': np.array,
+    'gripper_open': np.array,
+    'joint_positions': np.array,
+    'joint_velocities': np.array,
+    'joint_forces': np.array,
+    'misc': dict, # contains camera extrinsics, camera intrinsics, near/far clipping planes, etc.
+    'object_informations': dict, # contains two types of information: object information and waypoint information \
+        for each object, the dict includes the convex hull of the object and the visual hull of the object e.g. 'pencil1_0' and 'pencil1_visual0'\
+        for each waypoint, the dict includes the pose, type, target object (this waypoint is used for moving/getting close to which object), and low-level description of the waypoint.
+}
+"""
+
+task_dict = {
+    'drop': ['drop_pen_color', 'drop_pen_relative', 'drop_pen_size'],
+    'pick': ['pick_cube_shape', 'pick_cube_relative', 'pick_cube_color', 'pick_cube_size'],
+    'stack': ['stack_cubes_color', 'stack_cubes_relative', 'stack_cubes_shape', 'stack_cubes_size'],
+    'shape_sorter': ['place_into_shape_sorter_color', 'place_into_shape_sorter_relative', 'place_into_shape_sorter_shape'],
+    'wipe': ['wipe_table_shape', 'wipe_table_color', 'wipe_table_relative', 'wipe_table_size', 'wipe_table_direction'],
+    'pour': ['pour_demo_color', 'pour_demo_relative', 'pour_demo_size'],
+    'drawer': ['open_drawer'],
+    'door': ['open_door'], # untested
+    'door_complex': ['open_door_complex'],
+}
+
+class Agent(object):
+
+    def __init__(self, action_shape):
+        self.action_shape = action_shape
+
+    def act(self, obs, descriptions):
+        arm = np.random.normal(0.0, 0.1, size=(self.action_shape-1,))
+        gripper = [1.0]  # Always open
+        return np.concatenate([arm, gripper], axis=-1)
+
+def add_argments():
+    parser = argparse.ArgumentParser(description='')
+    #dataset
+    parser.add_argument('--data_folder', type=str)
+    parser.add_argument('--setd', type=str, default="seen")
+    parser.add_argument('--img_size',nargs='+', type=int, default=[360,360])
+    parser.add_argument('--task', type=str, default=None, help="select a task type from drop, pick, stack, shape_sorter, wipe, pour, drawer, door_complex")
+    parser.add_argument('--use_collect_data', type=lambda x:bool(strtobool(x)), default=True)
+    args = parser.parse_args()
+    return args
+
+def load_test_config(data_folder: Path, task_name):
+    episode_list = []
+    for path in data_folder.rglob('configs*'):
+        t_name = path.parents[3].name
+        if t_name == task_name:
+            episode_list.append(path.parent)
+    return episode_list
+
+if __name__=="__main__":
+    args = add_argments()
+    obs_config = ObservationConfig()
+    obs_config.set_all(True)
+    obs_config.set_image_size(args.img_size)
+
+    if args.task in task_dict:
+        task_files = task_dict[args.task]
+    else:
+        task_files = [args.task]
+
+    eval_tasks = [task_file_to_task_class(t, parent_folder = 'vlm') for t in task_files]
+    data_folder = Path(os.path.join(args.data_folder, args.setd))
+
+    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
+    env = Environment(action_mode, obs_config=obs_config, headless=False) # set headless=False, if user want to visualize the simulator
+    env.launch()
+
+    agent = Agent(env.action_size)
+    need_test_numbers = 10
+    action_steps = 10
+    for i, task_to_use in enumerate(eval_tasks):
+        task = env.get_task(task_to_use)
+
+        if args.use_collect_data:
+            e_path = load_test_config(data_folder, task_files[i])
+            for num, e in enumerate(e_path):
+                if num >= need_test_numbers:
+                    break
+                task_base = str(e/"task_base.ttm")
+                waypoint_sets = str(e/"waypoint_sets.ttm")
+                config = str(e/"configs.pkl")
+                descriptions, obs = task.load_config(task_base, waypoint_sets, config)
+                print(descriptions)
+                for _ in range(action_steps):
+                    action = agent.act(obs, descriptions)
+                    obs, reward, terminate = task.step(action)
+        else:
+            for _ in range(need_test_numbers):
+                descriptions, obs = task.reset()
+                print(descriptions)
+                for _ in range(action_steps):
+                    action = agent.act(obs, descriptions)
+                    obs, reward, terminate = task.step(action)
+
+    env.shutdown()