From 28e36004d821e7aaa0bb117ad4bfaa2599ebf5b0 Mon Sep 17 00:00:00 2001
From: kz
Date: Sun, 28 Aug 2022 23:18:29 -0700
Subject: [PATCH] automatic dataset download

---
Note: the script below is a corrected version of the one originally
committed, so the blob id "index" line of download_dataset.sh is omitted.
The README.md context lines were re-split from a whitespace-collapsed copy;
use "git apply --ignore-whitespace" if they do not match exactly.

 README.md           |   6 +++
 download_dataset.sh | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 download_dataset.sh

diff --git a/README.md b/README.md
index 7cd5bb1..d3557fc 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,12 @@ export DISPLAY=:0.0 #Keep the first number as same as the argument of startx; th
 The precollected full dataset can be found at here: [Dataset](https://drive.google.com/drive/folders/1Qx_2_ePIqf_Z6SnpPkocUiPgFeCfePQh?usp=sharing).
 The smaller sample dataset can be found at here: [Sample Dtaset](https://drive.google.com/drive/folders/1jm0uLxoVYHotCi0HVZotkhpNG45lfCzW?usp=sharing).
 The dataset is under [CC BY 4.0 license](https://creativecommons.org/licenses/by/4.0/).
+We also provide a script to automatically download the dataset by using [gdrive](https://github.com/prasmussen/gdrive).
+```bash
+bash ./download_dataset.sh -s /Save/Path/For/Dataset -p Dataset_split -t Tasks
+# bash ./download_dataset.sh -h for more help on arguments
+```
+
 The pretrained models of all baselines can be found at here: [Model](https://drive.google.com/drive/folders/130w8I7QTOwcBYir0Ge3dX18Y43k_URie?usp=sharing)
 
 To test pretrained 6D-CLIPort models:
diff --git a/download_dataset.sh b/download_dataset.sh
new file mode 100644
--- /dev/null
+++ b/download_dataset.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+#
+# Download one split of the dataset from Google Drive with the gdrive CLI.
+#
+# Usage:
+#   bash ./download_dataset.sh -s SAVE_PATH [-p SPLIT] [-t TASKS]
+#   bash ./download_dataset.sh -h
+#
+# gdrive is expected as ./gdrive in the current directory; when no executable
+# copy is found there, the linux amd64 release is downloaded and unpacked.
+
+# Print the option summary.
+usage() {
+  echo -e "OPTIONS:\n-s: The save path for dataset (required)"
+  echo -e "-p: The split of dataset to download (optional, default: test_seen)"
+  echo -e "-t: The tasks of dataset to download (optional, default: all)"
+}
+
+# Succeed when $1 equals one of the remaining arguments.
+contains() {
+  local wanted="$1" item
+  shift
+  for item in "$@"; do
+    if [ "$item" = "$wanted" ]; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Parse and validate the arguments before touching gdrive, so that -h and
+# invalid input never start a download or an authorization prompt.
+SaveFolder=""
+Split="test_seen"
+Tasks="all"
+while getopts 's:p:t:h' OPT; do
+  case "$OPT" in
+    s) SaveFolder="$OPTARG" ;;
+    p) Split="$OPTARG" ;;
+    t) Tasks="$OPTARG" ;;
+    h)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unrecognized arguments. Please use -h to check the usage." >&2
+      exit 1
+      ;;
+  esac
+done
+
+if [ -z "$SaveFolder" ]; then
+  echo "Missing save path,exit" >&2
+  exit 1
+fi
+
+split_list=("train" "valid_seen" "valid_unseen" "test_seen" "test_unseen")
+if ! contains "$Split" "${split_list[@]}"; then
+  echo "Wrong split input. Please select one from '${split_list[*]}'" >&2
+  exit 1
+fi
+
+tasks_list=("all" "pick" "stack" "shape_sorter" "drop" "wipe" "pour" "door" "drawer")
+if ! contains "$Tasks" "${tasks_list[@]}"; then
+  echo "Wrong tasks input. Please select one from '${tasks_list[*]}'" >&2
+  exit 1
+fi
+
+# Download and set up gdrive unless an executable copy is already present.
+if [ ! -x ./gdrive ]; then
+  echo "Downloading gdrive for the linux amd64 system."
+  echo "If you use other systems, please select the correct one on https://github.com/prasmussen/gdrive/releases"
+  gdrive_archive="gdrive_2.1.1_linux_amd64.tar.gz"
+  # The TLS certificate is verified (no --no-check-certificate) because the
+  # archive contains a binary that is executed right below.
+  wget "https://github.com/prasmussen/gdrive/releases/download/2.1.1/${gdrive_archive}" || exit 1
+  tar -xzf "$gdrive_archive" || exit 1
+  rm -- "$gdrive_archive"
+fi
+# Stop early when gdrive cannot report on the Google Drive account.
+./gdrive about || exit 1
+
+echo "Save dataset into: $SaveFolder. The split is: $Split. The tasks are: $Tasks"
+
+# Map the split to its Google Drive folder id and local sub-directory.
+case "$Split" in
+  "train")
+    id="1RTzJZWO3TUtA2iH9bucPDUVEvjF2AL5l"
+    SaveFolder="$SaveFolder/train"
+    ;;
+  "valid_seen")
+    id="1h7wtA0aTuVeDZQFRouDuiDlFnjeNN0qV"
+    SaveFolder="$SaveFolder/valid/seen"
+    ;;
+  "valid_unseen")
+    id="1kK_xgfwVWm7liJtai4SO75L7OpA1GexP"
+    SaveFolder="$SaveFolder/valid/unseen"
+    ;;
+  "test_seen")
+    id="1tuGIlRm_0xUh1WZFlYjJmcN0QX6sf6Nl"
+    SaveFolder="$SaveFolder/test/seen"
+    ;;
+  "test_unseen")
+    id="1esNV1--eWiYRvAozrXQHY8tZhVkyFrQf"
+    SaveFolder="$SaveFolder/test/unseen"
+    ;;
+esac
+
+# Select the files inside the split folder, optionally filtered by task name.
+query_string="'$id' in parents"
+if [ "$Tasks" != "all" ]; then
+  query_string="$query_string and name contains '$Tasks'"
+fi
+echo "$query_string"
+./gdrive download query --skip --path "$SaveFolder" "$query_string"