Skip to content

Commit

Permalink
Merge pull request #895 from AlexVCaron/fix/gdrive_quotas
Browse files Browse the repository at this point in the history
pass fetcher to DVC
  • Loading branch information
arnaudbore authored Feb 13, 2024
2 parents d20d3d4 + 214090a commit 2de137c
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
test:
runs-on: scilus-runners
steps:
- name: Checkout repository
- name: Checkout repository for merge
uses: actions/checkout@v4.1.1

- name: Fetch python version from repository
Expand Down
111 changes: 31 additions & 80 deletions scilpy/io/fetcher.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# -*- coding: utf-8 -*-

import logging
import hashlib
import os
import pathlib
import requests
import zipfile

GOOGLE_URL = "https://drive.usercontent.google.com/download?"

DVC_URL = "https://scil.usherbrooke.ca/scil_test_data/dvc-store/files/md5"


def download_file_from_google_drive(id, destination):
def download_file_from_google_drive(url, destination):
"""
Download a large file from the given URL, streaming it to disk in chunks.
Parameters
Expand All @@ -19,13 +21,6 @@ def download_file_from_google_drive(id, destination):
destination: str
path to destination file with its name and extension
"""
def get_confirm_token(response):
for key, value in response.cookies.items():
if key.startswith('download_warning'):
return value

return None

def save_response_content(response, destination):
CHUNK_SIZE = 32768

Expand All @@ -34,13 +29,7 @@ def save_response_content(response, destination):
f.write(chunk)

session = requests.Session()
params = {'id': id, 'confirm': True}
response = session.get(GOOGLE_URL, params=params, stream=True)
token = get_confirm_token(response)

if token:
params['confirm'] = token
response = session.get(GOOGLE_URL, params=params, stream=True)
response = session.get(url, stream=True)

save_response_content(response, destination)

Expand All @@ -56,66 +45,28 @@ def get_home():

def get_testing_files_dict():
""" Get dictionary linking each zip file to its MD5SUM (the MD5 is also used to build the DVC download URL) """
return {'bids_json.zip':
['1bMl5YtEufoKh-gjen940QTO5BpT5Y9TF',
'521eed4911c456cc10cc3cb1f6a5dc83'],
'plot.zip':
['1Ab-oVWI1Fu7fHTEz1H3-s1TfR_oW-GOE',
'cca8f1e19da357f44365a7e27b9029ca'],
'ihMT.zip':
['1V0xzvmVrVlL9dRKhc5-7xWESkmof1zyS',
'5d28430ac46b4fc04b6d77f9efaefb5c'],
'MT.zip':
['1C2LEUkGaLFdsmym3kBrAtfPjPtv5mJuZ',
'13532c593efdf09350667df14ea4e93a'],
'atlas.zip':
['1waYx4ED3qwzyJqrICjjgGXXBW2v4ZCYJ',
'eb37427054cef5d50ac3d429ff53de47'],
'bst.zip':
['1YprJRnyXk7VRHUkb-bJLs69C1v3tPd1S',
'c0551a28dcefcd7cb53f572b1794b3e8'],
'bundles.zip':
['1VaGWwhVhnfsZBCCYu12dta9qi0SgZFP7',
'5fbf5c8eaabff2648ad509e06b003e67'],
'commit_amico.zip':
['1vyMtQd1u2h2pza9M0bncDWLc34_4MRPK',
'b40800ab4290e4f58c375140fe59b44f'],
'connectivity.zip':
['1lZqiOKmwTluPIRqblthOnBc4KI2kfKUC',
'6d13bd076225fa2f786f416fa754623a'],
'filtering.zip':
['1yzHSL4tBtmm_aeI1i0qJhrA9z040k0im',
'dbe796fb75c3e1e5559fad3308982769'],
'others.zip':
['12BAszPjE1A9L2RbQJIFpkPzqUJfPdYO6',
'981dccd8b23aad43aa014f4fdd907e70'],
'processing.zip':
['1caaKoAChyPs5c4WemQWUsR-efD_q2z_b',
'a2f982b8d84833f5ccfe709b725307d2'],
'surface_vtk_fib.zip':
['1c9KMNFeSkyYDgu3SH_aMf0kduIlpt7cN',
'bf131869a6722778a234869bf585520a'],
'tracking.zip':
['1QSekZYDoMvv-An6FRMSt_s_qPeB3BHfw',
'6d88910403fb4d9b79604f11100d8915'],
'tractograms.zip':
['1f98s0TP-862KtB_xsSmRNrmi2nrqRaNU',
'911bc59dfdcee3656564f212ae8ed3a1'],
'tractometry.zip':
['130mxBo4IJWPnDFyOELSYDif1puRLGHMX',
'3e27625a1e7f2484b7fa5028c95324cc'],
'stats.zip':
['1vsM7xuU0jF5fL5PIgN6stAH7oO683tw0',
'03aed629dea754bbc2041e7ab5f94112'],
'anatomical_filtering.zip':
['1Li8DdySnMnO9Gich4pilhXisjkjz1-Dy',
'6f0eff5154ff0973a3dc26db00e383ea'],
'btensor_testdata.zip':
['1AMsKlbOZyPnT9TAbxcFzHS1b29aJWKDg',
'7c68524fca01268203dc8bfee340f037'],
'fodf_filtering.zip':
['1iyoX2ltLOoLer-v-49LHOzopHCFZ_Tv6',
'e79c4291af584fdb25814aa7b403a6ce']}
return {
"commit_amico.zip": "c190e6b9d22350b51e222c60febe13b4",
"bundles.zip": "54b6e2bf2dda579886efe4e2a8989486",
"stats.zip": "2aeac4da5ab054b3a460fc5fdc5e4243",
"bst.zip": "eed227fd246255e7417f92d49eb1066a",
"filtering.zip": "19116ff4244d057c8214ee3fe8e05f71",
"ihMT.zip": "08fcf44848ba2649aad5a5a470b3cb06",
"tractometry.zip": "890bfa70e44b15c0d044085de54e00c6",
"bids_json.zip": "97fd9a414849567fbfdfdb0ef400488b",
"MT.zip": "1f4345485248683b3652c97f2630950e",
"btensor_testdata.zip": "7ada72201a767292d56634e0a7bbd9ad",
"tracking.zip": "4793a470812318ce15f1624e24750e4d",
"atlas.zip": "dc34e073fc582476504b3caf127e53ef",
"anatomical_filtering.zip": "5282020575bd485e15d3251257b97e01",
"connectivity.zip": "fe8c47f444d33067f292508d7050acc4",
"plot.zip": "a1dc54cad7e1d17e55228c2518a1b34e",
"others.zip": "82248b4888a63b0aeffc8070cc206995",
"fodf_filtering.zip": "5985c0644321ecf81fd694fb91e2c898",
"processing.zip": "eece5cdbf437b8e4b5cb89c797872e28",
"surface_vtk_fib.zip": "241f3afd6344c967d7176b43e4a99a41",
"tractograms.zip": "5497d0bf3ccc35f8f4f117829d790267"
}


def fetch_data(files_dict, keys=None):
Expand All @@ -134,23 +85,23 @@ def fetch_data(files_dict, keys=None):
elif isinstance(keys, str):
keys = [keys]
for f in keys:
url_id, md5 = files_dict[f]
url_md5 = files_dict[f]
full_path = os.path.join(scilpy_home, f)
full_path_no_ext, ext = os.path.splitext(full_path)

CURR_URL = GOOGLE_URL + 'id=' + url_id
CURR_URL = DVC_URL + "/" + url_md5[:2] + "/" + url_md5[2:]
if not os.path.isdir(full_path_no_ext):
if ext == '.zip' and not os.path.isdir(full_path_no_ext):
logging.warning('Downloading and extracting {} from url {} to '
'{}'.format(f, CURR_URL, scilpy_home))

# Streamed download from the DVC store URL (the helper keeps its legacy GDrive name)
download_file_from_google_drive(url_id, full_path)
download_file_from_google_drive(CURR_URL, full_path)

with open(full_path, 'rb') as file_to_check:
data = file_to_check.read()
md5_returned = hashlib.md5(data).hexdigest()
if md5_returned != md5:
if md5_returned != url_md5:
try:
zipfile.ZipFile(full_path)
except zipfile.BadZipFile:
Expand Down

0 comments on commit 2de137c

Please sign in to comment.