Skip to content

Commit

Permalink
Added support for skipping/disabling MD5 checksums for large files/tr…
Browse files Browse the repository at this point in the history
…ansfers. Version 1.4.0
  • Loading branch information
victor73 committed Feb 28, 2019
1 parent e7618aa commit a08545d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 15 deletions.
38 changes: 29 additions & 9 deletions lib/manifest_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ def __init__(self, username=None, password=None, google_client_secrets=None,

self.password = password

# By default, we will check MD5 checksums after each file is
# retrieved/downloaded.
self.validation = True

if google_client_secrets is not None and google_project_id is not None:
self.logger.info("Create GCP client.")
from gcp import GCP
Expand Down Expand Up @@ -152,6 +156,15 @@ def _get_s3_obj(self, url, file_name):

return result

# Method to turn off MD5 checksum checking after a file is downloaded.
# For large files, and particularly for manifests that contain large
# numbers of large files, disabling validation may significantly boost
# performance.
#
# Takes no arguments and returns nothing. It only clears the
# self.validation flag; download_manifest() consults that flag and, when
# it is False, moves each downloaded temp file into place without
# comparing its MD5 against the manifest entry.
def disable_validation(self):
    self.logger.debug("In disable_validation.")

    # Validation is on by default; this is a one-way switch to off.
    self.validation = False

# Function to download each URL from the manifest.
# Arguments:
# manifest = manifest list created by functions in convert_to_manifest.py
Expand Down Expand Up @@ -221,17 +234,24 @@ def download_manifest(self, manifest, destination, priorities):
failed_files.append(2)
continue

# Now that the download is complete, verify the checksum, and then
# establish the final file
if self._checksum_matches(tmp_file_name, mfile['md5']):
if self.validation:
# Now that the download is complete, verify the checksum, and then
# establish the final file
if self._checksum_matches(tmp_file_name, mfile['md5']):
self.logger.debug("Renaming {} to {}".format(tmp_file_name, file_name))
shutil.move(tmp_file_name, file_name)
failed_files.append(0)
else:
print("\r")
msg = "MD5 check failed for the file ID {0}. " + \
"Data may be corrupted."
print(msg.format(mfile['id']))
failed_files.append(3)
else:
self.logger.debug("Skipping checksumming. " + \
"Renaming {} to {}".format(tmp_file_name, file_name))
shutil.move(tmp_file_name, file_name)
failed_files.append(0)
else:
print("\r")
msg = "MD5 check failed for the file ID {0}. " + \
"Data may be corrupted."
print(msg.format(mfile['id']))
failed_files.append(3)

return failed_files

Expand Down
20 changes: 15 additions & 5 deletions lib/portal_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,27 @@ def parse_cli():
'generated from a portal instance.'
)

group = parser.add_mutually_exclusive_group(required=True)

group.add_argument(
parser.add_argument(
'-m', '--manifest',
type=str,
help='Location of a locally stored manifest file from.'
)

group.add_argument(
parser.add_argument(
'-u', '--url',
type=str,
required=False,
help='URL path to a manifest file stored at an HTTP endpoint.'
)

group.add_argument(
parser.add_argument(
'--disable-validation',
dest='disable_validation',
action='store_true',
help='Disable MD5 checksum validation.'
)

parser.add_argument(
'-t', '--token',
type=str,
required=False,
Expand Down Expand Up @@ -184,6 +189,11 @@ def main():
mp = ManifestProcessor(username, password,
google_client_secrets=client_secrets, google_project_id=project_id)

# Turn off MD5 checksumming if specified by the user
if args.disable_validation:
logger.debug("Turning off checksum validation.")
mp.disable_validation()

while keep_trying:
manifest = {}

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def read(fname):
setup(name='portal-client',
description='Download client tool for IGS Portal servers.',
long_description=read('DESC'),
version='1.3.0',
version='1.4.0',
py_modules=['portal_client'],
author='Victor F',
author_email='victor73@github.com',
Expand Down

0 comments on commit a08545d

Please sign in to comment.