diff --git a/lib/manifest_processor.py b/lib/manifest_processor.py index 890e75e..3b9d704 100644 --- a/lib/manifest_processor.py +++ b/lib/manifest_processor.py @@ -39,6 +39,10 @@ def __init__(self, username=None, password=None, google_client_secrets=None, self.password = password + # By default, we will check MD5 checksums after each file is + # retrieved/downloaded. + self.validation = True + if google_client_secrets is not None and google_project_id is not None: self.logger.info("Create GCP client.") from gcp import GCP @@ -152,6 +156,15 @@ def _get_s3_obj(self, url, file_name): return result + # Method to turn off MD5 checksum checking after a file is downloaded. + # For large files and particularly with manifests that contain large + # numbers of large files, disabling validation may significantly boost + # performance. + def disable_validation(self): + self.logger.debug("In disable_validation.") + + self.validation = False + # Function to download each URL from the manifest. # Arguments: # manifest = manifest list created by functions in convert_to_manifest.py @@ -221,17 +234,24 @@ def download_manifest(self, manifest, destination, priorities): failed_files.append(2) continue - # Now that the download is complete, verify the checksum, and then - # establish the final file - if self._checksum_matches(tmp_file_name, mfile['md5']): + if self.validation: + # Now that the download is complete, verify the checksum, and then + # establish the final file + if self._checksum_matches(tmp_file_name, mfile['md5']): + self.logger.debug("Renaming {} to {}".format(tmp_file_name, file_name)) + shutil.move(tmp_file_name, file_name) + failed_files.append(0) + else: + print("\r") + msg = "MD5 check failed for the file ID {0}. " + \ + "Data may be corrupted." + print(msg.format(mfile['id'])) + failed_files.append(3) + else: + self.logger.debug("Skipping checksumming. " + \ + "Renaming {} to {}".format(tmp_file_name, file_name)) shutil.move(tmp_file_name, file_name) failed_files.append(0) - else: - print("\r") - msg = "MD5 check failed for the file ID {0}. " + \ - "Data may be corrupted." - print(msg.format(mfile['id'])) - failed_files.append(3) return failed_files diff --git a/lib/portal_client.py b/lib/portal_client.py index 755bc5d..2280176 100644 --- a/lib/portal_client.py +++ b/lib/portal_client.py @@ -31,22 +31,27 @@ def parse_cli(): 'generated from a portal instance.' ) - group = parser.add_mutually_exclusive_group(required=True) - - group.add_argument( + parser.add_argument( '-m', '--manifest', type=str, help='Location of a locally stored manifest file from.' ) - group.add_argument( + parser.add_argument( '-u', '--url', type=str, required=False, help='URL path to a manifest file stored at an HTTP endpoint.' ) - group.add_argument( + parser.add_argument( + '--disable-validation', + dest='disable_validation', + action='store_true', + help='Disable MD5 checksum validation.' + ) + + parser.add_argument( '-t', '--token', type=str, required=False, @@ -184,6 +189,11 @@ def main(): mp = ManifestProcessor(username, password, google_client_secrets=client_secrets, google_project_id=project_id) + # Turn off MD5 checksumming if specified by the user + if args.disable_validation: + logger.debug("Turning off checksum validation.") + mp.disable_validation() + while keep_trying: manifest = {} diff --git a/setup.py b/setup.py index 67fc8be..c663010 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def read(fname): setup(name='portal-client', description='Download client tool for IGS Portal servers.', long_description=read('DESC'), - version='1.3.0', + version='1.4.0', py_modules=['portal_client'], author='Victor F', author_email='victor73@github.com',