Skip to content

Commit

Permalink
fix comments, pylint and ruff
Browse files: browse the repository at this point in the history
  • Loading branch information
Miryam-Schwartz committed Dec 19, 2024
1 parent e5827e5 commit f262323
Showing 1 changed file with 6 additions and 7 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -84,8 +84,6 @@ def _get_files_from_tar(
if len(logs_with_dirs[parent_dir_name]) == 0:
del logs_with_dirs[parent_dir_name]



files_extracted = files_went_over.difference(failed_extract)
# When extracting the files from the tar, they are also taken with their
# directories from inside the tar, there is no way to only take the file
Expand Down Expand Up @@ -128,21 +126,21 @@ def extract_files(
]
for inner_tar_name in inner_tar_files:
with outer_tar.extractfile(inner_tar_name) as inner_tar_stream:

# Check if the inner stream can be read
try:
# Read some bytes to verify the file is not corrupted
inner_tar_stream.peek(1) # Peek at the first byte
except Exception as e:
log.Logger.info("Error reading inner tar file %s: %s", inner_tar_name, e)
log.LOGGER.info(
"Error reading inner tar file %s: %s", inner_tar_name, e
)
continue # Skip this file if it's invalid

inner_file_open_mode = (
"r:gz" if self.is_gzip_file_obj(inner_tar_stream) else "r:"
)

try:

with tarfile.open(
fileobj=inner_tar_stream, mode=inner_file_open_mode
) as inner_tar:
Expand All @@ -155,9 +153,10 @@ def extract_files(
if len(extracted_files) > 0:
return extracted_files, failed_files


except EOFError as e:
log.LOGGER.info("EOFError in inner tar %s: %s", inner_tar_name, e)
log.LOGGER.info(
"EOFError in inner tar %s: %s", inner_tar_name, e
)
continue
# If we got to this point, we might have a simple tar, try to extract from it
return self._get_files_from_tar(
Expand Down

0 comments on commit f262323

Please sign in to comment.