Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

issue: 4207369: EOFError while extracting logs #288

Merged
merged 4 commits into from
Dec 19, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def _get_files_from_tar(
failed_extract = set()
folders_to_remove = set()
single_log_name, logs_with_dirs = self._split_based_on_dir(files_to_extract)

for member in opened_file:
base_name = os.path.basename(member.name)
full_dir_path = os.path.dirname(member.name)
Expand Down Expand Up @@ -83,6 +84,8 @@ def _get_files_from_tar(
if len(logs_with_dirs[parent_dir_name]) == 0:
del logs_with_dirs[parent_dir_name]


Miryam-Schwartz marked this conversation as resolved.
Show resolved Hide resolved

files_extracted = files_went_over.difference(failed_extract)
# When extracting the files from the tar, they are also taken with their
# directories from inside the tar; there is no way to only take the file
Expand Down Expand Up @@ -125,20 +128,39 @@ def extract_files(
]
for inner_tar_name in inner_tar_files:
with outer_tar.extractfile(inner_tar_name) as inner_tar_stream:

# Check if the inner stream can be read
try:
# Read some bytes to verify the file is not corrupted
inner_tar_stream.peek(1) # Peek at the first byte
except Exception as e:
log.Logger.info(f"Error reading inner tar file {inner_tar_name}: {e}")
Miryam-Schwartz marked this conversation as resolved.
Show resolved Hide resolved
continue # Skip this file if it's invalid

inner_file_open_mode = (
"r:gz" if self.is_gzip_file_obj(inner_tar_stream) else "r:"
)
with tarfile.open(
fileobj=inner_tar_stream, mode=inner_file_open_mode
) as inner_tar:
extracted_files, failed_files = self._get_files_from_tar(
inner_tar,
files_to_extract,
directories_to_extract,
destination,
)
if len(extracted_files) > 0:
return extracted_files, failed_files

try:

with tarfile.open(
fileobj=inner_tar_stream, mode=inner_file_open_mode
) as inner_tar:
extracted_files, failed_files = self._get_files_from_tar(
inner_tar,
files_to_extract,
directories_to_extract,
destination,
)
if len(extracted_files) > 0:
return extracted_files, failed_files


except EOFError as e:
log.logger.info(f"EOFError in inner tar {inner_tar_name}: {e}")
continue # Handle the EOFError and continue with the next file
Miryam-Schwartz marked this conversation as resolved.
Show resolved Hide resolved


# If we got to this point, we might have a simple tar, try to extract from it
return self._get_files_from_tar(
outer_tar, files_to_extract, directories_to_extract, destination
Expand Down
Loading