Skip to content

Commit

Permalink
Item: remove .chunks_healthy, fixes borgbackup#8559
Browse files Browse the repository at this point in the history
Well, it's not totally removed, some code in Item, Archive and
borg transfer --from-borg1 needs to stay in place, so that we
can pick the CORRECT chunks list that is in .chunks_healthy
for all-zero-replacement-chunk-patched items when transferring
archives from borg1 to borg2 repos.

FUSE fs read: IOError or all-zero result

Other reads: TODO
  • Loading branch information
ThomasWaldmann committed Nov 25, 2024
1 parent 84744ac commit 88fe437
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 229 deletions.
103 changes: 13 additions & 90 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def unpack_many(self, ids, *, filter=None, preload=False):
item = Item(internal_dict=_item)
if "chunks" in item:
item.chunks = [ChunkListEntry(*e) for e in item.chunks]
if "chunks_healthy" in item:
if "chunks_healthy" in item: # legacy
item.chunks_healthy = [ChunkListEntry(*e) for e in item.chunks_healthy]
if filter and not filter(item):
continue
Expand Down Expand Up @@ -744,7 +744,6 @@ def same_item(item, st):
# if a previous extraction was interrupted between setting the mtime and setting non-default flags.
return True

has_damaged_chunks = "chunks_healthy" in item
if dry_run or stdout:
with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
if not hardlink_set:
Expand All @@ -771,8 +770,6 @@ def same_item(item, st):
item_size, item_chunks_size
)
)
if has_damaged_chunks:
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
return

dest = self.cwd
Expand Down Expand Up @@ -827,8 +824,6 @@ def make_parent(path):
raise BackupError(
f"Size inconsistency detected: size {item_size}, chunks size {item_chunks_size}"
)
if has_damaged_chunks:
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
return
with backup_io:
# No repository access beyond this point.
Expand Down Expand Up @@ -1141,10 +1136,6 @@ def chunk_processor(chunk):
return chunk_entry

item.chunks = []
# if we rechunkify, we'll get a fundamentally different chunks list, thus we need
# to get rid of .chunks_healthy, as it might not correspond to .chunks any more.
if self.rechunkify and "chunks_healthy" in item:
del item.chunks_healthy
for chunk in chunk_iter:
chunk_entry = chunk_processor(chunk)
item.chunks.append(chunk_entry)
Expand Down Expand Up @@ -1761,13 +1752,10 @@ def verify_data(self):
if defect_chunks:
if self.repair:
# if we kill the defect chunk here, subsequent actions within this "borg check"
# run will find missing chunks and replace them with all-zero replacement
# chunks and flag the files as "repaired".
# if another backup is done later and the missing chunks get backed up again,
# a "borg check" afterwards can heal all files where this chunk was missing.
# run will find missing chunks.
logger.warning(
"Found defect chunks. They will be deleted now, so affected files can "
"get repaired now and maybe healed later."
"Found defect chunks and will delete them now. "
"Reading files referencing these chunks will result in an I/O error."
)
for defect_chunk in defect_chunks:
# remote repo (ssh): retry might help for strange network / NIC / RAM errors
Expand All @@ -1787,10 +1775,7 @@ def verify_data(self):
else:
logger.warning("chunk %s not deleted, did not consistently fail.", bin_to_hex(defect_chunk))
else:
logger.warning(
"Found defect chunks. With --repair, they would get deleted, so affected "
"files could get repaired then and maybe healed later."
)
logger.warning("Found defect chunks. With --repair, they would get deleted.")
for defect_chunk in defect_chunks:
logger.debug("chunk %s is defect.", bin_to_hex(defect_chunk))
log = logger.error if errors else logger.info
Expand Down Expand Up @@ -1901,80 +1886,18 @@ def add_reference(id_, size, cdata):
self.repository.put(id_, cdata)

def verify_file_chunks(archive_name, item):
"""Verifies that all file chunks are present.
Missing file chunks will be replaced with new chunks of the same length containing all zeros.
If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
"""

def replacement_chunk(size):
chunk = Chunk(None, allocation=CH_ALLOC, size=size)
chunk_id, data = cached_hash(chunk, self.key.id_hash)
cdata = self.repo_objs.format(chunk_id, {}, data, ro_type=ROBJ_FILE_STREAM)
return chunk_id, size, cdata

"""Verifies that all file chunks are present. Missing file chunks will be logged."""
offset = 0
chunk_list = []
chunks_replaced = False
has_chunks_healthy = "chunks_healthy" in item
chunks_current = item.chunks
chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
if has_chunks_healthy and len(chunks_current) != len(chunks_healthy):
# should never happen, but there was issue #3218.
logger.warning(f"{archive_name}: {item.path}: Invalid chunks_healthy metadata removed!")
del item.chunks_healthy
has_chunks_healthy = False
chunks_healthy = chunks_current
for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
chunk_id, size = chunk_healthy
for chunk in item.chunks:
chunk_id, size = chunk
if chunk_id not in self.chunks:
# a chunk of the healthy list is missing
if chunk_current == chunk_healthy:
logger.error(
"{}: {}: New missing file chunk detected (Byte {}-{}, Chunk {}). "
"Replacing with all-zero chunk.".format(
archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
)
logger.error(
"{}: {}: Missing file chunk detected (Byte {}-{}, Chunk {}).".format(
archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
)
self.error_found = chunks_replaced = True
chunk_id, size, cdata = replacement_chunk(size)
add_reference(chunk_id, size, cdata)
else:
logger.info(
"{}: {}: Previously missing file chunk is still missing (Byte {}-{}, Chunk {}). "
"It has an all-zero replacement chunk already.".format(
archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
)
)
chunk_id, size = chunk_current
if chunk_id not in self.chunks:
logger.warning(
"{}: {}: Missing all-zero replacement chunk detected (Byte {}-{}, Chunk {}). "
"Generating new replacement chunk.".format(
archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
)
)
self.error_found = chunks_replaced = True
chunk_id, size, cdata = replacement_chunk(size)
add_reference(chunk_id, size, cdata)
else:
if chunk_current == chunk_healthy:
pass # normal case, all fine.
else:
logger.info(
"{}: {}: Healed previously missing file chunk! (Byte {}-{}, Chunk {}).".format(
archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
)
)
chunk_list.append([chunk_id, size]) # list-typed element as chunks_healthy is list-of-lists
)
self.error_found = True
offset += size
if chunks_replaced and not has_chunks_healthy:
# if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
item.chunks_healthy = item.chunks
if has_chunks_healthy and chunk_list == chunks_healthy:
logger.info(f"{archive_name}: {item.path}: Completely healed previously damaged file!")
del item.chunks_healthy
item.chunks = chunk_list
if "size" in item:
item_size = item.size
item_chunks_size = item.get_size(from_chunks=True)
Expand Down
23 changes: 1 addition & 22 deletions src/borg/archiver/check_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,28 +168,7 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
2. When checking the consistency and correctness of archives, repair mode might
remove whole archives from the manifest if their archive metadata chunk is
corrupt or lost. On a chunk level (i.e. the contents of files), repair mode
will replace corrupt or lost chunks with a same-size replacement chunk of
zeroes. If a previously zeroed chunk reappears, repair mode will restore
this lost chunk using the new chunk.
Most steps taken by repair mode have a one-time effect on the repository, like
removing a lost archive from the repository. However, replacing a corrupt or
lost chunk with an all-zero replacement will have an ongoing effect on the
repository: When attempting to extract a file referencing an all-zero chunk,
the ``extract`` command will distinctly warn about it. The FUSE filesystem
created by the ``mount`` command will reject reading such a "zero-patched"
file unless a special mount option is given.
As mentioned earlier, Borg might be able to "heal" a "zero-patched" file in
repair mode, if all its previously lost chunks reappear (e.g. via a later
backup). This is achieved by Borg not only keeping track of the all-zero
replacement chunks, but also by keeping metadata about the lost chunks. In
repair mode Borg will check whether a previously lost chunk reappeared and will
replace the all-zero replacement chunk by the reappeared chunk. If all lost
chunks of a "zero-patched" file reappear, this effectively "heals" the file.
Consequently, if lost chunks were repaired earlier, it is advised to run
``--repair`` a second time after creating some new backups.
corrupt or lost. Borg will also report files that reference missing chunks.
If ``--repair --find-lost-archives`` is given, previously lost entries will
be recreated in the archive directory. This is only possible before
Expand Down
31 changes: 7 additions & 24 deletions src/borg/archiver/compact_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ..cache import write_chunkindex_to_repo_cache, build_chunkindex_from_repo
from ..constants import * # NOQA
from ..hashindex import ChunkIndex, ChunkIndexEntry
from ..helpers import set_ec, EXIT_WARNING, EXIT_ERROR, format_file_size, bin_to_hex
from ..helpers import set_ec, EXIT_ERROR, format_file_size, bin_to_hex
from ..helpers import ProgressIndicatorPercent
from ..manifest import Manifest
from ..remote import RemoteRepository
Expand Down Expand Up @@ -39,9 +39,7 @@ def garbage_collect(self):
logger.info("Starting compaction / garbage collection...")
self.chunks = self.get_repository_chunks()
logger.info("Computing object IDs used by archives...")
(self.missing_chunks, self.reappeared_chunks, self.total_files, self.total_size, self.archives_count) = (
self.analyze_archives()
)
(self.missing_chunks, self.total_files, self.total_size, self.archives_count) = self.analyze_archives()
self.report_and_delete()
self.save_chunk_index()
logger.info("Finished compaction / garbage collection...")
Expand Down Expand Up @@ -74,27 +72,23 @@ def save_chunk_index(self):
self.chunks = None # nothing there (cleared!)

def analyze_archives(self) -> Tuple[Set, Set, int, int, int]:
"""Iterate over all items in all archives, create the dicts id -> size of all used/wanted chunks."""
"""Iterate over all items in all archives, create the dicts id -> size of all used chunks."""

def use_it(id, *, wanted=False):
def use_it(id):
entry = self.chunks.get(id)
if entry is not None:
# the chunk is in the repo, mark it used.
self.chunks[id] = entry._replace(flags=entry.flags | ChunkIndex.F_USED)
if wanted:
# chunk id is from chunks_healthy list: a lost chunk has re-appeared!
reappeared_chunks.add(id)
else:
# with --stats: we do NOT have this chunk in the repository!
# without --stats: we do not have this chunk or the chunks index is incomplete.
missing_chunks.add(id)

missing_chunks: set[bytes] = set()
reappeared_chunks: set[bytes] = set()
archive_infos = self.manifest.archives.list(sort_by=["ts"])
num_archives = len(archive_infos)
pi = ProgressIndicatorPercent(
total=num_archives, msg="Computing used/wanted chunks %3.1f%%", step=0.1, msgid="compact.analyze_archives"
total=num_archives, msg="Computing used chunks %3.1f%%", step=0.1, msgid="compact.analyze_archives"
)
total_size, total_files = 0, 0
for i, info in enumerate(archive_infos):
Expand All @@ -114,25 +108,14 @@ def use_it(id, *, wanted=False):
for id, size in item.chunks:
total_size += size # original, uncompressed file content size
use_it(id)
if "chunks_healthy" in item:
# we also consider the chunks_healthy chunks as referenced - do not throw away
# anything that borg check --repair might still need.
for id, size in item.chunks_healthy:
use_it(id, wanted=True)
pi.finish()
return missing_chunks, reappeared_chunks, total_files, total_size, num_archives
return missing_chunks, total_files, total_size, num_archives

def report_and_delete(self):
run_repair = " Run borg check --repair!"

if self.missing_chunks:
logger.error(f"Repository has {len(self.missing_chunks)} missing objects." + run_repair)
logger.error(f"Repository has {len(self.missing_chunks)} missing objects!")
set_ec(EXIT_ERROR)

if self.reappeared_chunks:
logger.warning(f"{len(self.reappeared_chunks)} previously missing objects re-appeared!" + run_repair)
set_ec(EXIT_WARNING)

logger.info("Cleaning archives directory from soft-deleted archives...")
archive_infos = self.manifest.archives.list(sort_by=["ts"], deleted=True)
for archive_info in archive_infos:
Expand Down
6 changes: 3 additions & 3 deletions src/borg/archiver/mount_cmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ def build_parser_mount_umount(self, subparsers, common_parser, mid_common_parser
- ``versions``: when used with a repository mount, this gives a merged, versioned
view of the files in the archives. EXPERIMENTAL, layout may change in future.
- ``allow_damaged_files``: by default damaged files (where missing chunks were
replaced with runs of zeros by ``borg check --repair``) are not readable and
return EIO (I/O error). Set this option to read such files.
- ``allow_damaged_files``: by default damaged files (where chunks are missing)
will return EIO (I/O error) when trying to read the related parts of the file.
Set this option to replace the missing parts with all-zero bytes.
- ``ignore_permissions``: for security reasons the ``default_permissions`` mount
option is internally enforced by borg. ``ignore_permissions`` can be given to
not enforce ``default_permissions``.
Expand Down
14 changes: 4 additions & 10 deletions src/borg/archiver/recreate_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,10 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
at least the entire deduplicated size of the archives using the previous
chunker params.
If you recently ran borg check --repair and it had to fix lost chunks with all-zero
replacement chunks, please first run another backup for the same data and re-run
borg check --repair afterwards to heal any archives that had lost chunks which are
still generated from the input data.
Important: running borg recreate to re-chunk will remove the chunks_healthy
metadata of all items with replacement chunks, so healing will not be possible
any more after re-chunking (it is also unlikely it would ever work: due to the
change of chunking parameters, the missing chunk likely will never be seen again
even if you still have the data that produced it).
If your most recent borg check found missing chunks, please first run another
backup for the same data, before doing any rechunking. If you are lucky, that
will re-create the missing chunks. Optionally, do another borg check, to see
if the chunks are still missing.
"""
)
subparser = subparsers.add_parser(
Expand Down
24 changes: 12 additions & 12 deletions src/borg/fuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from collections import defaultdict, Counter
from signal import SIGINT

from . import constants
from .constants import ROBJ_FILE_STREAM
from .fuse_impl import llfuse, has_pyfuse3

Expand Down Expand Up @@ -46,6 +47,7 @@ def async_wrapper(fn):
from .item import Item
from .platform import uid2user, gid2group
from .platformflags import is_darwin
from .repository import Repository
from .remote import RemoteRepository


Expand Down Expand Up @@ -652,17 +654,6 @@ def lookup(self, parent_inode, name, ctx=None):

@async_wrapper
def open(self, inode, flags, ctx=None):
if not self.allow_damaged_files:
item = self.get_item(inode)
if "chunks_healthy" in item:
# Processed archive items don't carry the path anymore; for converting the inode
# to the path we'd either have to store the inverse of the current structure,
# or search the entire archive. So we just don't print it. It's easy to correlate anyway.
logger.warning(
"File has damaged (all-zero) chunks. Try running borg check --repair. "
"Mount with allow_damaged_files to read damaged files."
)
raise llfuse.FUSEError(errno.EIO)
return llfuse.FileInfo(fh=inode) if has_pyfuse3 else inode

@async_wrapper
Expand Down Expand Up @@ -699,7 +690,16 @@ def read(self, fh, offset, size):
# evict fully read chunk from cache
del self.data_cache[id]
else:
_, data = self.repo_objs.parse(id, self.repository_uncached.get(id), ro_type=ROBJ_FILE_STREAM)
try:
cdata = self.repository_uncached.get(id)
except Repository.ObjectNotFound:
if self.allow_damaged_files:
data = constants.zeros[:s]
assert len(data) == s
else:
raise llfuse.FUSEError(errno.EIO) from None
else:
_, data = self.repo_objs.parse(id, cdata, ro_type=ROBJ_FILE_STREAM)
if offset + n < len(data):
# chunk was only partially read, cache it
self.data_cache[id] = data
Expand Down
Loading

0 comments on commit 88fe437

Please sign in to comment.