Skip to content

Commit ec1937d

Browse files
extract: --skip-errors ignores corrupted chunks (w/ log message), see #840
Forward port of a change implemented by @enkore back in 2016: enkore@09b21b1
1 parent 1f48e50 commit ec1937d

File tree

3 files changed

+82
-18
lines changed

3 files changed

+82
-18
lines changed

src/borg/archive.py

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,7 @@ def extract_item(
788788
hlm=None,
789789
pi=None,
790790
continue_extraction=False,
791+
skip_integrity_errors=False,
791792
):
792793
"""
793794
Extract archive item.
@@ -800,6 +801,8 @@ def extract_item(
800801
:param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
801802
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
802803
:param continue_extraction: continue a previously interrupted extraction of same archive
804+
:param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError
805+
(ignored for dry_run and stdout)
803806
"""
804807

805808
def same_item(item, st):
@@ -849,15 +852,15 @@ def same_item(item, st):
849852
)
850853
if has_damaged_chunks:
851854
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
852-
return
855+
return True
853856

854857
dest = self.cwd
855858
path = os.path.join(dest, item.path)
856859
# Attempt to remove existing files, ignore errors on failure
857860
try:
858861
st = os.stat(path, follow_symlinks=False)
859862
if continue_extraction and same_item(item, st):
860-
return # done! we already have fully extracted this file in a previous run.
863+
return True # done! we already have fully extracted this file in a previous run.
861864
elif stat.S_ISDIR(st.st_mode):
862865
os.rmdir(path)
863866
else:
@@ -878,20 +881,43 @@ def make_parent(path):
878881
make_parent(path)
879882
with self.extract_helper(item, path, hlm) as hardlink_set:
880883
if hardlink_set:
881-
return
884+
return True
882885
with backup_io("open"):
883886
fd = open(path, "wb")
884887
with fd:
885888
ids = [c.id for c in item.chunks]
886-
for data in self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM):
889+
chunk_index = -1
890+
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
891+
skipped_errors = False
892+
while True:
893+
try:
894+
chunk_index += 1
895+
data = next(chunk_iterator)
896+
except StopIteration:
897+
break
898+
except IntegrityError as err:
899+
if not skip_integrity_errors:
900+
raise
901+
c = item.chunks[chunk_index]
902+
size = c.size
903+
logger.warning("%s: chunk %s: %s", remove_surrogates(item.path), bin_to_hex(c.id), err)
904+
with backup_io("seek"):
905+
fd.seek(size, 1)
906+
skipped_errors = True
907+
# restart chunk data generator
908+
ids = [c.id for c in item.chunks[chunk_index + 1 :]]
909+
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
910+
else:
911+
with backup_io("write"):
912+
size = len(data)
913+
if sparse and zeros.startswith(data):
914+
# all-zero chunk: create a hole in a sparse file
915+
fd.seek(size, 1)
916+
else:
917+
fd.write(data)
887918
if pi:
888-
pi.show(increase=len(data), info=[remove_surrogates(item.path)])
889-
with backup_io("write"):
890-
if sparse and zeros.startswith(data):
891-
# all-zero chunk: create a hole in a sparse file
892-
fd.seek(len(data), 1)
893-
else:
894-
fd.write(data)
919+
pi.show(increase=size, info=[remove_surrogates(item.path)])
920+
895921
with backup_io("truncate_and_attrs"):
896922
pos = item_chunks_size = fd.tell()
897923
fd.truncate(pos)
@@ -905,7 +931,7 @@ def make_parent(path):
905931
)
906932
if has_damaged_chunks:
907933
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
908-
return
934+
return not skipped_errors
909935
with backup_io:
910936
# No repository access beyond this point.
911937
if stat.S_ISDIR(mode):
@@ -919,7 +945,7 @@ def make_parent(path):
919945
with self.extract_helper(item, path, hlm) as hardlink_set:
920946
if hardlink_set:
921947
# unusual, but possible: this is a hardlinked symlink.
922-
return
948+
return True
923949
target = item.target
924950
try:
925951
os.symlink(target, path)
@@ -930,18 +956,19 @@ def make_parent(path):
930956
make_parent(path)
931957
with self.extract_helper(item, path, hlm) as hardlink_set:
932958
if hardlink_set:
933-
return
959+
return True
934960
os.mkfifo(path)
935961
self.restore_attrs(path, item)
936962
elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
937963
make_parent(path)
938964
with self.extract_helper(item, path, hlm) as hardlink_set:
939965
if hardlink_set:
940-
return
966+
return True
941967
os.mknod(path, item.mode, item.rdev)
942968
self.restore_attrs(path, item)
943969
else:
944970
raise Exception("Unknown archive item type %r" % item.mode)
971+
return True
945972

946973
def restore_attrs(self, path, item, symlink=False, fd=None):
947974
"""

src/borg/archiver/extract_cmd.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def do_extract(self, args, repository, manifest, archive):
3939
progress = args.progress
4040
output_list = args.output_list
4141
dry_run = args.dry_run
42+
skip_errors = args.skip_errors
4243
stdout = args.stdout
4344
sparse = args.sparse
4445
strip_components = args.strip_components
@@ -76,9 +77,16 @@ def do_extract(self, args, repository, manifest, archive):
7677
dirs.append(item)
7778
archive.extract_item(item, stdout=stdout, restore_attrs=False)
7879
else:
79-
archive.extract_item(
80-
item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
81-
)
80+
if not archive.extract_item(
81+
item,
82+
stdout=stdout,
83+
sparse=sparse,
84+
hlm=hlm,
85+
pi=pi,
86+
continue_extraction=continue_extraction,
87+
skip_integrity_errors=skip_errors,
88+
):
89+
self.exit_code = EXIT_WARNING
8290
except (BackupOSError, BackupError) as e:
8391
self.print_warning("%s: %s", remove_surrogates(orig_path), e)
8492

@@ -175,6 +183,13 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
175183
action="store_true",
176184
help="continue a previously interrupted extraction of same archive",
177185
)
186+
subparser.add_argument(
187+
"--skip-errors",
188+
dest="skip_errors",
189+
action="store_true",
190+
help="skip corrupted chunks with a log message (exit 1) instead of aborting "
191+
"(no effect for --dry-run and --stdout)",
192+
)
178193
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
179194
subparser.add_argument(
180195
"paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"

src/borg/testsuite/archiver/extract_cmd.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,28 @@ def test_overwrite(archivers, request):
625625
cmd(archiver, "extract", "test", exit_code=1)
626626

627627

628+
def test_extract_skip_errors(archivers, request):
629+
archiver = request.getfixturevalue(archivers)
630+
create_regular_file(archiver.input_path, "file1", contents=b"a" * 280 + b"b" * 280)
631+
cmd(archiver, "rcreate", "-e" "none")
632+
cmd(archiver, "create", "--chunker-params", "7,9,8,128", "test", "input")
633+
segment_files = sorted(os.listdir(os.path.join(archiver.repository_path, "data", "0")), reverse=True)
634+
print(
635+
", ".join(
636+
f"{fn}: {os.stat(os.path.join(archiver.repository_path, 'data', '0', fn)).st_size}b" for fn in segment_files
637+
)
638+
)
639+
name = segment_files[3] # must be the segment file that has the file's chunks
640+
with open(os.path.join(archiver.repository_path, "data", "0", name), "r+b") as fd:
641+
fd.seek(100)
642+
fd.write(b"XXXX")
643+
with changedir("output"):
644+
output = cmd(archiver, "extract", "--skip-errors", "test", exit_code=1)
645+
assert "input/file1: chunk" in output
646+
assert os.stat("input/file1").st_size == 560
647+
cmd(archiver, "check", exit_code=1)
648+
649+
628650
# derived from test_extract_xattrs_errors()
629651
@pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason="xattr not supported on this system, or this version of fakeroot")
630652
def test_do_not_fail_when_percent_is_in_xattr_name(archivers, request):

0 commit comments

Comments (0)