Skip to content

Commit

Permalink
remove empty shadowed_segments lists, fixes borgbackup#5275
Browse files Browse the repository at this point in the history
also:
- add test for removed empty shadowed_segments list
- add some comments
- add repo_dump test debug tool
  • Loading branch information
ThomasWaldmann committed Jan 18, 2021
1 parent 699256e commit c3a3f34
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 4 deletions.
7 changes: 7 additions & 0 deletions src/borg/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,8 @@ def complete_xfer(intermediate=True):
try:
self.shadow_index[key].remove(segment)
except (KeyError, ValueError):
# do not remove entry with empty shadowed_segments list here,
# it is needed for shadowed_put_exists code (see below)!
pass
elif tag == TAG_DELETE and not in_index:
# If the shadow index doesn't contain this key, then we can't say if there's a shadowed older tag,
Expand Down Expand Up @@ -845,6 +847,11 @@ def complete_xfer(intermediate=True):
new_segment, size = self.io.write_delete(key)
self.compact[new_segment] += size
segments.setdefault(new_segment, 0)
else:
# we did not keep the delete tag for key (see if-branch)
if not self.shadow_index[key]:
# shadowed segments list is empty -> remove it
del self.shadow_index[key]
assert segments[segment] == 0, 'Corrupted segment reference count - corrupted index or hints'
unused.append(segment)
pi.show()
Expand Down
28 changes: 24 additions & 4 deletions src/borg/testsuite/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ..helpers import msgpack
from ..locking import Lock, LockFailed
from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed, ConnectionClosedWithHint, handle_remote_line
from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE
from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT, TAG_COMMIT
from . import BaseTestCase
from .hashindex import H

Expand Down Expand Up @@ -54,6 +54,16 @@ def add_keys(self):
self.repository.put(H(2), b'boo')
self.repository.delete(H(3))

def repo_dump(self, label=None):
label = label + ': ' if label is not None else ''
H_trans = {H(i): i for i in range(10)}
H_trans[None] = -1 # key == None appears in commits
tag_trans = {TAG_PUT: 'put', TAG_DELETE: 'del', TAG_COMMIT: 'comm'}
for segment, fn in self.repository.io.segment_iterator():
for tag, key, offset, size in self.repository.io.iter_objects(segment):
print("%s%s H(%d) -> %s[%d..+%d]" % (label, tag_trans[tag], H_trans[key], fn, offset, size))
print()


class RepositoryTestCase(RepositoryTestCaseBase):

Expand Down Expand Up @@ -315,8 +325,10 @@ def test_moved_deletes_are_tracked(self):
self.repository.put(H(1), b'1')
self.repository.put(H(2), b'2')
self.repository.commit(compact=False)
self.repo_dump('p1 p2 c')
self.repository.delete(H(1))
self.repository.commit(compact=True)
self.repo_dump('d1 cc')
last_segment = self.repository.io.get_latest_segment() - 1
num_deletes = 0
for tag, key, offset, size in self.repository.io.iter_objects(last_segment):
Expand All @@ -327,11 +339,16 @@ def test_moved_deletes_are_tracked(self):
assert last_segment in self.repository.compact
self.repository.put(H(3), b'3')
self.repository.commit(compact=True)
self.repo_dump('p3 cc')
assert last_segment not in self.repository.compact
assert not self.repository.io.segment_exists(last_segment)
for segment, _ in self.repository.io.segment_iterator():
for tag, key, offset, size in self.repository.io.iter_objects(segment):
assert tag != TAG_DELETE
assert key != H(1)
# after compaction, there should be no empty shadowed_segments lists left over.
# we have no put or del any more for H(1), so we lost knowledge about H(1).
assert H(1) not in self.repository.shadow_index

def test_shadowed_entries_are_preserved(self):
get_latest_segment = self.repository.io.get_latest_segment
Expand Down Expand Up @@ -372,16 +389,19 @@ def test_shadow_index_rollback(self):
self.repository.delete(H(1))
assert self.repository.shadow_index[H(1)] == [0]
self.repository.commit(compact=True)
self.repo_dump('p1 d1 cc')
# note how an empty list means that nothing is shadowed for sure
assert self.repository.shadow_index[H(1)] == []
assert self.repository.shadow_index[H(1)] == [] # because the delete is considered unstable
self.repository.put(H(1), b'1')
self.repository.delete(H(1))
self.repo_dump('p1 d1')
# 0 put/delete; 1 commit; 2 compacted; 3 commit; 4 put/delete
assert self.repository.shadow_index[H(1)] == [4]
self.repository.rollback()
self.repo_dump('r')
self.repository.put(H(2), b'1')
# After the rollback segment 4 shouldn't be considered anymore
assert self.repository.shadow_index[H(1)] == []
assert self.repository.shadow_index[H(1)] == [] # because the delete is considered unstable


class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
Expand Down Expand Up @@ -826,7 +846,7 @@ def test_hints_behaviour(self):
self.repository.put(H(42), b'foobar') # see also do_compact()
self.repository.commit(compact=True, threshold=0.0) # compact completely!
# nothing to compact any more! no info left about stuff that does not exist any more:
self.assert_equal(self.repository.shadow_index[H(0)], [])
self.assert_not_in(H(0), self.repository.shadow_index)
# segment 0 was compacted away, no info about it left:
self.assert_not_in(0, self.repository.compact)
self.assert_not_in(0, self.repository.segments)
Expand Down

0 comments on commit c3a3f34

Please sign in to comment.