Skip to content

Commit

Permalink
Merge pull request #5706 from ThomasWaldmann/files-cache-debug-logging-1.1
Browse files Browse the repository at this point in the history

verbose files cache logging via --debug-topic=files_cache, fixes #5659
  • Loading branch information
ThomasWaldmann authored Feb 28, 2021
2 parents fa2da99 + 433658d commit abe2b6b
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 12 deletions.
9 changes: 5 additions & 4 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,13 +1059,14 @@ def process_file(self, path, st, cache):
item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
if not hardlinked or hardlink_master:
if not is_special_file:
path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
known, ids = cache.file_known_and_unchanged(path_hash, st)
hashed_path = safe_encode(os.path.join(self.cwd, path))
path_hash = self.key.id_hash(hashed_path)
known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
else:
# in --read-special mode, we may be called for special files.
# there should be no information in the cache about special files processed in
# read-special mode, but we better play safe as this was wrong in the past:
path_hash = None
hashed_path = path_hash = None
known, ids = False, None
chunks = None
if ids is not None:
Expand All @@ -1091,7 +1092,7 @@ def process_file(self, path, st, cache):
if not is_special_file:
# we must not memorize special files, because the contents of e.g. a
# block or char device will change without its mtime/size/inode changing.
cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
self.stats.nfiles += 1
item.update(self.stat_ext_attrs(st, path))
item.get_size(memorize=True)
Expand Down
43 changes: 36 additions & 7 deletions src/borg/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

logger = create_logger()

files_cache_logger = create_logger('borg.debug.files_cache')

from .constants import CACHE_README, DEFAULT_FILES_CACHE_MODE
from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
from .helpers import Location
Expand Down Expand Up @@ -518,6 +520,7 @@ def _read_files(self):
self.files = {}
self._newest_cmtime = None
logger.debug('Reading files cache ...')
files_cache_logger.debug("FILES-CACHE-LOAD: starting...")
msg = None
try:
with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=False,
Expand All @@ -544,6 +547,7 @@ def _read_files(self):
logger.warning(msg)
logger.warning('Continuing without files cache - expect lower performance.')
self.files = {}
files_cache_logger.debug("FILES-CACHE-LOAD: finished, %d entries loaded.", len(self.files))

def begin_txn(self):
# Initialize transaction snapshot
Expand Down Expand Up @@ -578,7 +582,9 @@ def commit(self):
self._newest_cmtime = 2 ** 63 - 1 # nanoseconds, good until y2262
ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
pi.output('Saving files cache')
files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=True) as fd:
entry_count = 0
for path_hash, item in self.files.items():
# Only keep files seen in this backup that are older than newest cmtime seen in this backup -
# this is to avoid issues with filesystem snapshots and cmtime granularity.
Expand All @@ -587,6 +593,10 @@ def commit(self):
if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
entry.age > 0 and entry.age < ttl:
msgpack.pack((path_hash, entry), fd)
entry_count += 1
files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl)
files_cache_logger.debug("FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime)
files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count)
self.cache_config.integrity[files_cache_name()] = fd.integrity_data
pi.output('Saving chunks cache')
with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
Expand Down Expand Up @@ -937,36 +947,46 @@ def chunk_decref(self, id, stats, wait=True):
else:
stats.update(-size, -csize, False)

def file_known_and_unchanged(self, path_hash, st):
def file_known_and_unchanged(self, hashed_path, path_hash, st):
"""
Check if we know the file that has this path_hash (know == it is in our files cache) and
whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
:param path_hash: hash(file_path), to save some memory in the files cache
:param hashed_path: the file's path as we gave it to hash(hashed_path)
:param path_hash: hash(hashed_path), to save some memory in the files cache
:param st: the file's stat() result
:return: known, ids (known is True if we have infos about this file in the cache,
ids is the list of chunk ids IF the file has not changed, otherwise None).
"""
if not stat.S_ISREG(st.st_mode):
return False, None
cache_mode = self.cache_mode
if 'd' in cache_mode or not stat.S_ISREG(st.st_mode): # d(isabled)
if 'd' in cache_mode: # d(isabled)
files_cache_logger.debug('UNKNOWN: files cache disabled')
return False, None
# note: r(echunk) does not need the files cache in this method, but the files cache will
# be updated and saved to disk to memorize the files. To preserve previous generations in
# the cache, this means that it also needs to get loaded from disk first.
if 'r' in cache_mode: # r(echunk)
files_cache_logger.debug('UNKNOWN: rechunking enforced')
return False, None
entry = self.files.get(path_hash)
if not entry:
files_cache_logger.debug('UNKNOWN: no file metadata in cache for: %r', hashed_path)
return False, None
# we know the file!
entry = FileCacheEntry(*msgpack.unpackb(entry))
if 's' in cache_mode and entry.size != st.st_size:
files_cache_logger.debug('KNOWN-CHANGED: file size has changed: %r', hashed_path)
return True, None
if 'i' in cache_mode and entry.inode != st.st_ino:
files_cache_logger.debug('KNOWN-CHANGED: file inode number has changed: %r', hashed_path)
return True, None
if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
files_cache_logger.debug('KNOWN-CHANGED: file ctime has changed: %r', hashed_path)
return True, None
elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
files_cache_logger.debug('KNOWN-CHANGED: file mtime has changed: %r', hashed_path)
return True, None
# we ignored the inode number in the comparison above or it is still same.
# if it is still the same, replacing it in the tuple doesn't change it.
Expand All @@ -979,18 +999,26 @@ def file_known_and_unchanged(self, path_hash, st):
self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
return True, entry.chunk_ids

def memorize_file(self, hashed_path, path_hash, st, ids):
    """
    Remember a regular file's chunk ids and stat() metadata in the files cache.

    :param hashed_path: the file's path as it was given to hash() (used here only for debug logging)
    :param path_hash: hash(hashed_path), the key under which the entry is stored
    :param st: the file's stat() result
    :param ids: list of chunk ids making up the file's content
    """
    # special files (block/char devices, ...) must never be memorized; their
    # contents can change without mtime/size/inode changing.
    if not stat.S_ISREG(st.st_mode):
        return
    cache_mode = self.cache_mode
    # note: r(echunk) modes will update the files cache, d(isabled) mode won't
    if 'd' in cache_mode:
        files_cache_logger.debug('FILES-CACHE-NOUPDATE: files cache disabled')
        return
    # NOTE(review): assumes cache_mode contains 'c' or 'm' at this point
    # (otherwise cmtime_ns/cmtime_type stay unbound) — verify that mode
    # validation upstream guarantees this.
    if 'c' in cache_mode:
        cmtime_type = 'ctime'
        cmtime_ns = safe_ns(st.st_ctime_ns)
    elif 'm' in cache_mode:
        cmtime_type = 'mtime'
        cmtime_ns = safe_ns(st.st_mtime_ns)
    # age=0 marks the entry as seen in this backup run.
    entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
    self.files[path_hash] = msgpack.packb(entry)
    # track the newest ctime/mtime seen, used by commit() to decide which
    # entries are safe to persist.
    self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
    files_cache_logger.debug('FILES-CACHE-UPDATE: put %r [has %s] <- %r',
                             entry._replace(chunk_ids='[%d entries]' % len(entry.chunk_ids)),
                             cmtime_type, hashed_path)


class AdHocCache(CacheStatsMixin):
Expand Down Expand Up @@ -1031,10 +1059,11 @@ def __exit__(self, exc_type, exc_val, exc_tb):
files = None
cache_mode = 'd'

def file_known_and_unchanged(self, hashed_path, path_hash, st):
    """
    AdHocCache does not implement a files cache, so no file is ever known.

    :param hashed_path: the file's path as given to hash() (unused, kept for interface parity)
    :param path_hash: hash(hashed_path) (unused)
    :param st: the file's stat() result (unused)
    :return: (False, None) — never known, no cached chunk ids
    """
    files_cache_logger.debug("UNKNOWN: files cache not implemented")
    return False, None

def memorize_file(self, hashed_path, path_hash, st, ids):
    """
    No-op: AdHocCache does not maintain a files cache, so nothing is memorized.

    :param hashed_path: the file's path as given to hash() (ignored)
    :param path_hash: hash(hashed_path) (ignored)
    :param st: the file's stat() result (ignored)
    :param ids: list of chunk ids of the file's content (ignored)
    """

def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):
Expand Down
2 changes: 1 addition & 1 deletion src/borg/testsuite/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
repository.get(H(5))

def test_files_cache(self, cache):
assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
assert cache.file_known_and_unchanged(b'foo', bytes(32), None) == (False, None)
assert cache.cache_mode == 'd'
assert cache.files is None

Expand Down

0 comments on commit abe2b6b

Please sign in to comment.