Skip to content

Commit

Permalink
Merge pull request carykh#37 from valadaptive/cache-hash
Browse files Browse the repository at this point in the history
Cache by hash as well as modification time
  • Loading branch information
valadaptive authored May 25, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents 0b6f417 + 0da61f9 commit 288997e
Showing 1 changed file with 48 additions and 13 deletions.
61 changes: 48 additions & 13 deletions code/cache.py
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
import pathlib
import os
import sys
import hashlib

import numpy

@@ -35,17 +36,29 @@ def get(self, pair):

def insert(self, pair, avgScoreA, avgScoreB, stdevA, stdevB, firstRoundHistory, roundResultsStr):
    """Store the results computed for ``pair`` in the cache.

    This base implementation is a placeholder: it always raises
    ``NotImplementedError``. Concrete caches (such as ``SQLiteCache``)
    override it with the same signature.
    """
    raise NotImplementedError

def pair_paths(self, pair):
    """Return the absolute file paths for the two entries of ``pair``.

    Each entry is mapped to a file location with
    ``file_location_from_spec`` and made absolute; the result is a
    2-tuple ``(path_for_pair[0], path_for_pair[1])`` of ``pathlib.Path``.
    """
    first_path = pathlib.Path(file_location_from_spec(pair[0])).absolute()
    second_path = pathlib.Path(file_location_from_spec(pair[1])).absolute()
    return first_path, second_path

def get_last_modified(self, pair):
    """Return the newest modification time relevant to ``pair``.

    The result is the maximum ``os.path.getmtime`` value over three
    files: the two files returned by ``self.pair_paths(pair)`` and the
    running script (``sys.argv[0]``). Including the script means that
    editing it is treated like editing either file of the pair.

    Raises ``OSError`` (from ``os.path.getmtime``) if any of the three
    files cannot be stat'ed.
    """
    # Resolve the pair's files once through the shared helper instead of
    # repeating the path construction here.
    afile, bfile = self.pair_paths(pair)
    sfile = pathlib.Path(sys.argv[0]).absolute()
    return max(os.path.getmtime(path) for path in (afile, bfile, sfile))

def hash_file(filePath, chunk_size=65536):
    """Return the raw SHA-256 digest (``bytes``) of a file's contents.

    The file is opened in binary mode and read in pieces of
    ``chunk_size`` bytes, so the whole file is never held in memory.

    Args:
        filePath: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration. Defaults to
            65536, the size this function has always used.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer; a size
            of 0 would end the read loop immediately and silently hash
            nothing.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    sha256 = hashlib.sha256()
    with open(filePath, "rb") as file:
        # iter(callable, sentinel) keeps calling read() until it returns
        # the empty bytes object that signals end of file.
        for data in iter(lambda: file.read(chunk_size), b""):
            sha256.update(data)
    return sha256.digest()

class SQLiteCache(AbstractCache):
default = "cache"
@@ -61,12 +74,19 @@ def setup(self):
self.cur.execute("PRAGMA read_uncommitted=1")
self.cur.execute("PRAGMA journal_mode=wal")
self.cur.execute("PRAGMA wal_autocheckpoint=0")

if self.cur.execute("PRAGMA user_version").fetchone()[0] != 1:
self.cur.execute("DROP TABLE IF EXISTS cache")
self.cur.execute("PRAGMA user_version=1")

self.cur.execute((
"CREATE TABLE IF NOT EXISTS cache ("
"moduleA text NOT NULL,"
"moduleB text NOT NULL,"
"result text NOT NULL,"
"timestamp number NOT NULL )"
"timestamp number NOT NULL,"
"hashA blob NOT NULL,"
"hashB blob NOT NULL )"
))
self.cur.execute("CREATE INDEX IF NOT EXISTS idx_modules ON cache (moduleA, moduleB)")

@@ -75,6 +95,15 @@ def setup(self):
def shutdown(self):
    """Checkpoint the write-ahead log, then close the cache.

    Issues ``PRAGMA wal_checkpoint(FULL)`` on the cursor before
    delegating to ``self.close()``.
    """
    checkpoint_sql = "PRAGMA wal_checkpoint(FULL)"
    self.cur.execute(checkpoint_sql)
    self.close()

def _load(self, res):
unpacked = pickle.loads(base64.b64decode(res[0]))
return (unpacked.get("avgScoreA"),
unpacked.get("avgScoreB"),
unpacked.get("stdevA"),
unpacked.get("stdevB"),
numpy.array(unpacked.get("firstRoundHistory")),
unpacked.get("roundResultsStr"),) # This is a tuple.

def get(self, pair):
mod = self.get_last_modified(pair)
@@ -84,15 +113,16 @@ def get(self, pair):
res = cur.execute(f"SELECT result FROM cache WHERE timestamp >= ? AND moduleA = ? AND moduleB = ?",
(mod, pair[0], pair[1])).fetchone()
if res:
unpacked = pickle.loads(base64.b64decode(res[0]))
return (unpacked.get("avgScoreA"),
unpacked.get("avgScoreB"),
unpacked.get("stdevA"),
unpacked.get("stdevB"),
numpy.array(unpacked.get("firstRoundHistory")),
unpacked.get("roundResultsStr"),) # This is a tuple.
return self._load(res)
else:
return False
pathA, pathB = self.pair_paths(pair)

hashA = hash_file(pathA)
hashB = hash_file(pathB)

res = cur.execute(f"SELECT result FROM cache WHERE hashA = ? AND hashB = ? AND moduleA = ? AND moduleB = ?",
(hashA, hashB, pair[0], pair[1])).fetchone()
return self._load(res) if res else False

def insert(self, pair, avgScoreA, avgScoreB, stdevA, stdevB, firstRoundHistory, roundResultsStr):
mod = self.get_last_modified(pair)
@@ -106,8 +136,13 @@ def insert(self, pair, avgScoreA, avgScoreB, stdevA, stdevB, firstRoundHistory,
"roundResultsStr": roundResultsStr
}
rstr = base64.b64encode(pickle.dumps(renc))
self.cur.execute("INSERT INTO cache (moduleA, moduleB, result, timestamp)"
"VALUES(?, ?, ?, ?)", (pair[0], pair[1], rstr, mod))

pathA, pathB = self.pair_paths(pair)

hashA = hash_file(pathA)
hashB = hash_file(pathB)
self.cur.execute("INSERT INTO cache (moduleA, moduleB, result, timestamp, hashA, hashB)"
"VALUES(?, ?, ?, ?, ?, ?)", (pair[0], pair[1], rstr, mod, hashA, hashB))

def close(self):
self.cur.close()

0 comments on commit 288997e

Please sign in to comment.