Skip to content

Commit

Permalink
crc32 checks for all blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
rtmigo committed Jun 23, 2021
1 parent 98086d7 commit 1861037
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 76 deletions.
87 changes: 54 additions & 33 deletions dmk/b_cryptoblobs/_20_encdec_part.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def __init__(self,
self._tried_to_read_header = False

self._data_read = False
# self._data = None
self._data = None

pos = self._source.tell()
if pos != 0:
Expand Down Expand Up @@ -509,41 +509,62 @@ def __read_header(self) -> Header:
valid=True)

def read_data(self) -> bytes:
if self._data_read:
raise RuntimeError("Cannot read data more than once")

if not self.contains_data:
raise RuntimeError("contains_data is False")

assert self._source.tell() == CLUSTER_META_SIZE, f"pos is {self._source.tell()}"
# todo remove
result = self.data
if result is None:
raise TypeError
return result
# if self._data_read:
# raise RuntimeError("Cannot read data more than once")
#
# if not self.contains_data:
# raise RuntimeError("contains_data is False")
#
# assert self._source.tell() == CLUSTER_META_SIZE, f"pos is {self._source.tell()}"
#
# body = self.__read_and_decrypt(self.header.part_size)
# if zlib.crc32(body) != self.header.content_crc32:
# raise VerificationFailure("Body CRC mismatch.")
#
# self._data_read = True
# return body

body = self.__read_and_decrypt(self.header.part_size)
if zlib.crc32(body) != self.header.content_crc32:
raise VerificationFailure("Body CRC mismatch.")
@property
def data(self):
if self._data_read:
return self._data
assert not self._data_read

self._data_read = True
return body

def verify_data(self) -> bool:
"""This can be called before removing an old block.
Usually, by this point, the block's codename has already been verified
by a 256-bit hash. But we will check it again with a 32-bit checksum
(for data blocks) or a 48-bit version number (for fake blocks).
Just because we can
"""

# todo unit test

if not self.contains_data:
# 48-bit match
assert self.header.data_version == FAKE_CONTENT_VERSION
return True
try:
self.read_data() # checking 32-bit crc32 match
return True
except VerificationFailure:
return False
if self.contains_data:
assert self._source.tell() == CLUSTER_META_SIZE, f"pos is {self._source.tell()}"

self._data = self.__read_and_decrypt(self.header.part_size)
if zlib.crc32(self._data) != self.header.content_crc32:
raise VerificationFailure("Body CRC mismatch.")

return self._data

# def verify_data(self) -> bool:
# """This can be called before removing an old block.
#
# Usually, by this point, the block's codename has already been verified
# by a 256-bit hash. But we will check it again with a 32-bit checksum
# (for data blocks) or a 48-bit version number (for fake blocks).
# Just because we can
# """
#
# # todo unit test
#
# if not self.contains_data:
# # 48-bit match
# assert self.header.data_version == FAKE_CONTENT_VERSION
# return True
# try:
# self.read_data() # checking 32-bit crc32 match
# return True
# except VerificationFailure:
# return False


def is_content_io(fpk: CodenameKey, stream: BinaryIO) -> bool:
Expand Down
17 changes: 17 additions & 0 deletions dmk/c_namegroups/_namegroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,23 @@ def __init__(self, blobs: BlocksIndexedReader, cnk: CodenameKey):
if not dio.belongs_to_namegroup:
continue
assert dio.belongs_to_namegroup

# We have checked that the block belongs to this code name.
# This assumption is wrong only in the case of a collision
# of the 256-bit private keys or the 256-bit imprints.
# We believe that any of these collisions are impossible.
#
# But if speed is not a priority, we can double-check our belief
# in the absence of imprint collisions.

if dio.contains_data:
# checking CRC-32 of the decrypted body is ok
assert dio.data is not None
else:
# we have already checked that the 48-bit decrypted constant content_ver
# is ok; just recheck that the property returns None
assert dio.data is None

gf = NameGroupItem(idx, dio)
self.items.append(gf)

Expand Down
53 changes: 10 additions & 43 deletions dmk/c_namegroups/_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,28 +43,18 @@ def get_stream_size(stream: BinaryIO) -> int:
return result


# def remove_random_items(source: Set[int],
# min_to_delete=1,
# max_to_delete=5) -> Set[int]:
# if len(source) < min_to_delete:
# raise ValueError("List is too small")
#
# max_to_delete = min(max_to_delete, len(source))
# num_to_delete = random.randint(min_to_delete, max_to_delete)
# indexes_to_delete = random.sample(list(source), num_to_delete)
# result = source - set(indexes_to_delete)
# assert len(result) < len(source) or min_to_delete == 0
# return result

def random_indexes_to_delete(source: Set[int],
def remove_random_items(source: Set[int],
min_to_delete=1,
max_to_delete=5) -> List[int]:
max_to_delete=5) -> Set[int]:
if len(source) < min_to_delete:
raise ValueError("List is too small")

max_to_delete = min(max_to_delete, len(source))
num_to_delete = random.randint(min_to_delete, max_to_delete)
return random.sample(list(source), num_to_delete)
indexes_to_delete = random.sample(list(source), num_to_delete)
result = source - set(indexes_to_delete)
assert len(result) < len(source) or min_to_delete == 0
return result


class TaskKeep(NamedTuple):
Expand Down Expand Up @@ -169,42 +159,19 @@ def update_namegroup_b(cdk: CodenameKey,
)

if len(ng_old_indexes) >= 1:
indexes_to_delete = random_indexes_to_delete(
ng_new_indexes = remove_random_items(
ng_old_indexes,
min_to_delete=1,
max_to_delete=fake_deltas.max_loss
)

# We have already verified (with a 256-bit hash) that all the blocks in
# ng_old_indexes are related to the CodenameKey. But before deleting a block
# (which is the most dangerous thing we ever do), we additionally verify
# that the data of the block matches the block header. So it's actually
# a 256+32 or 256+48 bit match.
#
# In case of speed problems this check can be safely ignored, since
# collisions of 256-bit hashes are almost impossible
indexes_to_delete = [idx for idx in indexes_to_delete
if name_group.block_idx_to_item(idx)
.dio.verify_data()]

assert all(idx in ng_old_indexes
for idx in indexes_to_delete)

our_new_blob_indexes = ng_old_indexes - set(indexes_to_delete)

# our_new_blob_indexes = remove_random_items(
# our_old_blob_indexes,
# min_to_delete=1,
# max_to_delete=fake_deltas.max_loss)
max_to_delete=fake_deltas.max_loss)
else:
assert len(ng_old_indexes) == 0
our_new_blob_indexes = set()
ng_new_indexes = set()

tasks: List[object] = list()

indexes_to_keep = set(all_blob_indexes)
indexes_to_keep -= ng_old_indexes
indexes_to_keep.update(our_new_blob_indexes)
indexes_to_keep.update(ng_new_indexes)

for idx in indexes_to_keep:
tasks.append(TaskKeep(idx))
Expand Down

0 comments on commit 1861037

Please sign in to comment.