From 1861037bbbcab93b1a82a0c738e9894167005b50 Mon Sep 17 00:00:00 2001
From: rtmigo
Date: Wed, 23 Jun 2021 20:47:23 +0300
Subject: [PATCH] crc32 checks for all blocks

---
 dmk/b_cryptoblobs/_20_encdec_part.py | 87 +++++++++++++++++-----------
 dmk/c_namegroups/_namegroup.py       | 17 ++++++
 dmk/c_namegroups/_update.py          | 53 ++++-------------
 3 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/dmk/b_cryptoblobs/_20_encdec_part.py b/dmk/b_cryptoblobs/_20_encdec_part.py
index c402164..fa30043 100644
--- a/dmk/b_cryptoblobs/_20_encdec_part.py
+++ b/dmk/b_cryptoblobs/_20_encdec_part.py
@@ -391,7 +391,7 @@ def __init__(self,
 
         self._tried_to_read_header = False
         self._data_read = False
-        # self._data = None
+        self._data = None
 
         pos = self._source.tell()
         if pos != 0:
@@ -509,41 +509,62 @@ def __read_header(self) -> Header:
                       valid=True)
 
     def read_data(self) -> bytes:
-        if self._data_read:
-            raise RuntimeError("Cannot read data more than once")
-
-        if not self.contains_data:
-            raise RuntimeError("contains_data is False")
-
-        assert self._source.tell() == CLUSTER_META_SIZE, f"pos is {self._source.tell()}"
+        # todo remove
+        result = self.data
+        if result is None:
+            raise TypeError
+        return result
+        # if self._data_read:
+        #     raise RuntimeError("Cannot read data more than once")
+        #
+        # if not self.contains_data:
+        #     raise RuntimeError("contains_data is False")
+        #
+        # assert self._source.tell() == CLUSTER_META_SIZE, f"pos is {self._source.tell()}"
+        #
+        # body = self.__read_and_decrypt(self.header.part_size)
+        # if zlib.crc32(body) != self.header.content_crc32:
+        #     raise VerificationFailure("Body CRC mismatch.")
+        #
+        # self._data_read = True
+        # return body
 
-        body = self.__read_and_decrypt(self.header.part_size)
-        if zlib.crc32(body) != self.header.content_crc32:
-            raise VerificationFailure("Body CRC mismatch.")
+    @property
+    def data(self):
+        if self._data_read:
+            return self._data
 
+        assert not self._data_read
         self._data_read = True
-        return body
-
-    def verify_data(self) -> bool:
-        """This can be called before removing an old block.
-
-        Usually, by this point, the block's codename has already been verified
-        by a 256-bit hash. But we will check it again with a 32-bit checksum
-        (for data blocks) or a 48-bit version number (for fake blocks).
-        Just because we can
-        """
-
-        # todo unit test
-
-        if not self.contains_data:
-            # 48-bit match
-            assert self.header.data_version == FAKE_CONTENT_VERSION
-            return True
-        try:
-            self.read_data()  # checking 32-bit crc32 match
-            return True
-        except VerificationFailure:
-            return False
+        if self.contains_data:
+            assert self._source.tell() == CLUSTER_META_SIZE, f"pos is {self._source.tell()}"
+
+            self._data = self.__read_and_decrypt(self.header.part_size)
+            if zlib.crc32(self._data) != self.header.content_crc32:
+                raise VerificationFailure("Body CRC mismatch.")
+
+        return self._data
+
+    # def verify_data(self) -> bool:
+    #     """This can be called before removing an old block.
+    #
+    #     Usually, by this point, the block's codename has already been verified
+    #     by a 256-bit hash. But we will check it again with a 32-bit checksum
+    #     (for data blocks) or a 48-bit version number (for fake blocks).
+    #     Just because we can
+    #     """
+    #
+    #     # todo unit test
+    #
+    #     if not self.contains_data:
+    #         # 48-bit match
+    #         assert self.header.data_version == FAKE_CONTENT_VERSION
+    #         return True
+    #     try:
+    #         self.read_data()  # checking 32-bit crc32 match
+    #         return True
+    #     except VerificationFailure:
+    #         return False
 
 
 def is_content_io(fpk: CodenameKey, stream: BinaryIO) -> bool:
diff --git a/dmk/c_namegroups/_namegroup.py b/dmk/c_namegroups/_namegroup.py
index 177cc75..dc39ce2 100644
--- a/dmk/c_namegroups/_namegroup.py
+++ b/dmk/c_namegroups/_namegroup.py
@@ -49,6 +49,23 @@ def __init__(self, blobs: BlocksIndexedReader, cnk: CodenameKey):
             if not dio.belongs_to_namegroup:
                 continue
             assert dio.belongs_to_namegroup
+
+            # We have checked that the block belongs to this code name.
+            # This assumption is wrong only in the case of a collision
+            # of the 256-bit private keys or the 256-bit imprints.
+            # We believe such collisions are practically impossible.
+            #
+            # But if speed is not a priority, we can double-check our belief
+            # in the absence of imprint collisions.
+
+            if dio.contains_data:
+                # reading the data verifies the CRC-32 of the decrypted body
+                assert dio.data is not None
+            else:
+                # the 48-bit decrypted constant content_ver was already checked;
+                # just recheck that the property returns None
+                assert dio.data is None
+
             gf = NameGroupItem(idx, dio)
             self.items.append(gf)
 
diff --git a/dmk/c_namegroups/_update.py b/dmk/c_namegroups/_update.py
index 0a81540..0ceaece 100644
--- a/dmk/c_namegroups/_update.py
+++ b/dmk/c_namegroups/_update.py
@@ -43,28 +43,18 @@ def get_stream_size(stream: BinaryIO) -> int:
     return result
 
 
-# def remove_random_items(source: Set[int],
-#                         min_to_delete=1,
-#                         max_to_delete=5) -> Set[int]:
-#     if len(source) < min_to_delete:
-#         raise ValueError("List is too small")
-#
-#     max_to_delete = min(max_to_delete, len(source))
-#     num_to_delete = random.randint(min_to_delete, max_to_delete)
-#     indexes_to_delete = random.sample(list(source), num_to_delete)
-#     result = source - set(indexes_to_delete)
-#     assert len(result) < len(source) or min_to_delete == 0
-#     return result
-
-def random_indexes_to_delete(source: Set[int],
+def remove_random_items(source: Set[int],
                              min_to_delete=1,
-                             max_to_delete=5) -> List[int]:
+                             max_to_delete=5) -> Set[int]:
     if len(source) < min_to_delete:
         raise ValueError("List is too small")
 
     max_to_delete = min(max_to_delete, len(source))
     num_to_delete = random.randint(min_to_delete, max_to_delete)
-    return random.sample(list(source), num_to_delete)
+    indexes_to_delete = random.sample(list(source), num_to_delete)
+    result = source - set(indexes_to_delete)
+    assert len(result) < len(source) or min_to_delete == 0
+    return result
 
 
 class TaskKeep(NamedTuple):
@@ -169,42 +159,19 @@ def update_namegroup_b(cdk: CodenameKey,
                        )
 
     if len(ng_old_indexes) >= 1:
-        indexes_to_delete = random_indexes_to_delete(
+        ng_new_indexes = remove_random_items(
             ng_old_indexes,
             min_to_delete=1,
-            max_to_delete=fake_deltas.max_loss
-        )
-
-        # we already verified (with 256-bit hash) that all the blocks in
-        # ng_old_indexes related to to CodenameKey. But before deleting a block
-        # (that the most dangerous thing we ever do), we additionally verify
-        # that the data of the block matches the block header. So it's actually
-        # 256+32 or 256+48 bit match.
-        #
-        # In case of speed problems this check can be safely ignored, since
-        # collisions of 256-bit hashes are almost impossible
-        indexes_to_delete = [idx for idx in indexes_to_delete
-                             if name_group.block_idx_to_item(idx)
-                             .dio.verify_data()]
-
-        assert all(idx in ng_old_indexes
-                   for idx in indexes_to_delete)
-
-        our_new_blob_indexes = ng_old_indexes - set(indexes_to_delete)
-
-        # our_new_blob_indexes = remove_random_items(
-        #     our_old_blob_indexes,
-        #     min_to_delete=1,
-        #     max_to_delete=fake_deltas.max_loss)
+            max_to_delete=fake_deltas.max_loss)
     else:
        assert len(ng_old_indexes) == 0
-        our_new_blob_indexes = set()
+        ng_new_indexes = set()
 
     tasks: List[object] = list()
 
     indexes_to_keep = set(all_blob_indexes)
     indexes_to_keep -= ng_old_indexes
-    indexes_to_keep.update(our_new_blob_indexes)
+    indexes_to_keep.update(ng_new_indexes)
 
     for idx in indexes_to_keep:
         tasks.append(TaskKeep(idx))
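
-- 
The net effect of the patch: the explicit verify_data() pass before block
deletion is gone, because verification now happens the moment a name group
enumerates its blocks. The new data property decrypts a body at most once,
checks the 32-bit CRC of data blocks, and returns None for fake blocks
(whose 48-bit version constant was already checked while parsing the
header). Below is a minimal standalone sketch of that read-once pattern;
BlockReader, _decrypt and the XOR toy cipher are illustrative stand-ins,
not the actual dmk classes.

import zlib
from typing import Optional


class VerificationFailure(Exception):
    """Raised when a decrypted body does not match its stored CRC-32."""


class BlockReader:
    """Decrypts a block body at most once and caches the result.

    For data blocks, the first read of `data` verifies the decrypted
    bytes against the CRC-32 stored in the header. For fake blocks,
    `data` is always None.
    """

    def __init__(self, ciphertext: bytes, stored_crc32: Optional[int]):
        # In this sketch, stored_crc32 is None for fake blocks.
        self._ciphertext = ciphertext
        self._stored_crc32 = stored_crc32
        self._data_read = False
        self._data: Optional[bytes] = None

    def _decrypt(self) -> bytes:
        # Toy stand-in for the real decryption step.
        return bytes(b ^ 0x5A for b in self._ciphertext)

    @property
    def data(self) -> Optional[bytes]:
        # After the first read, always serve the cached result.
        if self._data_read:
            return self._data
        self._data_read = True

        if self._stored_crc32 is not None:  # data block
            body = self._decrypt()
            if zlib.crc32(body) != self._stored_crc32:
                raise VerificationFailure("Body CRC mismatch.")
            self._data = body
        # For a fake block, self._data simply stays None.
        return self._data


if __name__ == "__main__":
    plain = b"hello"
    ciphertext = bytes(b ^ 0x5A for b in plain)

    reader = BlockReader(ciphertext, zlib.crc32(plain))
    assert reader.data == plain  # decrypted and CRC-checked on first read
    assert reader.data == plain  # served from the cache afterwards

    fake = BlockReader(b"", None)
    assert fake.data is None     # fake blocks expose no data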