Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize DHT components and methods #8

Merged
merged 6 commits into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/unitary_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ jobs:
run: |
python -m unittest tests/test_hashes.py
python -m unittest tests/test_routing.py
python -m unittest tests/test_dht.py
python -m unittest tests/test_network.py


23 changes: 23 additions & 0 deletions dht/bitarray_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import bitarray
import ctypes
from bitarray import util
import random
from collections import deque, defaultdict

# Scratch script: hash a random small integer (through its hex string) with
# the built-in hash() and look at its bit representation.
rint = random.randint(0, 99)
inthash = hash(hex(rint))

# hash() may return a negative number; c_ulong converts it to an unsigned
# value before it is packed into a fixed-length (64 entries) bitarray.
barray = util.int2ba(ctypes.c_ulong(inthash).value, length=64)
print(barray)
print(len(barray))
print(barray.to01())

# Copy a plain dict into a defaultdict and then into a deque of (key, value) pairs.
defD = {'b': 1, 'g': 2, 'r': 3, 'y': 4}
d = defaultdict()
for i, v in defD.items():
    d[i] = v

print(d.items())
q = deque(d.items())
# A deque cannot be indexed with a slice (q[2:] raises TypeError), so copy it
# into a list before slicing.
print(list(q)[2:])

197 changes: 97 additions & 100 deletions dht/dht.py

Large diffs are not rendered by default.

67 changes: 24 additions & 43 deletions dht/hashes.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import ctypes
from bitarray.util import int2ba

# TODO: swap the hash to SHA256 with the possibility of reusing a given seed for reproducibility
# at the moment, I'm using the default 64bit hash function from Python
HASH_BASE = 64

class Hash():

class Hash:
def __init__(self, value):
""" basic representation of a Hash object for the DHT, which includes the main utilities related to a hash """
self.value = self.hash_key(value)
self.bitArray = BitArray(self.value, HASH_BASE)
self.bitarray = BitArray(self.value, HASH_BASE)
# TODO: the hash values could be reproduced if the ENVIRONMENT VARIABLE PYTHONHASHSEED is set to a 64 bit integer https://docs.python.org/3/using/cmdline.html#envvar-PYTHONHASHSEED

def hash_key(self, key):
Expand All @@ -20,62 +22,41 @@ def hash_key(self, key):
# ensure that the hash is unsigned
return ctypes.c_ulong(h).value

def xor_to(self, targetHash:int) -> int:
def xor_to(self, targetint: int) -> int:
""" Returns the XOR distance between both hash values"""
distance = self.value ^ targetHash
return ctypes.c_ulong(distance).value
return ctypes.c_ulong(self.value ^ targetint).value

def xor_to_hash(self, targetHash) -> int:
def xor_to_hash(self, targethash) -> int:
""" Returns the XOR distance between both hash values"""
distance = self.value ^ targetHash.value
return ctypes.c_ulong(distance).value
return ctypes.c_ulong(self.value ^ targethash.value).value

def shared_upper_bits(self, targetHash) -> int:
def shared_upper_bits(self, targethash) -> int:
""" returns the number of upper sharing bits between 2 hash values """
targetBits = BitArray(targetHash.value, HASH_BASE)
sBits = self.bitArray.upper_sharing_bits(targetBits)
return sBits
return self.bitarray.upper_sharing_bits(targethash.bitarray)

def __repr__(self) -> str:
return str(hex(self.value))

def __eq__(self, targetHash) -> bool:
return self.value == targetHash.value

def is_smaller_than(self, targetHash) -> bool:
return self.value < targetHash.value
def __eq__(self, targethash) -> bool:
return self.value == targethash.value

def is_greater_than(self, targetHash) -> bool:
return self.value > targetHash.value

class BitArray():
class BitArray:
""" array representation of an integer using only bits, ideal for finding matching upper bits"""
def __init__(self, intValue:int, base:int):
def __init__(self, uintval:int, base:int):
self.base = base
self.bitArray = self.bin(intValue)
self.bitarray = int2ba(uintval, length=base)

def __repr__(self):
return str(self.bitArray)

def upper_sharing_bits(self, targetBitArray) -> int:
sBits = 0
for i, bit in enumerate(self.bitArray):
if bit == targetBitArray.get_x_bit(i):
sBits += 1
return self.bitarray.to01()

def upper_sharing_bits(self, targetba) -> int:
sbits = 0
proc = self.bitarray ^ targetba.bitarray
for bit in proc:
if bit == 0:
sbits += 1
else:
break
return sBits

def get_x_bit(self, idx:int = 0):
return self.bitArray[idx]
return sbits

def bin(self, n):
s = ""
i = 1 << self.base-1
while(i > 0) :
if((n & i) != 0) :
s += "1"
else :
s += "0"
i = i // 2
return s
17 changes: 9 additions & 8 deletions dht/key_store.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
from dht.hashes import Hash
from collections import defaultdict

class KeyValueStore():

class KeyValueStore:
""" Memory-storage unit that will keep track of each of the key-values that a DHT client has to keep locally"""

def __init__(self):
""" compose the storage unit in memory """
self.storage = {}
self.storage = defaultdict()

def add(self, key:Hash, value):
def add(self, key: Hash, value):
""" aggregates a new value to the store, or overrides it if it was already a value for the key """
self.storage[key.value] = value

def remove(self, key:Hash):
def remove(self, key: Hash):
self.storage.pop(key.value)

def read(self, key:Hash):
def read(self, key: Hash):
""" reads a value for the given Key, or return false if it wasn't found """
try:
value = self.storage[key.value]
Expand All @@ -24,6 +26,5 @@ def read(self, key:Hash):
ok = False
return value, ok

def summary(self) -> int:
""" returns the number of items stored in the local KeyValueStore """
return len(self.storage)
def __len__(self):
return len(self.storage)
129 changes: 63 additions & 66 deletions dht/routing_table.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,50 @@
from ctypes import sizeof
from dht.hashes import Hash
from dht.hashes import Hash
from collections import deque, defaultdict, OrderedDict

class RoutingTable():

def __init__(self, localNodeID:int, bucketSize:int) -> None:
self.localNodeID = localNodeID
self.bucketSize = bucketSize
self.kbuckets = []
self.lastUpdated = 0
class RoutingTable:
def __init__(self, localnodeid:int, bucketsize:int) -> None:
self.localnodeid = localnodeid
self.bucketsize = bucketsize
self.kbuckets = deque()
self.lastupdated = 0 # not really used at this time

def new_discovered_peer(self, nodeID:int):
def new_discovered_peer(self, nodeid:int):
""" notify the routing table of a new discovered node
in the network and check if it has a place in a given bucket """
# check matching bits
localNodeH = Hash(self.localNodeID)
nodeH = Hash(nodeID)
sBits = localNodeH.shared_upper_bits(nodeH)
localnodehash = Hash(self.localnodeid)
nodehash = Hash(nodeid)
sbits = localnodehash.shared_upper_bits(nodehash)
# Check if there is a kbucket already at that place
while len(self.kbuckets) < sBits+1:
while len(self.kbuckets) < sbits+1:
# Fill middle kbuckets if needed
self.kbuckets.append(KBucket(self.localNodeID, self.bucketSize))
self.kbuckets.append(KBucket(self.localnodeid, self.bucketsize))
# check/update the bucket with the newest nodeID
self.kbuckets[sBits] = self.kbuckets[sBits].add_peer_to_bucket(nodeID)
self.kbuckets[sbits] = self.kbuckets[sbits].add_peer_to_bucket(nodeid)
return self

def get_closest_nodes_to(self, key:Hash):
def get_closest_nodes_to(self, key: Hash):
""" return the list of Nodes (in order) close to the given key in the routing table """
closestNodes = {}
closestnodes = defaultdict()
# check the distances for all the nodes in the rt
for b in self.kbuckets:
for n in b.bucketNodes:
for n in b.bucketnodes:
nH = Hash(n)
dist = nH.xor_to_hash(key)
closestNodes[n] = dist
closestnodes[n] = dist
# sort the dict based on dist
closestNodes = dict(sorted(closestNodes.items(), key=lambda item: item[1])[:self.bucketSize])
return closestNodes
closestnodes = OrderedDict(sorted(closestnodes.items(), key=lambda item: item[1])[:self.bucketsize])
return closestnodes

def get_routing_nodes(self):
# get the closest nodes to the peer
rtNodes = []
rtnodes = deque()
for b in self.kbuckets:
for n in b.bucketNodes:
rtNodes.append(n)
return rtNodes
for n in b.bucketnodes:
rtnodes.append(n)
return rtnodes

def __repr__(self) -> str:
s = ""
Expand All @@ -52,68 +53,64 @@ def __repr__(self) -> str:
return s

def summary(self) -> str:
return self.__repr__()
return self.__repr__()

class KBucket():

class KBucket:
""" single representation of a kademlia kbucket, which contains the closest nodes
sharing X number of upper bits on their NodeID's Hashes """

def __init__(self, ourNodeID:int, size:int):
def __init__(self, localnodeid: int, size: int):
""" initialize the kbucket with setting a max size along some other control variables """
self.localNodeID = ourNodeID
self.bucketNodes = []
self.bucketSize = size
self.lastUpdated = 0
self.localnodeid = localnodeid
self.bucketnodes = deque(maxlen=size)
self.bucketsize = size
self.lastupdated = 0

def add_peer_to_bucket(self, nodeID:int):
def add_peer_to_bucket(self, nodeid: int):
""" check if the new node is eligible to replace a further one """
# Check if the distance between our NodeID and the remote one
localNodeH = Hash(self.localNodeID)
nodeH = Hash(nodeID)
dist = localNodeH.xor_to_hash(nodeH)

bucketDistances = self.get_distances_to_key(localNodeH)
if (self.len() > 0) and (self.len() >= self.bucketSize):
if bucketDistances[list(bucketDistances)[-1]] < dist:
pass
localnodehash = Hash(self.localnodeid)
nodehash = Hash(nodeid)
dist = localnodehash.xor_to_hash(nodehash)
bucketdistances = self.get_distances_to_key(localnodehash)
if (self.len() > 0) and (self.len() >= self.bucketsize):
if bucketdistances[deque(bucketdistances)[-1]] < dist:
pass
else:
# As the dist of the new node is smaller, add it to the list
bucketDistances[nodeID] = dist
bucketdistances[nodeid] = dist
# Sort back the nodes with the new one and remove the last remaining item
bucketDistances = dict(sorted(bucketDistances.items(), key=lambda item: item[1]))
bucketDistances.pop(list(bucketDistances)[-1])
bucketdistances = OrderedDict(sorted(bucketdistances.items(), key=lambda item: item[1]))
bucketdistances.pop(deque(bucketdistances)[-1])
# Update the new closest nodes in the bucket
self.bucketNodes = list(bucketDistances.keys())
self.bucketnodes = deque(bucketdistances.keys(), maxlen=len(bucketdistances))
else:
self.bucketNodes.append(nodeID)
self.bucketnodes.append(nodeid)
return self

def get_distances_to_key(self, key:Hash):
def get_distances_to_key(self, key: Hash):
""" return the distances from all the nodes in the bucket to a given key """
distances = {}
for nodeID in self.bucketNodes:
nodeH = Hash(nodeID)
dist = nodeH.xor_to_hash(key)
distances[nodeID] = dist
return dict(sorted(distances.items(), key=lambda item: item[1]))

def get_x_nodes_close_to(self, key: "Hash", nnodes: int):
    """ return the `nnodes` nodes of this bucket that are closest to `key`

    The result is the mapping produced by get_distances_to_key (nodeid -> XOR
    distance, sorted from closest to furthest) trimmed to its first `nnodes`
    entries. A zero or negative `nnodes` yields an empty mapping. """
    distances = self.get_distances_to_key(key)
    # Iterate over the keys themselves: unpacking every key as a pair
    # (`for i, _ in keys`) fails for integer node ids. The keys are copied
    # into a list first so the mapping is not mutated while it is iterated.
    # max() clamps negative values, which would otherwise keep almost all nodes.
    for nodeid in list(distances)[max(nnodes, 0):]:
        distances.pop(nodeid)
    return distances

def len(self) -> int:
return len(self.bucketNodes)
return len(self.bucketnodes)

def __len__(self) -> int:
return len(self.bucketnodes)

def __repr__(self) -> str:
return f"{self.len()} nodes"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ maintainers = [
{name = "@cortze | Mikel Cortes ", email = "cortze@protonmail.com"},
]
requires-python = ">=3.10"
dependencies = [ "pandas", "jupyter" ]
dependencies = [ "bitarray" ]

dynamic = [
"version",
Expand Down
Loading