This repository has been archived on 2024-12-23. You can view files and clone it, but cannot push or open issues or pull requests.
PyBitmessage-2024-12-23/src/randomtrackingdict.py

135 lines
4.6 KiB
Python
Raw Normal View History

"""
2019-12-19 12:24:53 +01:00
Track randomize ordered dict
"""
from threading import RLock
from time import time
2021-05-31 18:44:22 +02:00
try:
import helper_random
except ImportError:
from . import helper_random
class RandomTrackingDict(object):
    """
    Dict with randomised order and tracking.

    Keeps a track of how many items have been requested from the dict,
    and timeouts. Resets after all objects have been retrieved and timed out.
    The main purpose of this isn't as much putting related code together
    as performance optimisation and anonymisation of downloading of objects
    from other peers. If done using a standard dict or array, it takes
    too much CPU (and looks convoluted). Randomisation helps with anonymity.

    Internal layout:

    * ``dictionary`` maps ``bytes(key)`` to a two-item list
      ``[index, value]`` where ``index`` is the key's position in
      ``indexDict``.
    * ``indexDict`` is a list of the keys. Positions
      ``[0, len - pendingLen)`` hold keys that have not been handed out by
      :meth:`randomKeys` yet; the last ``pendingLen`` positions hold keys
      that have been handed out ("pending").
    """
    # pylint: disable=too-many-instance-attributes
    # default cap on pending keys before randomKeys() starts throttling
    maxPending = 10
    # default number of seconds used for both timeout comparisons
    pendingTimeout = 60

    def __init__(self):
        """Create an empty tracking dict."""
        self.dictionary = {}
        self.indexDict = []
        self.len = 0
        self.pendingLen = 0
        # timestamp of the last successful randomKeys() call
        self.lastPoll = 0
        # timestamp last set by setLastObject()
        self.lastObject = 0
        # re-entrant because locked methods call _swap(), which locks again
        self.lock = RLock()

    def __len__(self):
        """Return the number of stored keys (pending ones included)."""
        return self.len

    def __contains__(self, key):
        """Return True if ``bytes(key)`` is stored."""
        return bytes(key) in self.dictionary

    def __getitem__(self, key):
        """Return the value stored for ``key``; raises KeyError if absent."""
        return self.dictionary[bytes(key)][1]

    def _swap(self, i1, i2):
        """
        Exchange the keys at positions ``i1`` and ``i2`` of ``indexDict``
        and update their recorded positions in ``dictionary``.

        Returns ``i2``, i.e. the new position of the key that was at ``i1``,
        for quick reassignment by the caller.
        """
        with self.lock:
            key1 = self.indexDict[i1]
            key2 = self.indexDict[i2]
            self.indexDict[i1], self.indexDict[i2] = key2, key1
            self.dictionary[bytes(key1)][0] = i2
            self.dictionary[bytes(key2)][0] = i1
            # for quick reassignment
            return i2

    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key``.

        An existing key only gets its value replaced. A new key is appended
        and then swapped to the end of the not-yet-requested region, so the
        pending keys stay grouped at the tail of ``indexDict``.
        """
        with self.lock:
            key_bytes = bytes(key)
            if key_bytes in self.dictionary:
                self.dictionary[key_bytes][1] = value
            else:
                self.indexDict.append(key)
                self.dictionary[key_bytes] = [self.len, value]
                self._swap(self.len, self.len - self.pendingLen)
                self.len += 1

    def __delitem__(self, key):
        """
        Remove ``key``.

        Raises ``KeyError(key)`` if the key is not stored. The membership
        check is done while holding the lock so that the key cannot
        disappear between the check and the removal.
        """
        key_bytes = bytes(key)
        with self.lock:
            if key_bytes not in self.dictionary:
                raise KeyError(key)
            index = self.dictionary[key_bytes][0]
            # not pending
            if index < self.len - self.pendingLen:
                # move it to the last not-pending slot, left of pending part
                index = self._swap(index, self.len - self.pendingLen - 1)
            # pending
            else:
                self.pendingLen -= 1
            # move it to the very end so it can be dropped cheaply
            self._swap(index, self.len - 1)
            # if the following del is batched, performance of this single
            # operation can improve 4x, but it's already very fast so we'll
            # ignore it for the time being
            del self.indexDict[-1]
            del self.dictionary[key_bytes]
            self.len -= 1

    def setMaxPending(self, maxPending):
        """
        Sets maximum number of objects that can be retrieved from the class
        simultaneously as long as there is no timeout
        """
        self.maxPending = maxPending

    def setPendingTimeout(self, pendingTimeout):
        """Sets how long to wait for a timeout if max pending is reached
        (or all objects have been retrieved)"""
        self.pendingTimeout = pendingTimeout

    def setLastObject(self):
        """Update timestamp for tracking of received objects"""
        self.lastObject = time()

    def randomKeys(self, count=1):
        """Retrieve count random keys from the dict
        that haven't already been retrieved

        Returns a list of at most ``count`` keys and marks them as pending.

        Raises ``KeyError`` if the dict is empty, or if the pending limit is
        reached (or every key is pending) and fewer than ``pendingTimeout``
        seconds have passed since the last successful call.
        """
        # pylint: disable=redefined-outer-name
        with self.lock:
            # the precondition is evaluated under the lock because the
            # fields it reads are modified under the same lock
            now = time()
            throttled = (
                self.pendingLen >= self.maxPending
                or self.pendingLen == self.len)
            if self.len == 0 or (
                    throttled and self.lastPoll + self.pendingTimeout > now):
                raise KeyError
            # reset if we've requested all
            # and if last object received too long time ago
            if self.pendingLen == self.len and \
                    self.lastObject + self.pendingTimeout < now:
                self.pendingLen = 0
                self.setLastObject()
            available = self.len - self.pendingLen
            count = min(count, available)
            # NOTE(review): helper_random.randomsample is presumably a
            # sample-without-replacement over the given range -- confirm
            randomIndex = helper_random.randomsample(range(available), count)
            retval = [self.indexDict[i] for i in randomIndex]

            # highest index first, so a slot that still has to be processed
            # is never overwritten by an earlier swap
            for i in sorted(randomIndex, reverse=True):
                # swap with one below lowest pending
                self._swap(i, self.len - self.pendingLen - 1)
                self.pendingLen += 1
            self.lastPoll = time()
            return retval