2018-10-10 14:00:53 +02:00
|
|
|
"""
|
2019-12-19 12:24:53 +01:00
|
|
|
Randomised-order dict that tracks which keys have already been requested
|
2018-10-10 14:00:53 +02:00
|
|
|
"""
|
2018-02-01 12:19:39 +01:00
|
|
|
from threading import RLock
|
|
|
|
from time import time
|
2018-10-10 14:00:53 +02:00
|
|
|
|
2020-12-29 09:28:48 +01:00
|
|
|
import helper_random
|
2018-02-01 12:19:39 +01:00
|
|
|
|
2018-10-10 14:00:53 +02:00
|
|
|
|
2018-02-01 12:19:39 +01:00
|
|
|
class RandomTrackingDict(object):
    """
    Dict with randomised order and tracking.

    Keeps a track of how many items have been requested from the dict,
    and timeouts. Resets after all objects have been retrieved and timed out.
    The main purpose of this isn't as much putting related code together
    as performance optimisation and anonymisation of downloading of objects
    from other peers. If done using a standard dict or array, it takes
    too much CPU (and looks convoluted). Randomisation helps with anonymity.

    Internal layout: ``indexDict`` is a list of keys and ``dictionary`` maps
    each key to a two-item list ``[index, value]`` where ``index`` is the
    key's position in ``indexDict``. The last ``pendingLen`` positions of
    ``indexDict`` hold keys already handed out by :meth:`randomKeys`
    ("pending"); the positions before them hold keys still available.
    """
    # pylint: disable=too-many-instance-attributes

    # number of pending keys at which randomKeys() starts refusing requests
    maxPending = 10
    # seconds (compared against time.time()) before a refusal/reset expires
    pendingTimeout = 60

    def __init__(self):
        self.dictionary = {}
        self.indexDict = []
        # number of stored keys, kept in step with indexDict
        self.len = 0
        # number of keys at the tail of indexDict that are pending
        self.pendingLen = 0
        # timestamp of the last successful randomKeys() call
        self.lastPoll = 0
        # timestamp last written by setLastObject()
        self.lastObject = 0
        self.lock = RLock()

    def __len__(self):
        return self.len

    def __contains__(self, key):
        return key in self.dictionary

    def __getitem__(self, key):
        """Return the value stored for key; raises KeyError if absent"""
        return self.dictionary[key][1]

    def _swap(self, i1, i2):
        """Exchange the keys at positions i1 and i2 of indexDict and update
        the positions recorded in dictionary. Returns i2, the new position
        of the key that was at i1"""
        with self.lock:
            key1 = self.indexDict[i1]
            key2 = self.indexDict[i2]
            self.indexDict[i1] = key2
            self.indexDict[i2] = key1
            self.dictionary[key1][0] = i2
            self.dictionary[key2][0] = i1
        # for quick reassignment
        return i2

    def __setitem__(self, key, value):
        """Store value under key. An existing key keeps its position (and
        its pending state); a new key is added to the non-pending part"""
        with self.lock:
            if key in self.dictionary:
                self.dictionary[key][1] = value
                return
            self.indexDict.append(key)
            self.dictionary[key] = [self.len, value]
            # the new key landed after the pending tail; exchange it with
            # the first pending key so the pending keys stay contiguous
            self._swap(self.len, self.len - self.pendingLen)
            self.len += 1

    def __delitem__(self, key):
        """Remove key. Raises KeyError(key) if it is not present"""
        with self.lock:
            # checked under the lock so the lookup below cannot race with
            # a concurrent removal of the same key
            if key not in self.dictionary:
                raise KeyError(key)
            index = self.dictionary[key][0]
            firstPending = self.len - self.pendingLen
            if index < firstPending:
                # not pending: move it to the last non-pending slot, i.e.
                # just left of the pending part
                index = self._swap(index, firstPending - 1)
            else:
                # pending
                self.pendingLen -= 1
            # move it to the very end so it can be dropped cheaply
            self._swap(index, self.len - 1)
            # if the following del is batched, performance of this single
            # operation can improve 4x, but it's already very fast so we'll
            # ignore it for the time being
            del self.indexDict[-1]
            del self.dictionary[key]
            self.len -= 1

    def setMaxPending(self, maxPending):
        """
        Sets maximum number of objects that can be retrieved from the class
        simultaneously as long as there is no timeout
        """
        self.maxPending = maxPending

    def setPendingTimeout(self, pendingTimeout):
        """Sets how long to wait for a timeout if max pending is reached
        (or all objects have been retrieved)"""
        self.pendingTimeout = pendingTimeout

    def setLastObject(self):
        """Update timestamp for tracking of received objects"""
        self.lastObject = time()

    def randomKeys(self, count=1):
        """Retrieve count random keys from the dict
        that haven't already been retrieved

        The returned keys become pending. The returned list is shorter than
        count (possibly empty) when fewer non-pending keys are available.

        Raises KeyError when the dict is empty, or when the pending count
        has reached maxPending (or every key is pending) and fewer than
        pendingTimeout seconds have passed since the last successful call.
        """
        # pylint: disable=redefined-outer-name
        with self.lock:
            # the guard reads the same counters the code below mutates, so
            # it is evaluated under the lock as well
            saturated = (self.pendingLen >= self.maxPending or
                         self.pendingLen == self.len)
            if self.len == 0 or (
                    saturated and
                    self.lastPoll + self.pendingTimeout > time()):
                raise KeyError

            # reset if we've requested all
            # and if last object received too long time ago
            if self.pendingLen == self.len and \
                    self.lastObject + self.pendingTimeout < time():
                self.pendingLen = 0
                self.setLastObject()

            available = self.len - self.pendingLen
            count = min(count, available)
            # NOTE(review): helper_random.randomsample is defined outside
            # this file; presumably it behaves like random.sample - confirm
            randomIndex = helper_random.randomsample(
                range(available), count)
            retval = [self.indexDict[i] for i in randomIndex]

            # descending order: each swap touches position i and a position
            # at or above it, so the smaller indices still to be processed
            # keep pointing at the keys that were selected
            for i in sorted(randomIndex, reverse=True):
                # swap with one below lowest pending
                self._swap(i, self.len - self.pendingLen - 1)
                self.pendingLen += 1
            self.lastPoll = time()
            return retval
|