Started to move dev/bloomfiltertest.py into test case in tests package

This commit is contained in:
Dmitri Bogomolov 2019-01-30 11:03:42 +02:00
parent 16a11775e8
commit 2f752bbaa5
Signed by untrusted user: g1itch
GPG Key ID: 720A756F18DEED13
2 changed files with 140 additions and 66 deletions

View File

@ -1,66 +0,0 @@
"""
dev/bloomfiltertest.py
======================
"""
import sqlite3
from os import getenv, path
from time import time
from pybloom import BloomFilter as BloomFilter1 # pylint: disable=import-error
from pybloomfilter import BloomFilter as BloomFilter2 # pylint: disable=import-error
# Ubuntu: apt-get install python-pybloomfiltermmap
# Benchmark two Bloom filter implementations (pybloom and pybloomfilter)
# on the hashes stored in the local PyBitmessage inventory table.
conn = sqlite3.connect(
    path.join(getenv("HOME"), '.config/PyBitmessage/messages.dat'))
conn.text_factory = str
try:
    cur = conn.cursor()
    # COUNT() always yields exactly one row
    cur.execute('''SELECT COUNT(hash) FROM inventory''')
    itemcount = cur.fetchone()[0]
    cur.execute('''SELECT hash FROM inventory''')
    rows = cur.fetchall()
finally:
    # All needed data is in memory now, so release the database handle
    conn.close()

# Capacity is the item count rounded up to the next multiple of 1000
filtersize = 1000 * (itemcount // 1000 + 1)
errorrate = 1.0 / 1000.0
bf1 = BloomFilter1(capacity=filtersize, error_rate=errorrate)
bf2 = BloomFilter2(capacity=filtersize, error_rate=errorrate)

rawlen = 0
bf1time = 0
bf2time = 0
for row in rows:
    rawlen += len(row[0])
    try:
        started = time()
        bf1.add(row[0])
        between = time()
        bf2.add(row[0])
        finished = time()
    except IndexError:
        # Deliberate best effort: presumably raised by a filter which is
        # already at capacity (TODO confirm); such an item is not timed.
        continue
    bf1time += between - started
    bf2time += finished - between


def _percent_of_raw(size):
    """Return size as a percentage of the raw data length (0 if no data)"""
    return 100.0 * size / rawlen if rawlen else 0.0


bf1length = bf1.bitarray.buffer_info()[1]

print("Item count: %i" % (itemcount))
print("Raw length: %i" % (rawlen))
print("Bloom filter 1 length: %i, reduction to: %.2f%%" % (
    bf1length, _percent_of_raw(bf1length)))
print("Bloom filter 1 capacity: %i and error rate: %.3f%%" % (
    bf1.capacity, 100.0 * bf1.error_rate))
print("Bloom filter 1 took %.2fs" % (bf1time))
print("Bloom filter 2 length: %i, reduction to: %.3f%%" % (
    bf2.num_bits / 8, _percent_of_raw(bf2.num_bits / 8.0)))
print("Bloom filter 2 capacity: %i and error rate: %.3f%%" % (
    bf2.capacity, 100.0 * bf2.error_rate))
print("Bloom filter 2 took %.2fs" % (bf2time))

140
src/tests/bloomfilter.py Normal file
View File

@ -0,0 +1,140 @@
"""
Test bloomfilter packages.
This module is imported from core tests module and run by nose.
"""
import hashlib
import random # nosec
import string
import StringIO
import unittest
from importlib import import_module
# import inventory
def have_package(pkg_name):
    """
    Return the module named *pkg_name*, or False if it cannot be imported.

    The outcome of the first lookup is stored as an attribute of this
    function, so every package is imported at most once.
    """
    if not hasattr(have_package, pkg_name):
        try:
            found = import_module(pkg_name)
        except ImportError:
            found = False
        setattr(have_package, pkg_name, found)
    return getattr(have_package, pkg_name)
# Probe for the supported bloom filter packages; each name is bound to
# the imported module or to False when the package is not available.
pybloomfilter = have_package('pybloomfilter')
pybloom = have_package('pybloom_live') or have_package('pybloom')
pybloof = have_package('pybloof')

if pybloof:
    class BloomfilterPybloof(pybloof.StringBloomFilter):
        """
        StringBloomFilter taking (capacity, error_rate) in the constructor
        and exposing both values as attributes.
        """

        def __init__(self, capacity, error_rate=0.001):
            self.capacity, self.error_rate = capacity, error_rate
            # bloom_calculator() presumably returns the keyword arguments
            # expected by StringBloomFilter - TODO confirm
            super(BloomfilterPybloof, self).__init__(
                **pybloof.bloom_calculator(capacity, error_rate))

    # Publish the adapter under the name the test cases look up by default
    pybloof.BloomFilter = BloomfilterPybloof
# TODO: make this an option
# _inventory = inventory.Inventory()

# Pool of unique 32-byte values: the second half of the sha512 digest of
# random 32-letter strings. ascii_lowercase is used because the alphabet
# must not depend on the locale; the seed is encoded so that hashing also
# works where str is unicode.
_inventory = list(set(
    hashlib.sha512(
        ''.join(
            random.choice(string.ascii_lowercase) for _ in range(32)
        ).encode('ascii')
    ).digest()[32:] for _ in range(100000)
))
# Split the pool at a fixed index so the two parts can never overlap,
# even if de-duplication left fewer than 100000 values.
_hashes_absent = _inventory[50000:]
# The hashes to be added are wrapped into one-element lists because the
# tests read them as row[0].
_inventory = [[item] for item in _inventory[:50000]]
_hashes_present = [random.choice(_inventory)[0] for _ in range(10000)]
# Filter objects shared between the tests, keyed by the filter module
_filters = {}
class BloomfilterTestCase(object):
    """
    Base class for bloomfilter test case

    A subclass sets `_filter_mod` to the module providing the filter (or
    to a false value if it is missing) and may set `filter_cls` to the
    name of the filter class inside that module (default 'BloomFilter').
    The filter object is shared by all tests of the same module, and
    `test_0_add` is named so that it sorts before the other tests.
    """

    def setUp(self):
        """Skip the test when there is no filter to check"""
        print('\n')
        if self.filter is None:
            self.skipTest('package not found')

    def _filter_class(self):
        """Return the filter class looked up in the filter module"""
        return getattr(
            self._filter_mod, getattr(self, 'filter_cls', 'BloomFilter'))

    def _export(self):
        """Return the serialized filter"""
        return self.filter.to_base64()

    def _import(self, data):
        """Return a filter restored from the data made by `_export()`"""
        return self._filter_class().from_base64(data)

    @property
    def filter(self):
        """
        The filter object for `_filter_mod`, created on first access.

        Returns None if `_filter_mod` is false, i.e. package is missing.
        """
        filter_obj = _filters.get(self._filter_mod)
        if filter_obj is not None or not self._filter_mod:
            return filter_obj
        # capacity is the inventory size rounded up to the next thousand
        filtersize = 1000 * (len(_inventory) // 1000 + 1)
        errorrate = 1 / 1000.
        filter_obj = _filters[self._filter_mod] = self._filter_class()(
            filtersize, errorrate)
        print(
            'Filter class: %s\n'
            'Filter capacity: %i and error rate: %.3f%%\n' % (
                type(filter_obj), filter_obj.capacity,
                100 * filter_obj.error_rate
            )
        )
        return filter_obj

    def test_0_add(self):
        """Add all Inventory hashes to the filter"""
        bloom = self.filter
        for row in _inventory:
            bloom.add(row[0])

    def test_absence(self):
        """Check absence of hashes in the filter"""
        bloom = self.filter
        errors = sum(sample in bloom for sample in _hashes_absent)
        # false positives are tolerated up to the configured error rate
        self.assertLessEqual(
            errors, len(_hashes_absent) / 1000. + 1,
            'Errors: %s from %s' % (errors, len(_hashes_absent)))

    def test_presence(self):
        """Check presence of hashes in the filter"""
        bloom = self.filter
        for sample in _hashes_present:
            self.assertIn(sample, bloom)

    def test_portability(self):
        """Check filter's export/import ability"""
        filter_copy = self._import(self._export())
        self.assertIn(random.choice(_hashes_present), filter_copy)
        # A bloom filter may report a false positive, so an absent hash
        # is not guaranteed to be missing from the copy. What the copy
        # has to do is to answer exactly like the original filter.
        sample = random.choice(_hashes_absent)
        self.assertEqual(sample in filter_copy, sample in self.filter)
class TestPybloomfiltermmap(BloomfilterTestCase, unittest.TestCase):
    """Run the bloomfilter checks against the pybloomfilter module"""
    # the imported module, or False if pybloomfilter is not installed
    _filter_mod = pybloomfilter
class TestPybloom(BloomfilterTestCase, unittest.TestCase):
    """
    Run the bloomfilter checks against pybloom_live or pybloom.

    The filter is exported and imported through tofile()/fromfile(),
    with the dump passed around base64-encoded.
    """
    _filter_mod = pybloom

    def _export(self):
        """Return the filter dumped by tofile() and base64-encoded"""
        buf = StringIO.StringIO()
        self.filter.tofile(buf)
        dump = buf.getvalue()
        return dump.encode('base64')

    def _import(self, data):
        """Return a filter read by fromfile() from base64-encoded dump"""
        load = self._filter_class().fromfile
        return load(StringIO.StringIO(data.decode('base64')))
class TestPybloof(BloomfilterTestCase, unittest.TestCase):
    """Run the bloomfilter checks against the pybloof module"""
    # the imported module, or False if pybloof is not installed
    _filter_mod = pybloof