Started moving dev/bloomfiltertest.py into a test case in the tests package
This commit is contained in:
parent
16a11775e8
commit
2f752bbaa5
|
@ -1,66 +0,0 @@
|
|||
"""
|
||||
dev/bloomfiltertest.py
|
||||
======================
|
||||
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
from os import getenv, path
|
||||
from time import time
|
||||
|
||||
from pybloom import BloomFilter as BloomFilter1 # pylint: disable=import-error
|
||||
from pybloomfilter import BloomFilter as BloomFilter2 # pylint: disable=import-error
|
||||
|
||||
# Ubuntu: apt-get install python-pybloomfiltermmap
|
||||
|
||||
# Benchmark script: read every inventory hash from the local PyBitmessage
# database, add it to both Bloom filter implementations and report the size
# and insertion time of each filter.

# path.expanduser('~') honours $HOME when it is set and still resolves a home
# directory when it is not, where path.join(getenv("HOME"), ...) would fail.
conn = sqlite3.connect(
    path.join(path.expanduser('~'), '.config/PyBitmessage/messages.dat'))
conn.text_factory = str
rawlen = 0
itemcount = 0

# The connection is closed even if a query or a filter operation fails.
try:
    cur = conn.cursor()

    cur.execute('''SELECT COUNT(hash) FROM inventory''')
    for row in cur.fetchall():
        itemcount = row[0]

    # Capacity is the item count rounded up to the next multiple of 1000.
    filtersize = 1000 * (int(itemcount / 1000) + 1)
    errorrate = 1.0 / 1000.0

    bf1 = BloomFilter1(capacity=filtersize, error_rate=errorrate)
    bf2 = BloomFilter2(capacity=filtersize, error_rate=errorrate)

    item = '''SELECT hash FROM inventory'''
    cur.execute(item)
    bf1time = 0
    bf2time = 0
    for row in cur.fetchall():
        rawlen += len(row[0])
        try:
            started = time()
            bf1.add(row[0])
            switched = time()
            bf2.add(row[0])
            finished = time()
        except IndexError:
            # NOTE(review): presumably raised by a filter that is over
            # capacity; such rows are left out of the timings, as before.
            continue
        bf1time += switched - started
        bf2time += finished - switched
finally:
    conn.close()

# f = open("/home/shurdeek/tmp/bloom.dat", "wb")
# sb1.tofile(f)
# f.close()


def _reduction(size):
    """Return *size* as a percentage of rawlen, or NaN for an empty inventory."""
    if not rawlen:
        return float('nan')
    return 100.0 * size / rawlen


# Single-argument print(...) behaves the same as the print statement here.
print("Item count: %i" % (itemcount))
print("Raw length: %i" % (rawlen))
print("Bloom filter 1 length: %i, reduction to: %.2f%%" % (
    bf1.bitarray.buffer_info()[1],
    _reduction(bf1.bitarray.buffer_info()[1])))
print("Bloom filter 1 capacity: %i and error rate: %.3f%%" % (
    bf1.capacity, 100.0 * bf1.error_rate))
print("Bloom filter 1 took %.2fs" % (bf1time))
print("Bloom filter 2 length: %i, reduction to: %.3f%%" % (
    bf2.num_bits / 8,
    _reduction(bf2.num_bits / 8.0)))
print("Bloom filter 2 capacity: %i and error rate: %.3f%%" % (
    bf2.capacity, 100.0 * bf2.error_rate))
print("Bloom filter 2 took %.2fs" % (bf2time))
|
140
src/tests/bloomfilter.py
Normal file
140
src/tests/bloomfilter.py
Normal file
|
@ -0,0 +1,140 @@
|
|||
"""
|
||||
Test bloomfilter packages.
|
||||
|
||||
This module is imported from the core tests module and run by nose.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import random # nosec
|
||||
import string
|
||||
import StringIO
|
||||
import unittest
|
||||
from importlib import import_module
|
||||
|
||||
# import inventory
|
||||
|
||||
|
||||
# Import results keyed by package name: the imported module, or False when
# the import failed.  A dedicated dict is used instead of attributes on
# have_package itself, so that names such as '__name__' or '__doc__' cannot
# be mistaken for cached results.
_package_cache = {}


def have_package(pkg_name):
    """Import the package *pkg_name* once and remember the outcome.

    :param pkg_name: name accepted by ``importlib.import_module``
    :returns: the imported module, or ``False`` if importing it raised
        ``ImportError``; later calls with the same name return the
        remembered value without importing again
    """
    try:
        return _package_cache[pkg_name]
    except KeyError:
        pass
    try:
        pkg = import_module(pkg_name)
    except ImportError:
        pkg = False
    _package_cache[pkg_name] = pkg
    return pkg
|
||||
|
||||
|
||||
# Optional bloom filter packages: each name is bound to whatever
# have_package() returns for it.
pybloomfilter = have_package('pybloomfilter')
# pybloom_live is tried first; pybloom is only consulted as a fallback.
pybloom = have_package('pybloom_live')
if not pybloom:
    pybloom = have_package('pybloom')
pybloof = have_package('pybloof')
|
||||
|
||||
|
||||
if pybloof:
    class BloomfilterPybloof(pybloof.StringBloomFilter):
        """pybloof string filter built from a capacity and an error rate.

        Stores ``capacity`` and ``error_rate`` on the instance and passes
        the result of ``pybloof.bloom_calculator`` to the base class
        constructor as keyword arguments.
        """

        def __init__(self, capacity, error_rate=0.001):
            self.capacity = capacity
            self.error_rate = error_rate
            super(BloomfilterPybloof, self).__init__(
                **pybloof.bloom_calculator(capacity, error_rate))

    # Publish the adapter on the module under the name 'BloomFilter'.
    setattr(pybloof, 'BloomFilter', BloomfilterPybloof)
|
||||
|
||||
|
||||
# TODO: make this an option
|
||||
# _inventory = inventory.Inventory()
|
||||
# Build up to 100000 random 32-byte hashes: the second half of the SHA-512
# digest of a random 32-letter string.  The set drops accidental duplicates.
# string.ascii_lowercase is used because string.lowercase depends on the
# current locale.
_inventory = list(set(
    hashlib.sha512(
        ''.join(random.choice(string.ascii_lowercase) for x in range(32))
    ).digest()[32:] for _ in range(100000)
))

# Hashes past the first 50000 are kept as the "absent" samples.  Slicing
# from index 50000 (rather than taking the last 50000) keeps the two groups
# disjoint even if deduplication left fewer than 100000 hashes.
_hashes_absent = _inventory[50000:]
# Each remaining hash is wrapped in a one-element list, which is the shape
# test_0_add reads (row[0]).
_inventory = [[item] for item in _inventory[:50000]]
# Random sample (with repetition) of hashes from _inventory.
_hashes_present = [
    random.choice(_inventory)[0] for _ in range(10000)
]
# Filter instances shared between tests, keyed by bloom filter module.
_filters = {}
|
||||
|
||||
|
||||
class BloomfilterTestCase(object):
    """Base class for bloomfilter test case.

    Intended to be combined with ``unittest.TestCase`` in a subclass that
    sets ``_filter_mod`` to the bloom filter module under test (or a false
    value when it is unavailable) and, optionally, ``filter_cls`` to the
    name of the filter class inside that module (default 'BloomFilter').

    One filter per module is stored in the module-level ``_filters`` dict
    and shared by all test methods, so the membership tests only pass once
    ``test_0_add`` has filled it; that name sorts first in unittest's
    default alphabetical method order.
    """

    def setUp(self):
        """Skip the test when no filter could be created for the module."""
        print('\n')
        if self.filter is None:
            self.skipTest('package not found')

    def _filter_class(self):
        """Return the filter class looked up by name on ``_filter_mod``."""
        filter_cls = getattr(self, 'filter_cls', 'BloomFilter')
        return getattr(self._filter_mod, filter_cls)

    def _export(self):
        """Serialize the shared filter via its ``to_base64()`` method."""
        return self.filter.to_base64()

    def _import(self, data):
        """Build a filter from *data* via the class's ``from_base64()``."""
        # NOTE(review): pybloomfiltermmap's from_base64 appears to take a
        # filename before the data -- confirm this default works for it.
        return self._filter_class().from_base64(data)

    @property
    def filter(self):
        """Return the shared filter for ``_filter_mod``, creating it once.

        Returns ``None`` when ``_filter_mod`` is false.
        """
        filter_obj = _filters.get(self._filter_mod)
        if filter_obj is None:
            if not self._filter_mod:
                return None
            # Capacity: inventory size rounded up to the next multiple
            # of 1000.
            filtersize = 1000 * (int(len(_inventory) / 1000.) + 1)
            errorrate = 1 / 1000.
            filter_obj = _filters[self._filter_mod] = self._filter_class(
            )(filtersize, errorrate)
            print(
                'Filter class: %s\n'
                'Filter capacity: %i and error rate: %.3f%%\n' % (
                    type(filter_obj), filter_obj.capacity,
                    100 * filter_obj.error_rate
                )
            )
        return filter_obj

    def test_0_add(self):
        """Add all Inventory hashes to the filter"""
        for row in _inventory:
            self.filter.add(row[0])

    def test_absence(self):
        """Check absence of hashes in the filter"""
        # Every hit on an absent hash is a false positive.
        errors = sum(sample in self.filter for sample in _hashes_absent)
        # print('Errors: %s from %s' % (errors, len(_hashes_absent)))
        self.assertLessEqual(errors, len(_hashes_absent) / 1000. + 1)

    def test_presence(self):
        """Check presence of hashes in the filter"""
        for sample in _hashes_present:
            self.assertTrue(sample in self.filter)

    def test_portability(self):
        """Check filter's export/import ability"""
        filter_copy = self._import(self._export())
        self.assertTrue(random.choice(_hashes_present) in filter_copy)
        # An absent hash may be a legitimate false positive, so asserting
        # that a random one is missing would fail intermittently.  Instead
        # the copy must answer exactly like the filter it was exported from.
        original = self.filter
        for sample in _hashes_absent:
            self.assertEqual(sample in filter_copy, sample in original)
|
||||
|
||||
|
||||
class TestPybloomfiltermmap(BloomfilterTestCase, unittest.TestCase):
    """Run the shared bloom filter tests against the pybloomfilter module."""

    _filter_mod = pybloomfilter
|
||||
|
||||
|
||||
class TestPybloom(BloomfilterTestCase, unittest.TestCase):
    """Run the shared bloom filter tests against pybloom_live or pybloom.

    Export and import go through the filter's ``tofile()`` / ``fromfile()``
    methods and an in-memory buffer, with the bytes base64-encoded.
    """

    _filter_mod = pybloom

    def _export(self):
        """Write the shared filter to a buffer and return it as base64."""
        buf = StringIO.StringIO()
        self.filter.tofile(buf)
        raw = buf.getvalue()
        return raw.encode('base64')

    def _import(self, data):
        """Rebuild a filter from the base64 string *data*."""
        loader = self._filter_class().fromfile
        return loader(StringIO.StringIO(data.decode('base64')))
|
||||
|
||||
|
||||
class TestPybloof(BloomfilterTestCase, unittest.TestCase):
    """Run the shared bloom filter tests against the pybloof module."""

    _filter_mod = pybloof
|
Reference in New Issue
Block a user