Started to move dev/bloomfiltertest.py into test case in tests package
This commit is contained in:
parent
16a11775e8
commit
2f752bbaa5
|
@ -1,66 +0,0 @@
|
||||||
"""
|
|
||||||
dev/bloomfiltertest.py
|
|
||||||
======================
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sqlite3
|
|
||||||
from os import getenv, path
|
|
||||||
from time import time
|
|
||||||
|
|
||||||
from pybloom import BloomFilter as BloomFilter1 # pylint: disable=import-error
|
|
||||||
from pybloomfilter import BloomFilter as BloomFilter2 # pylint: disable=import-error
|
|
||||||
|
|
||||||
# Ubuntu: apt-get install python-pybloomfiltermmap
|
|
||||||
|
|
||||||
# Benchmark two Bloom filter implementations against the hashes stored in the
# ``inventory`` table of the local PyBitmessage message store, then print the
# size and timing of each filter.

# Fall back to the expanded "~" when HOME is not set in the environment.
home = getenv("HOME") or path.expanduser("~")
conn = sqlite3.connect(path.join(home, '.config/PyBitmessage/messages.dat'))
conn.text_factory = str

rawlen = 0      # total length of all hashes read from the table
itemcount = 0   # number of rows reported by COUNT(hash)
bf1time = 0     # seconds accumulated in bf1.add()
bf2time = 0     # seconds accumulated in bf2.add()

try:
    cur = conn.cursor()

    cur.execute('''SELECT COUNT(hash) FROM inventory''')
    itemcount = cur.fetchone()[0]

    # Round the capacity up to the next multiple of 1000 above the item count.
    filtersize = 1000 * (int(itemcount / 1000) + 1)
    errorrate = 1.0 / 1000.0

    bf1 = BloomFilter1(capacity=filtersize, error_rate=errorrate)
    bf2 = BloomFilter2(capacity=filtersize, error_rate=errorrate)

    item = '''SELECT hash FROM inventory'''
    cur.execute(item)
    for row in cur.fetchall():
        rawlen += len(row[0])
        try:
            times = [time()]
            bf1.add(row[0])
            times.append(time())
            bf2.add(row[0])
            times.append(time())
            bf1time += times[1] - times[0]
            bf2time += times[2] - times[1]
        except IndexError:
            # Best effort: the row is left out of both timings, and bf2 is
            # not fed when bf1.add() is the call that failed.
            # NOTE(review): presumably raised by a filter that is already at
            # capacity -- confirm against the filter packages.
            pass
finally:
    # Release the database handle even when a query or a filter fails.
    conn.close()


def _percent_of_raw(size):
    """Return *size* as a percentage of ``rawlen`` (NaN if nothing was read)."""
    if not rawlen:
        return float('nan')
    return 100.0 * size / rawlen


bf1_bytes = bf1.bitarray.buffer_info()[1]

print("Item count: %i" % (itemcount))
print("Raw length: %i" % (rawlen))
print("Bloom filter 1 length: %i, reduction to: %.2f%%" %
      (bf1_bytes, _percent_of_raw(bf1_bytes)))
print("Bloom filter 1 capacity: %i and error rate: %.3f%%" % (bf1.capacity, 100.0 * bf1.error_rate))
print("Bloom filter 1 took %.2fs" % (bf1time))
print("Bloom filter 2 length: %i, reduction to: %.3f%%" %
      (bf2.num_bits // 8, _percent_of_raw(bf2.num_bits / 8.0)))
print("Bloom filter 2 capacity: %i and error rate: %.3f%%" % (bf2.capacity, 100.0 * bf2.error_rate))
print("Bloom filter 2 took %.2fs" % (bf2time))
|
|
140
src/tests/bloomfilter.py
Normal file
140
src/tests/bloomfilter.py
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
"""
|
||||||
|
Test bloomfilter packages.
|
||||||
|
|
||||||
|
This module is imported from the core tests module and run by nose.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import random # nosec
|
||||||
|
import string
|
||||||
|
import StringIO
|
||||||
|
import unittest
|
||||||
|
from importlib import import_module
|
||||||
|
|
||||||
|
# import inventory
|
||||||
|
|
||||||
|
|
||||||
|
def have_package(pkg_name):
    """Import *pkg_name* once and remember the outcome.

    Returns the imported module, or ``False`` when importing it raises
    ``ImportError``. The result is stored as an attribute of this function
    named after the package, so later calls return it without importing.
    """
    not_cached = object()
    known = getattr(have_package, pkg_name, not_cached)
    if known is not not_cached:
        return known

    try:
        found = import_module(pkg_name)
    except ImportError:
        found = False

    setattr(have_package, pkg_name, found)
    return found
|
||||||
|
|
||||||
|
|
||||||
|
# Optional bloomfilter backends: each name is bound to whatever
# have_package() returns for that package.
pybloomfilter = have_package('pybloomfilter')
# pybloom_live is tried first, pybloom is the fallback.
pybloom = have_package('pybloom_live') or have_package('pybloom')
pybloof = have_package('pybloof')


if pybloof:
    class BloomfilterPybloof(pybloof.StringBloomFilter):
        """``StringBloomFilter`` built from a capacity and an error rate."""

        def __init__(self, capacity, error_rate=0.001):
            """Size the filter with ``pybloof.bloom_calculator``.

            *capacity* and *error_rate* are also kept as attributes of
            the instance.
            """
            params = pybloof.bloom_calculator(capacity, error_rate)
            self.capacity = capacity
            self.error_rate = error_rate
            super(BloomfilterPybloof, self).__init__(**params)

    # Publish the adapter on the module under the name ``BloomFilter``.
    pybloof.BloomFilter = BloomfilterPybloof
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: make this an option
# _inventory = inventory.Inventory()

# Unique sample hashes: the last 32 bytes of the SHA-512 digest of a random
# 32-letter lowercase ASCII string. The text is encoded explicitly so that
# hashlib always receives bytes.
_samples = list(set(
    hashlib.sha512(
        ''.join(
            random.choice(string.ascii_lowercase) for _ in range(32)
        ).encode('ascii')
    ).digest()[32:] for _ in range(100000)
))

# Split at the midpoint rather than at fixed offsets, so the two halves stay
# disjoint even if the set dropped duplicate digests.
_half = len(_samples) // 2

# Hashes that are never added to a filter.
_hashes_absent = _samples[_half:]
# Hashes that are added to the filters, stored as one-element rows
# (consumers read ``row[0]``).
_inventory = [[item] for item in _samples[:_half]]
# Random picks (with repetition) from the hashes that are added.
_hashes_present = [
    random.choice(_inventory)[0] for _ in range(10000)
]
# Filter instances shared between test methods, keyed by backend module.
_filters = {}
|
||||||
|
|
||||||
|
|
||||||
|
class BloomfilterTestCase(object):
    """Base class for bloomfilter test case

    Subclasses combine this class with ``unittest.TestCase`` and set
    ``_filter_mod`` to the backend module, or to a false value when the
    package is unavailable. An optional ``filter_cls`` attribute names the
    filter class to look up in that module (default ``'BloomFilter'``).

    One filter per backend module is kept in the module-level ``_filters``
    dict and shared by all test methods, so ``test_0_add`` must populate it
    before the membership tests run.
    NOTE(review): this presumably relies on the runner executing test
    methods in alphabetical order -- confirm for the runner in use.
    """

    def setUp(self):
        """Skip the test when the backend package is not available."""
        print('\n')
        if self.filter is None:
            self.skipTest('package not found')

    def _filter_class(self):
        """Return the filter class looked up by name in the backend module."""
        filter_cls = getattr(self, 'filter_cls', 'BloomFilter')
        return getattr(self._filter_mod, filter_cls)

    def _export(self):
        """Serialize the shared filter using its ``to_base64`` method."""
        return self.filter.to_base64()

    def _import(self, data):
        """Build a filter from *data* using the class's ``from_base64``."""
        return self._filter_class().from_base64(data)

    @property
    def filter(self):
        """Return the shared filter of this backend, creating it on first use.

        Returns ``None`` when the backend module is unavailable.
        """
        if not self._filter_mod:
            return None
        filter_obj = _filters.get(self._filter_mod)
        if filter_obj is None:
            # Capacity: next multiple of 1000 above the inventory size.
            filtersize = 1000 * (int(len(_inventory) / 1000.) + 1)
            errorrate = 1 / 1000.
            filter_obj = _filters[self._filter_mod] = self._filter_class(
            )(filtersize, errorrate)
            print(
                'Filter class: %s\n'
                'Filter capacity: %i and error rate: %.3f%%\n' % (
                    type(filter_obj), filter_obj.capacity,
                    100 * filter_obj.error_rate
                )
            )
        return filter_obj

    def test_0_add(self):
        """Add all Inventory hashes to the filter"""
        bloom = self.filter
        for row in _inventory:
            bloom.add(row[0])

    def test_absence(self):
        """Check absence of hashes in the filter"""
        bloom = self.filter
        # Hits on hashes that were never added are false positives.
        errors = sum(sample in bloom for sample in _hashes_absent)
        self.assertLessEqual(errors, len(_hashes_absent) / 1000. + 1)

    def test_presence(self):
        """Check presence of hashes in the filter"""
        bloom = self.filter
        for sample in _hashes_present:
            self.assertIn(sample, bloom)

    def test_portability(self):
        """Check filter's export/import ability"""
        original = self.filter
        filter_copy = self._import(self._export())
        self.assertIn(random.choice(_hashes_present), filter_copy)
        # A hash that was never added may still be a legitimate false
        # positive, so the copy is required to answer exactly like the
        # original instead of being required to answer "absent".
        absent = random.choice(_hashes_absent)
        self.assertEqual(absent in original, absent in filter_copy)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPybloomfiltermmap(BloomfilterTestCase, unittest.TestCase):
    """Run the shared bloomfilter checks on the ``pybloomfilter`` module."""

    _filter_mod = pybloomfilter
|
||||||
|
|
||||||
|
|
||||||
|
class TestPybloom(BloomfilterTestCase, unittest.TestCase):
    """Run the shared bloomfilter checks on the module bound to ``pybloom``.

    The export/import hooks are overridden to go through the filter's
    ``tofile`` / ``fromfile`` methods and an in-memory buffer.
    """

    _filter_mod = pybloom

    def _export(self):
        """Write the filter with ``tofile`` and return the dump as base64."""
        buf = StringIO.StringIO()
        self.filter.tofile(buf)
        dumped = buf.getvalue()
        return dumped.encode('base64')

    def _import(self, data):
        """Decode base64 *data* and load a filter from it with ``fromfile``."""
        loader = self._filter_class().fromfile
        buf = StringIO.StringIO(data.decode('base64'))
        return loader(buf)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPybloof(BloomfilterTestCase, unittest.TestCase):
    """Run the shared bloomfilter checks on the ``pybloof`` module."""

    _filter_mod = pybloof
|
Reference in New Issue
Block a user