diff --git a/dev/bloomfiltertest.py b/dev/bloomfiltertest.py
deleted file mode 100644
index 8f7b5f69..00000000
--- a/dev/bloomfiltertest.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""
-dev/bloomfiltertest.py
-======================
-
-"""
-
-import sqlite3
-from os import getenv, path
-from time import time
-
-from pybloom import BloomFilter as BloomFilter1  # pylint: disable=import-error
-from pybloomfilter import BloomFilter as BloomFilter2  # pylint: disable=import-error
-
-# Ubuntu: apt-get install python-pybloomfiltermmap
-
-conn = sqlite3.connect(path.join(getenv("HOME"), '.config/PyBitmessage/messages.dat'))
-
-conn.text_factory = str
-cur = conn.cursor()
-rawlen = 0
-itemcount = 0
-
-cur.execute('''SELECT COUNT(hash) FROM inventory''')
-for row in cur.fetchall():
-    itemcount = row[0]
-
-filtersize = 1000 * (int(itemcount / 1000) + 1)
-errorrate = 1.0 / 1000.0
-
-bf1 = BloomFilter1(capacity=filtersize, error_rate=errorrate)
-bf2 = BloomFilter2(capacity=filtersize, error_rate=errorrate)
-
-item = '''SELECT hash FROM inventory'''
-cur.execute(item, '')
-bf1time = 0
-bf2time = 0
-for row in cur.fetchall():
-    rawlen += len(row[0])
-    try:
-        times = [time()]
-        bf1.add(row[0])
-        times.append(time())
-        bf2.add(row[0])
-        times.append(time())
-        bf1time += times[1] - times[0]
-        bf2time += times[2] - times[1]
-    except IndexError:
-        pass
-
-# f = open("/home/shurdeek/tmp/bloom.dat", "wb")
-# sb1.tofile(f)
-# f.close()
-
-
-print "Item count: %i" % (itemcount)
-print "Raw length: %i" % (rawlen)
-print "Bloom filter 1 length: %i, reduction to: %.2f%%" % \
-    (bf1.bitarray.buffer_info()[1],
-     100.0 * bf1.bitarray.buffer_info()[1] / rawlen)
-print "Bloom filter 1 capacity: %i and error rate: %.3f%%" % (bf1.capacity, 100.0 * bf1.error_rate)
-print "Bloom filter 1 took %.2fs" % (bf1time)
-print "Bloom filter 2 length: %i, reduction to: %.3f%%" % \
-    (bf2.num_bits / 8,
-     100.0 * bf2.num_bits / 8 / rawlen)
-print "Bloom filter 2 capacity: %i and error rate: %.3f%%" % (bf2.capacity, 100.0 * bf2.error_rate)
-print "Bloom filter 2 took %.2fs" % (bf2time)
diff --git a/src/tests/bloomfilter.py b/src/tests/bloomfilter.py
new file mode 100644
index 00000000..387c5634
--- /dev/null
+++ b/src/tests/bloomfilter.py
@@ -0,0 +1,192 @@
+"""
+Test bloomfilter packages.
+
+This module is imported from the core tests module and run by nose.
+
+Every test case checks one optional bloom filter package against the
+same randomly generated hashes and is skipped if the package is missing.
+"""
+
+import hashlib
+import random  # nosec
+import string
+import StringIO
+import unittest
+from importlib import import_module
+
+# import inventory
+
+
+def have_package(pkg_name):
+    """
+    Import the package `pkg_name`, remembering the outcome.
+
+    Returns the imported module or False if the import failed. The result
+    is cached as an attribute of this function, so the import is tried
+    only once per name.
+    """
+    try:
+        return getattr(have_package, pkg_name)
+    except AttributeError:
+        pass
+    try:
+        pkg = import_module(pkg_name)
+    except ImportError:
+        pkg = False
+    setattr(have_package, pkg_name, pkg)
+    return pkg
+
+
+pybloomfilter = have_package('pybloomfilter')
+pybloom = have_package('pybloom_live') or have_package('pybloom')
+pybloof = have_package('pybloof')
+
+
+if pybloof:
+    class BloomfilterPybloof(pybloof.StringBloomFilter):
+        """
+        pybloof filter created from capacity and error rate,
+        which it also keeps as attributes for the tests to read
+        """
+        def __init__(self, capacity, error_rate=0.001):
+            self.capacity = capacity
+            self.error_rate = error_rate
+            kwargs = pybloof.bloom_calculator(capacity, error_rate)
+            super(BloomfilterPybloof, self).__init__(**kwargs)
+
+    # the name BloomfilterTestCase._filter_class() looks up by default
+    pybloof.BloomFilter = BloomfilterPybloof
+
+
+# error rate the filters are created with
+_error_rate = 1 / 1000.
+
+# TODO: make this an option
+# _inventory = inventory.Inventory()
+# second halves of sha512 digests of random strings, duplicates dropped
+_inventory = list(set(
+    hashlib.sha512(
+        # ascii_lowercase, unlike lowercase, doesn't depend on the locale
+        ''.join(random.choice(string.ascii_lowercase) for _ in range(32))
+    ).digest()[32:] for _ in range(100000)
+))
+
+# hashes which are never added to a filter
+_hashes_absent = _inventory[-50000:]
+# hashes to add, wrapped into single item rows
+_inventory = [[item] for item in _inventory[:50000]]
+# random sample (with repetitions) of the added hashes
+_hashes_present = [
+    random.choice(_inventory)[0] for _ in range(10000)
+]
+# filter objects shared by the test methods, keyed by package module
+_filters = {}
+
+
+class BloomfilterTestCase(object):
+    """
+    Base class for bloomfilter test case
+
+    A subclass sets `_filter_mod` to the package module (or False if it
+    is not installed) and may set `filter_cls` to the name of the filter
+    class in that module. All test methods of a case share one filter, so
+    `test_0_add` has to run first (its name sorts before the others).
+    """
+    def setUp(self):
+        """Skip the test if the package is not available"""
+        print('\n')
+        if self.filter is None:
+            self.skipTest('package not found')
+
+    def _filter_class(self):
+        """Get the filter class, `BloomFilter` unless `filter_cls` is set"""
+        filter_cls = getattr(self, 'filter_cls', 'BloomFilter')
+        return getattr(self._filter_mod, filter_cls)
+
+    def _export(self):
+        """Export the shared filter by its to_base64() method"""
+        return self.filter.to_base64()
+
+    def _import(self, data):
+        """Create a new filter from the data returned by `_export()`"""
+        return self._filter_class().from_base64(data)
+
+    @property
+    def filter(self):
+        """
+        The filter object of this package, created on first access;
+        None if the package is not available
+        """
+        filter_obj = _filters.get(self._filter_mod)
+        if filter_obj is None:
+            if not self._filter_mod:
+                return
+            filtersize = 1000 * (int(len(_inventory) / 1000.) + 1)
+            filter_obj = _filters[self._filter_mod] = self._filter_class(
+            )(filtersize, _error_rate)
+            print(
+                'Filter class: %s\n'
+                'Filter capacity: %i and error rate: %.3f%%\n' % (
+                    type(filter_obj), filter_obj.capacity,
+                    100 * filter_obj.error_rate
+                )
+            )
+        return filter_obj
+
+    def test_0_add(self):
+        """Add all Inventory hashes to the filter"""
+        for row in _inventory:
+            self.filter.add(row[0])
+
+    def test_absence(self):
+        """Check absence of hashes in the filter"""
+        errors = sum(sample in self.filter for sample in _hashes_absent)
+        # print('Errors: %s from %s' % (errors, len(_hashes_absent)))
+        expected = len(_hashes_absent) * _error_rate
+        # False positives are random: leave room for 4 standard
+        # deviations (about sqrt(expected) each), so that a filter
+        # keeping its error rate does not fail the test by chance
+        self.assertLessEqual(errors, expected + 4 * expected ** 0.5 + 1)
+
+    def test_presence(self):
+        """Check presence of hashes in the filter"""
+        for sample in _hashes_present:
+            self.assertTrue(sample in self.filter)
+
+    def test_portability(self):
+        """Check filter's export/import ability"""
+        filter_orig = self.filter
+        filter_copy = self._import(self._export())
+        for sample in random.sample(_hashes_present, 100):
+            self.assertTrue(sample in filter_copy)
+        # An absent hash may be a false positive, so the copy is
+        # compared with the original instead of asserting the absence
+        for sample in random.sample(_hashes_absent, 100):
+            self.assertEqual(sample in filter_copy, sample in filter_orig)
+
+
+class TestPybloomfiltermmap(BloomfilterTestCase, unittest.TestCase):
+    """Test the pybloomfilter package"""
+    _filter_mod = pybloomfilter
+
+
+class TestPybloom(BloomfilterTestCase, unittest.TestCase):
+    """Test the pybloom_live or pybloom package"""
+    _filter_mod = pybloom
+
+    def _export(self):
+        """Write the filter by its tofile() and encode that into base64"""
+        output = StringIO.StringIO()
+        self.filter.tofile(output)
+        return output.getvalue().encode('base64')
+
+    def _import(self, data):
+        """Read a new filter by fromfile() from the decoded data"""
+        return self._filter_class().fromfile(
+            StringIO.StringIO(data.decode('base64'))
+        )
+
+
+class TestPybloof(BloomfilterTestCase, unittest.TestCase):
+    """Test the pybloof package"""
+    _filter_mod = pybloof