This repository has been archived on 2024-12-25. You can view files and clone it, but cannot push or open issues or pull requests.
PyBitmessage-2024-12-25/dev/bloomfiltertest.py
Venkatesh Pitta c548ff96a1
Removed unused and unreachable/dead code
Signed-off-by: Venkatesh Pitta <venkateshpitta@gmail.com>
2018-03-13 09:16:01 +11:00

60 lines
1.8 KiB
Python

from math import ceil
from os import getenv, path
from pybloom import BloomFilter as BloomFilter1
from pybloomfilter import BloomFilter as BloomFilter2
import sqlite3
from time import time
# Ubuntu: apt-get install python-pybloomfiltermmap
conn = sqlite3.connect(path.join(getenv("HOME"), '.config/PyBitmessage/messages.dat'))
cur = conn.cursor()
rawlen = 0
itemcount = 0
cur.execute('''SELECT COUNT(hash) FROM inventory''')
for row in cur.fetchall():
itemcount = row[0]
filtersize = 1000 * (int(itemcount / 1000) + 1)
errorrate = 1.0 / 1000.0
bf1 = BloomFilter1(capacity=filtersize, error_rate=errorrate)
bf2 = BloomFilter2(capacity=filtersize, error_rate=errorrate)
item = '''SELECT hash FROM inventory'''
cur.execute(item, '')
bf1time = 0
bf2time = 0
for row in cur.fetchall():
rawlen += len(row[0])
try:
times = [time()]
bf1.add(row[0])
times.append(time())
bf2.add(row[0])
times.append(time())
bf1time += times[1] - times[0]
bf2time += times[2] - times[1]
except IndexError:
pass
#f = open("/home/shurdeek/tmp/bloom.dat", "wb")
#sb1.tofile(f)
#f.close()
print "Item count: %i" % (itemcount)
print "Raw length: %i" % (rawlen)
print "Bloom filter 1 length: %i, reduction to: %.2f%%" % \
(bf1.bitarray.buffer_info()[1],
100.0 * bf1.bitarray.buffer_info()[1] / rawlen)
print "Bloom filter 1 capacity: %i and error rate: %.3f%%" % (bf1.capacity, 100.0 * bf1.error_rate)
print "Bloom filter 1 took %.2fs" % (bf1time)
print "Bloom filter 2 length: %i, reduction to: %.3f%%" % \
(bf2.num_bits / 8,
100.0 * bf2.num_bits / 8 / rawlen)
print "Bloom filter 2 capacity: %i and error rate: %.3f%%" % (bf2.capacity, 100.0 * bf2.error_rate)
print "Bloom filter 2 took %.2fs" % (bf2time)