From 0688743c21e9d13c5ce4e0f27d5170aa9b7e464f Mon Sep 17 00:00:00 2001 From: Jay Baird Date: Fri, 11 Mar 2011 14:28:01 -0500 Subject: [PATCH] re #2, add changelog, union, intersection and copy to BloomFilter, not yet available in SBF. Bumped version number to 1.1 --- CHANGES.txt | 3 +++ LICENSE.txt | 2 +- pybloom/pybloom.py | 29 ++++++++++++++++++++++++++++- pybloom/tests.py | 27 +++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 CHANGES.txt diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 0000000..44a24e2 --- /dev/null +++ b/CHANGES.txt @@ -0,0 +1,3 @@ +Changes in 1.1 +============== +Added copy, intersection and union functions to BloomFilter \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt index 1353e37..11a07db 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) <2009> +Copyright (c) <2011> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation diff --git a/pybloom/pybloom.py b/pybloom/pybloom.py index 504cea1..dcb48d9 100644 --- a/pybloom/pybloom.py +++ b/pybloom/pybloom.py @@ -42,7 +42,7 @@ except ImportError: raise ImportError('pybloom requires bitarray >= 0.3.4') -__version__ = '1.0.3' +__version__ = '1.1' __author__ = "Jay Baird , Bob Ippolito ,\ Marius Eriksen , Alex Brassetvik " @@ -187,6 +187,33 @@ def add(self, key, skip_check=False): self.count += 1 return False + def copy(self): + """Return a copy of this bloom filter. 
+ """ + new_filter = BloomFilter(self.capacity, self.error_rate) + new_filter.bitarray = self.bitarray.copy() + return new_filter + + def union(self, other): + """ Calculates the union of the two underlying bitarrays and returns + a new bloom filter object.""" + new_bloom = self.copy() + new_bloom.bitarray = new_bloom.bitarray | other.bitarray + return new_bloom + + def __or__(self, other): + return self.union(other) + + def intersection(self, other): + """ Calculates the intersection of the two underlying bitarrays and returns + a new bloom filter object.""" + new_bloom = self.copy() + new_bloom.bitarray = new_bloom.bitarray & other.bitarray + return new_bloom + + def __and__(self, other): + return self.intersection(other) + def tofile(self, f): """Write the bloom filter to file object `f'. Underlying bits are written as machine values. This is much more space diff --git a/pybloom/tests.py b/pybloom/tests.py index 32f34ae..d7d59bb 100644 --- a/pybloom/tests.py +++ b/pybloom/tests.py @@ -14,6 +14,33 @@ def additional_tests(): suite.addTest(doctest.DocFileSuite(readme_fn, module_relative=False)) return suite +class TestUnionIntersection(unittest.TestCase): + def test_union(self): + bloom_one = BloomFilter(100, 0.001) + bloom_two = BloomFilter(100, 0.001) + chars = [chr(i) for i in range(97, 123)] + for char in chars[len(chars)/2:]: + bloom_one.add(char) + for char in chars[:len(chars)/2]: + bloom_two.add(char) + new_bloom = bloom_one.union(bloom_two) + for char in chars: + assert(char in new_bloom) + + def test_intersection(self): + bloom_one = BloomFilter(100, 0.001) + bloom_two = BloomFilter(100, 0.001) + chars = [chr(i) for i in range(97, 123)] + for char in chars: + bloom_one.add(char) + for char in chars[:len(chars)/2]: + bloom_two.add(char) + new_bloom = bloom_one.intersection(bloom_two) + for char in chars[:len(chars)/2]: + assert(char in new_bloom) + for char in chars[len(chars)/2:]: + assert(char not in new_bloom) + class Serialization(unittest.TestCase): SIZE = 
12345 EXPECTED = set([random.randint(0, 10000100) for _ in xrange(SIZE)])