forked from jaybaird/python-bloomfilter
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request jaybaird#8 from joseph-fox/hotfix/readme
Change `README` to .md format
- Loading branch information
Showing
3 changed files
with
311 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,228 @@ | ||
*.egg | ||
env | ||
*.pyc | ||
*.sqlite3 | ||
|
||
### Django ### | ||
*.log | ||
*.pot | ||
*.pyc | ||
*.egg-info | ||
build | ||
dist | ||
__pycache__/ | ||
local_settings.py | ||
db.sqlite3 | ||
media | ||
|
||
### macOS ### | ||
*.DS_Store | ||
.AppleDouble | ||
.LSOverride | ||
|
||
# Icon must end with two \r | ||
Icon | ||
|
||
# Thumbnails | ||
._* | ||
|
||
# Files that might appear in the root of a volume | ||
.DocumentRevisions-V100 | ||
.fseventsd | ||
.Spotlight-V100 | ||
.TemporaryItems | ||
.Trashes | ||
.VolumeIcon.icns | ||
.com.apple.timemachine.donotpresent | ||
|
||
# Directories potentially created on remote AFP share | ||
.AppleDB | ||
.AppleDesktop | ||
Network Trash Folder | ||
Temporary Items | ||
.apdisk | ||
|
||
### PyCharm ### | ||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm | ||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | ||
.idea/* | ||
|
||
# CMake | ||
cmake-build-debug/ | ||
|
||
# Mongo Explorer plugin: | ||
.idea/**/mongoSettings.xml | ||
|
||
## File-based project format: | ||
*.iws | ||
|
||
## Plugin-specific files: | ||
|
||
# IntelliJ | ||
/out/ | ||
|
||
# mpeltonen/sbt-idea plugin | ||
.idea_modules/ | ||
|
||
# JIRA plugin | ||
atlassian-ide-plugin.xml | ||
|
||
# Cursive Clojure plugin | ||
.idea/replstate.xml | ||
|
||
# Crashlytics plugin (for Android Studio and IntelliJ) | ||
com_crashlytics_export_strings.xml | ||
crashlytics.properties | ||
crashlytics-build.properties | ||
fabric.properties | ||
|
||
### PyCharm Patch ### | ||
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 | ||
|
||
# *.iml | ||
# modules.xml | ||
# .idea/misc.xml | ||
# *.ipr | ||
|
||
# Sonarlint plugin | ||
.idea/sonarlint | ||
|
||
### Python ### | ||
# Byte-compiled / optimized / DLL files | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
env/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*,cover | ||
.hypothesis/ | ||
|
||
# Translations | ||
*.mo | ||
|
||
# Django stuff: | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# dotenv | ||
.env | ||
|
||
# virtualenv | ||
.venv | ||
.tox | ||
venv/ | ||
ENV/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
### SublimeText ### | ||
# cache files for sublime text | ||
*.tmlanguage.cache | ||
*.tmPreferences.cache | ||
*.stTheme.cache | ||
|
||
# workspace files are user-specific | ||
*.sublime-workspace | ||
|
||
# project files should be checked into the repository, unless a significant | ||
# proportion of contributors will probably not be using SublimeText | ||
# *.sublime-project | ||
|
||
# sftp configuration file | ||
sftp-config.json | ||
|
||
# Package control specific files | ||
Package Control.last-run | ||
Package Control.ca-list | ||
Package Control.ca-bundle | ||
Package Control.system-ca-bundle | ||
Package Control.cache/ | ||
Package Control.ca-certs/ | ||
Package Control.merged-ca-bundle | ||
Package Control.user-ca-bundle | ||
oscrypto-ca-bundle.crt | ||
bh_unicode_properties.cache | ||
|
||
# Sublime-github package stores a github token in this file | ||
# https://packagecontrol.io/packages/sublime-github | ||
GitHub.sublime-settings | ||
|
||
### Vim ### | ||
# swap | ||
[._]*.s[a-v][a-z] | ||
[._]*.sw[a-p] | ||
[._]s[a-v][a-z] | ||
[._]sw[a-p] | ||
# session | ||
Session.vim | ||
# temporary | ||
.netrwhist | ||
*~ | ||
# auto-generated tag files | ||
tags | ||
|
||
# End of https://www.gitignore.io/api/vim,macos,django,python,pycharm,sublimetext |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
[![Build Status](https://travis-ci.org/joseph-fox/python-bloomfilter.svg?branch=master)](https://travis-ci.org/joseph-fox/python-bloomfilter) | ||
|
||
# Python Bloom Filter | ||
|
||
|
||
This Bloom Filter has its tightening ratio updated to 0.9, and this ratio | ||
is consistently used throughout the `pybloom` module. | ||
Choosing r around 0.8 - 0.9 will result in better average space usage for a wide | ||
range of growth; therefore the default value of `mode` is set to | ||
LARGE_SET_GROWTH. This is a module that includes a Bloom Filter data structure | ||
along with an implementation of Scalable Bloom Filters as discussed in: | ||
|
||
``` | ||
P. Almeida, C.Baquero, N. Preguiça, D. Hutchison, Scalable Bloom Filters, (GLOBECOM 2007), IEEE, 2007. | ||
``` | ||
Bloom filters are great if you know in advance how many bits you need to set | ||
aside to store your entire set. Scalable Bloom Filters allow your bloom | ||
filter bits to grow as a function of false positive probability and size. | ||
|
||
A filter is "full" when at capacity: `M * ((ln 2 ^ 2) / abs(ln p))`, where M | ||
is the number of bits and p is the false positive probability. When capacity | ||
is reached a new filter is then created exponentially larger than the last | ||
with a tighter probability of false positives and a larger number of hash | ||
functions. | ||
|
||
```python | ||
>>> import pybloom_live | ||
>>> f = pybloom_live.BloomFilter(capacity=1000, error_rate=0.001) | ||
>>> [f.add(x) for x in range(10)] | ||
[False, False, False, False, False, False, False, False, False, False] | ||
>>> all([(x in f) for x in range(10)]) | ||
True | ||
>>> 10 in f | ||
False | ||
>>> 5 in f | ||
True | ||
>>> f = pybloom_live.BloomFilter(capacity=1000, error_rate=0.001) | ||
>>> for i in range(0, f.capacity): | ||
... _ = f.add(i) | ||
>>> (1.0 - (len(f) / float(f.capacity))) <= f.error_rate + 2e-18 | ||
True | ||
|
||
>>> sbf = pybloom_live.ScalableBloomFilter(mode=pybloom_live.ScalableBloomFilter.SMALL_SET_GROWTH) | ||
>>> count = 10000 | ||
>>> for i in range(0, count): | ||
_ = sbf.add(i) | ||
|
||
>>> (1.0 - (len(sbf) / float(count))) <= sbf.error_rate + 2e-18 | ||
True | ||
# len(sbf) may not equal the entire input length. 0.01% error is well | ||
# below the default 0.1% error threshold. As the capacity goes up, the | ||
# error will approach 0.1%. | ||
``` | ||
# Development | ||
We follow this [git branching model](http://nvie.com/posts/a-successful-git-branching-model/), | ||
please have a look at it. | ||
|
||
|
||
# Installation instructions | ||
If you are installing from an internet-connected computer (or virtual | ||
install), you can use the pip python package manager to download and install | ||
this package. Simply type `pip install pybloom-live` from a DOS command | ||
prompt (`cmd.exe`) or a linux shell (e.g. `bash` or `dash` on MacOS X as well | ||
as linux OSes including debian, slackware, redhat, enoch and arch). | ||
|
||
If using Windows and you are installing onto an air-gapped computer or want | ||
the most up-to-date version from this repository, you can do the following: | ||
|
||
1. Download the zip file by clicking on the green "Clone or Download" | ||
link followed by "Download Zip." | ||
|
||
2. Extract all the contents of the zip folder. | ||
|
||
3. Open command prompt (``cmd.exe``) to the extracted folder. | ||
a. Find the extracted folder in Windows Explorer. | ||
b. From the parent folder level Shift+RightClick on the folder. | ||
c. Select "Open command window here". | ||
|
||
4. Type `pip install .`. | ||
|
||
Similar steps are possible under linux and MacOS X. | ||
|
||
# Installation verification | ||
Type `pip show pybloom-live` from a command prompt. Version should be | ||
2.2.0 as of 2016-12-11. |
Oops, something went wrong.