Skip to content

Commit

Permalink
chg: [library] bloomfilter absclass, checking entries against PSS files
Browse files Browse the repository at this point in the history
  • Loading branch information
gallypette committed Feb 5, 2024
1 parent 4cd2545 commit 874f8de
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 14 deletions.
28 changes: 28 additions & 0 deletions private_search_set/bloom_filter_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from abc import ABC, abstractmethod

class BloomFilterBase(ABC):
    """Abstract interface that every bloom filter backend implements.

    Attributes:
        loaded: ``False`` after construction; implementations flip it in
            ``load`` once a filter has been read from a file.
    """

    @abstractmethod
    def __init__(self, parameters):
        """Initialize the Bloom filter with given parameters.

        Subclasses should call ``super().__init__(parameters)`` so that
        ``loaded`` is always defined on the instance.
        """
        # Must be stored on the instance: a bare local name would vanish when
        # this method returns and leave `loaded` undefined for readers.
        self.loaded = False

    @abstractmethod
    def add(self, data):
        """Add data to the Bloom filter."""

    @abstractmethod
    def check(self, data):
        """Return whether data is (probably) present in the Bloom filter."""

    @abstractmethod
    def load(self, data):
        """Load a Bloom filter from file."""

    @abstractmethod
    def write(self):
        """Write the serialized bloom filter to a file descriptor."""
        # NOTE(review): concrete backends take the target as an extra
        # argument (e.g. ``write(self, fd)``); this abstract signature is
        # kept unchanged here - confirm before aligning it.
26 changes: 26 additions & 0 deletions private_search_set/bloom_filter_dcso.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from flor import BloomFilter
from private_search_set.bloom_filter_base import BloomFilterBase

class BloomFilterDCSO(BloomFilterBase):
    """Bloom filter backend that delegates to ``flor.BloomFilter``."""

    def __init__(self, parameters):
        """Create an empty filter.

        Args:
            parameters: mapping providing the 'capacity' and
                'fp-probability' entries passed to ``flor.BloomFilter``.
        """
        super().__init__(parameters)
        # Define the flag on the instance up front so that reading `loaded`
        # before load() has run yields False instead of AttributeError.
        self.loaded = False
        self.bf = BloomFilter(n=parameters['capacity'], p=parameters['fp-probability'])

    def add(self, data):
        """Insert data into the underlying filter."""
        self.bf.add(data)

    def check(self, data):
        """Return the result of the underlying filter's membership test."""
        return data in self.bf

    def load(self, fd):
        """Read a serialized filter from fd and update ``loaded``."""
        self.bf.read(fd)
        # A filter whose N is 0 after reading is treated as "not loaded".
        # NOTE(review): N is presumably flor's inserted-element counter - confirm.
        self.loaded = self.bf.N != 0

    def write(self, fd):
        """Serialize the underlying filter to fd."""
        self.bf.write(fd)
23 changes: 19 additions & 4 deletions private_search_set/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import click

import pdb
from private_search_set.main import PrivateSearchSet

@click.pass_context
Expand All @@ -8,18 +10,31 @@ def ingest_stdin(ctx):
pss.ingest_stdin()
pss.write_to_files(ctx.params["pss_home"])

@click.pass_context
def check_stdin(ctx):
    """Run the stdin check on the object stored in the click context."""
    ctx.obj.check_stdin()

# Click entry point: stores a PrivateSearchSet in ctx.obj, then either ingests
# stdin (ingest truthy) or checks stdin against the loaded set.
# NOTE(review): this span comes from a diff rendering, so pre- and post-commit
# lines appear side by side (the duplicated --json-file / --ingest options and
# the unconditional load_from_json_specs / init_filter pair at the top of the
# body) - confirm which lines are live before editing.
@click.command()
@click.option('--pss-home', required=True, type=click.Path(exists=False) , help='PSS working folder.')
@click.option('--json-file', required=True, type=click.Path(exists=True), help='Path to the PSS JSON file.')
@click.option('--ingest', required=True, type=click.BOOL , help='ingest stdin to PSS file')
@click.option('--json-file', required=False, type=click.Path(exists=True), help='Path to the PSS JSON file.')
@click.option('--ingest', required=True, type=click.BOOL , help='ingest stdin to PSS files')
@click.option('--debug/--no-debug', default=False)
@click.pass_context
def cli(ctx, json_file, pss_home, ingest, debug):
ctx.obj = PrivateSearchSet.load_from_json_specs(json_file)
ctx.obj.init_filter()
# NOTE(review): --pss-home is declared required=True, so whenever --json-file
# is also given this guard raises - confirm the intended option contract.
if json_file and pss_home:
# TODO maybe allow conversion in the future
raise ValueError("Please provide either a json-file or a pss-home, not both.")
# If a json-file with PSS metadata is provided, load the PSS from the JSON file
if json_file:
ctx.obj = PrivateSearchSet.load_from_json_specs(json_file)
# If pss_home is provided, load the PSS from the files in the folder
if pss_home:
ctx.obj = PrivateSearchSet.load_from_pss_home(pss_home)
# Both helpers are wrapped with click.pass_context, which is why they are
# called without an explicit ctx argument.
if ingest:
ingest_stdin()
else:
check_stdin()
pass

def main():
Expand Down
105 changes: 95 additions & 10 deletions private_search_set/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import sys
import hashlib
from flor import BloomFilter
from private_search_set.bloom_filter_dcso import BloomFilterDCSO

class PrivateSearchSet:
def __init__(self, algorithm, bloomfilter, canonicalization_format, description, generated_timestamp, keyid, misp_attribute_types, version):
Expand Down Expand Up @@ -31,26 +31,111 @@ def load_from_json_specs(json_file):
data = {k.replace('-', '_'): v for k, v in json_data.items()}
pss = PrivateSearchSet(**data) # Create an instance of the PrivateSearchSet class
if set(data.keys()) == set(pss.__dict__.keys()):
PrivateSearchSet.print_private_search_set(pss)
# PrivateSearchSet.print_private_search_set(pss)
pss.init_filter_and_set()
return pss
else:
raise ValueError("JSON file does not match the expected format.")

def init_filter(self):
def load_from_pss_home(pss_home):
    """Build a PrivateSearchSet from the files stored in a PSS home folder.

    Reads 'private-search-set.json' for the specification, then loads
    'private-search-set.bloom' and 'private-search-set.pss' from the same
    folder through the instance's own loaders.

    Args:
        pss_home: path of the PSS working folder.

    Returns:
        The PrivateSearchSet created from the JSON specification.

    Raises:
        ValueError: if the folder or its JSON specification file is missing.
    """
    if not os.path.exists(pss_home):
        # Without this guard the function reached `return pss` with `pss`
        # never assigned and failed with UnboundLocalError.
        raise ValueError("PSS home does not exist.")

    spec_path = os.path.join(pss_home, 'private-search-set.json')
    if not os.path.exists(spec_path):
        raise ValueError("No JSON file found in the PSS home.")
    pss = PrivateSearchSet.load_from_json_specs(spec_path)

    pss.load_bf_from_file(os.path.join(pss_home, 'private-search-set.bloom'))
    # load_pss_from_file returns None when the .pss file is absent; that
    # value is stored as-is.
    pss._ps = pss.load_pss_from_file(os.path.join(pss_home, 'private-search-set.pss'))
    return pss

def load_bf_from_file(self, file_path):
    """Feed the file at file_path to the bloom filter's ``load``.

    Does nothing when the file does not exist.
    """
    if not os.path.exists(file_path):
        return
    with open(file_path, 'rb') as handle:
        self._bf.load(handle)

def load_pss_from_file(self, file_path):
    """Return the lines of file_path as a set, or None if the file is absent."""
    if not os.path.exists(file_path):
        return None
    with open(file_path, 'r') as handle:
        entries = handle.read().splitlines()
    return set(entries)

def init_filter_and_set(self):
    """Create the empty bloom filter and the empty private search set.

    Raises:
        ValueError: if ``self.bloomfilter['format']`` is not 'dcso-v1'.
    """
    # init bloom filter
    if self.bloomfilter['format'] != 'dcso-v1':
        raise ValueError("Bloomfilter format not supported.")
    # Only the wrapper is built; a raw flor filter assigned here first would
    # be overwritten immediately and never used.
    self._bf = BloomFilterDCSO(self.bloomfilter)

    # init the private search set
    self._ps = set()

def ingest_stdin(self):
    """Read all of stdin as bytes and ingest it line by line.

    Every line is handed to ``self.ingest``, which is the single place that
    hashes an entry and records it. Nothing is added to the bloom filter or
    the private search set directly here, so raw (unhashed) input never
    reaches the filter and entries are not recorded twice.
    """
    # Read bytes from stdin
    for line in sys.stdin.buffer.read().splitlines():
        self.ingest(line)

def ingest(self, data):
    """Hash one entry and record it in the search set and the bloom filter.

    Args:
        data: bytes of the entry to record.

    Raises:
        ValueError: if ``self.algorithm`` is not 'Blake2'.
    """
    # HMAC the data
    if self.algorithm != 'Blake2':
        raise ValueError("HMAC algorithm not supported.")
    # TODO Use a salt
    # TODO We use the keyid as the key for the time being
    digest_hex = hashlib.blake2b(data, key=self.keyid.encode()).hexdigest()

    # the private search set stores the hex digest as a string
    self._ps.add(digest_hex)
    # the bloom filter stores the utf8 encoded bytes of that same hex digest
    if self.bloomfilter['format'] == 'dcso-v1':
        self._bf.add(digest_hex.encode())

def check_stdin(self):
    """Print every stdin line that matches the loaded search data.

    The private search set is used when it is present; otherwise the bloom
    filter is used if it reports itself as loaded.

    Raises:
        ValueError: if a line is read while neither a private search set
            nor a loaded bloom filter is available.
    """
    # Read bytes from stdin
    for line in sys.stdin.buffer.read().splitlines():
        # check hashset in priority
        if self._ps is not None:
            if self.check_pss(line):
                # line is bytes, so print() emits its repr (b'...').
                print(line)
        elif self._bf.loaded:
            if self.check_bf(line):
                print(line)
        else:
            raise ValueError("No private search set or bloom filter loaded.")

def check_pss(self, data):
    """Return True if the hashed form of data is in the private search set.

    Args:
        data: bytes of the entry to look up.

    Raises:
        ValueError: if ``self.algorithm`` is not 'Blake2'.
    """
    # HMAC the data
    if self.algorithm != 'Blake2':
        raise ValueError("HMAC algorithm not supported.")
    # TODO Use a salt
    # TODO We use the keyid as the key for the time being
    digest_hex = hashlib.blake2b(data, key=self.keyid.encode()).hexdigest()
    return digest_hex in self._ps

def check_bf(self, data):
    """Ask the bloom filter whether the hashed form of data is present.

    Args:
        data: bytes of the entry to look up.

    Returns:
        Whatever ``self._bf.check`` returns for the encoded hex digest.

    Raises:
        ValueError: if the algorithm is not 'Blake2' or the bloom filter
            format is not 'dcso-v1' (the algorithm is validated first).
    """
    # HMAC the data
    if self.algorithm != 'Blake2':
        raise ValueError("HMAC algorithm not supported.")
    # TODO Use a salt
    # TODO We use the keyid as the key for the time being
    digest_bytes = hashlib.blake2b(data, key=self.keyid.encode()).hexdigest().encode()

    if self.bloomfilter['format'] != 'dcso-v1':
        raise ValueError("Bloomfilter format not supported.")
    return self._bf.check(digest_bytes)


def write_to_files(self, pss_home):
if not os.path.exists(pss_home):
Expand Down

0 comments on commit 874f8de

Please sign in to comment.