Skip to content

Commit

Permalink
Merge pull request #79 from shilorigins/devagr/search
Browse files Browse the repository at this point in the history
ENH: Augment search methods
  • Loading branch information
shilorigins authored Sep 19, 2024
2 parents bcd8ff2 + f73ef72 commit cdda3a5
Show file tree
Hide file tree
Showing 9 changed files with 241 additions and 60 deletions.
24 changes: 24 additions & 0 deletions docs/source/upcoming_release_notes/79-augment_search_methods.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
79 augment search methods
#########################

API Breaks
----------
- ``Client.search`` takes ``SearchTerm`` objects as ``*args`` rather than key-value pairs as ``**kwargs``

Features
--------
- regex search on Entry text fields
- filter Entry objects by tag
- filter Entry objects by attribute value

Bugfixes
--------
- N/A

Maintenance
-----------
- N/A

Contributors
------------
- shilorigins
28 changes: 25 additions & 3 deletions superscore/backends/core.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
"""
Base superscore data storage backend interface
"""
from typing import Generator
from collections.abc import Container, Generator
from typing import NamedTuple, Union
from uuid import UUID

from superscore.model import Entry, Root
from superscore.type_hints import AnyEpicsType

# Value half of a search term: a single EPICS-typed value, or a container / tuple of
# such values for operators that take several values.
SearchTermValue = Union[AnyEpicsType, Container[AnyEpicsType], tuple[AnyEpicsType, ...]]
# Plain-tuple spelling of a search term: (attr, operator, value).
SearchTermType = tuple[str, str, SearchTermValue]


class SearchTerm(NamedTuple):
"""
A single search filter in the form (attr, operator, value).
"""
# name of the Entry attribute to test, or a special keyword such as "entry_type"
attr: str
# name of the comparison operator, e.g. "eq", "lt", "gt", "in", "like"
operator: str
# value (or container / tuple of values) the attribute is compared against
value: SearchTermValue


class _Backend:
"""
Base class for data storage backend.
"""

def get_entry(self, meta_id: UUID) -> Entry:
"""
Get entry with ``meta_id``
Expand Down Expand Up @@ -40,8 +52,18 @@ def update_entry(self, entry: Entry) -> None:
"""
raise NotImplementedError

def search(self, **search_kwargs) -> Generator[Entry, None, None]:
"""Yield a Entry objects corresponding matching ``search_kwargs``"""
def search(self, *search_terms: SearchTermType) -> Generator[Entry, None, None]:
"""
Yield Entry objects matching all ``search_terms``. Each SearchTerm has the format
(<attr>, <operator>, <value>). Some operators take tuples as values.
The supported operators are:
- eq (equals)
- lt (less than or equal to)
- gt (greater than or equal to)
- in
- like (fuzzy match, depends on type of value)

Note that ``lt`` and ``gt`` are inclusive bounds despite their names.

This base-class method only defines the interface; it always raises
NotImplementedError.
"""
raise NotImplementedError

@property
Expand Down
82 changes: 52 additions & 30 deletions superscore/backends/filestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
import json
import logging
import os
import re
import shutil
from dataclasses import fields, replace
from typing import Any, Dict, Generator, Optional, Union
from uuid import UUID, uuid4

from apischema import deserialize, serialize

from superscore.backends.core import _Backend
from superscore.backends.core import SearchTermType, SearchTermValue, _Backend
from superscore.errors import BackendError
from superscore.model import Entry, Root
from superscore.type_hints import AnyEpicsType
from superscore.utils import build_abs_path

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -284,43 +286,63 @@ def delete_entry(self, entry: Entry) -> None:
with self._load_and_store_context() as db:
db.pop(entry.uuid, None)

def search(self, **search_kwargs) -> Generator[Entry, None, None]:
def search(self, *search_terms: SearchTermType) -> Generator[Entry, None, None]:
"""
Search for an entry that matches ``search_kwargs``.
Keys are attributes on `Entry` subclasses
Values can be either a single value to match or a tuple of valid values
Currently does not support partial matches.
Return entries that match all ``search_terms``.
Keys are attributes on `Entry` subclasses, or special keywords.
Values can be a single value or a tuple of values depending on operator.
"""
with self._load_and_store_context() as db:
for entry in db.values():
match_found = True
for key, value in search_kwargs.items():
# specific type handling, assuming is tuple
if key == "entry_type":
if not isinstance(entry, search_kwargs["entry_type"]):
match_found = False

elif key == "start_time":
if value > entry.creation_time:
match_found = False
elif key == "end_time":
if entry.creation_time > value:
match_found = False

conditions = []
for attr, op, target in search_terms:
# TODO: search for child pvs?

# plain key-value match
if attr == "entry_type":
conditions.append(isinstance(entry, target))
else:
entry_value = getattr(entry, key, None)
if isinstance(value, tuple):
matched = entry_value in value
else:
matched = entry_value == value
try:
# check entry attribute by name
value = getattr(entry, attr)
conditions.append(self.compare(op, value, target))
except AttributeError:
conditions.append(False)
if all(conditions):
yield entry

match_found = match_found and matched
@staticmethod
def compare(op: str, data: AnyEpicsType, target: SearchTermValue) -> bool:
    """
    Return whether data and target satisfy the op comparator, typically during
    application of a search filter. Possible values of op are detailed in
    _Backend.search

    Parameters
    ----------
    op: str
        one of the comparators that all backends must support, detailed in
        _Backend.search
    data: AnyEpicsType | Tuple[AnyEpicsType]
        data from an Entry that is being used to decide whether the Entry passes
        a filter
    target: AnyEpicsType | Tuple[AnyEpicsType]
        the filter value; for "like" it is used as a regular expression pattern

    Returns
    -------
    bool
        whether data and target satisfy the op condition

    Raises
    ------
    ValueError
        if op is not one of the supported operators
    TypeError
        if data and target cannot be combined by op, e.g. "like" applied with a
        non-string pattern or non-string data
    """
    if op == "eq":
        return data == target
    elif op == "lt":
        # "lt" is an inclusive upper bound
        return data <= target
    elif op == "gt":
        # "gt" is an inclusive lower bound
        return data >= target
    elif op == "in":
        return data in target
    elif op == "like":
        # UUIDs are matched against their canonical string form
        if isinstance(data, UUID):
            data = str(data)
        # re.search returns a Match object or None; collapse it to a real bool
        # so the return value honours the annotated type
        return re.search(target, data) is not None
    else:
        raise ValueError(f"SearchTerm does not support operator \"{op}\"")

@contextlib.contextmanager
def _load_and_store_context(self) -> Generator[Dict[UUID, Any], None, None]:
Expand Down
25 changes: 21 additions & 4 deletions superscore/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from uuid import UUID

from superscore.backends import get_backend
from superscore.backends.core import _Backend
from superscore.backends.core import SearchTerm, SearchTermType, _Backend
from superscore.control_layers import ControlLayer, EpicsData
from superscore.control_layers.status import TaskStatus
from superscore.errors import CommunicationError
Expand Down Expand Up @@ -147,9 +147,26 @@ def find_config() -> Path:
# If found nothing
raise OSError("No superscore configuration file found. Check SUPERSCORE_CFG.")

def search(self, **post) -> Generator[Entry, None, None]:
"""Search by key-value pair. Can search by any field, including id"""
return self.backend.search(**post)
def search(self, *post: SearchTermType) -> Generator[Entry, None, None]:
    """
    Search backend for entries matching all SearchTerms in ``post``. Can search by any
    field, plus some special keywords. Backends support operators listed in _Backend.search.
    Some operators are supported in the UI / client and must be converted before being
    passed to the backend.

    Plain ``(attr, operator, value)`` tuples are accepted and converted to SearchTerm.

    The client-side ``isclose`` operator takes a ``(target, rel_tol, abs_tol)`` value
    and is expanded into an inclusive range
    ``[target - margin, target + margin]`` with
    ``margin = abs(target) * rel_tol + abs_tol``, expressed as a ``gt`` term and an
    ``lt`` term.
    """
    new_search_terms = []
    for search_term in post:
        if not isinstance(search_term, SearchTerm):
            search_term = SearchTerm(*search_term)
        if search_term.operator == 'isclose':
            target, rel_tol, abs_tol = search_term.value
            # The relative margin must be non-negative; without abs() a negative
            # target would produce lower > upper and the range would match nothing.
            rel_margin = abs(target) * rel_tol
            lower = target - rel_margin - abs_tol
            upper = target + rel_margin + abs_tol
            new_search_terms.append(SearchTerm(search_term.attr, 'gt', lower))
            new_search_terms.append(SearchTerm(search_term.attr, 'lt', upper))
        else:
            new_search_terms.append(search_term)
    return self.backend.search(*new_search_terms)

def save(self, entry: Entry):
"""Save information in ``entry`` to database"""
Expand Down
95 changes: 86 additions & 9 deletions superscore/tests/test_backend.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from enum import Flag, auto
from uuid import UUID

import pytest

from superscore.backends.core import _Backend
from superscore.backends.core import SearchTerm, _Backend
from superscore.errors import (BackendError, EntryExistsError,
EntryNotFoundError)
from superscore.model import Collection, Parameter, Snapshot
Expand Down Expand Up @@ -73,42 +74,118 @@ def test_delete_entry(backends: _Backend):
def test_search_entry(backends: _Backend):
# Given an entry we know is in the backend
results = backends.search(
description='collection 1 defining some motor fields'
SearchTerm('description', 'eq', 'collection 1 defining some motor fields')
)
assert len(list(results)) == 1
# Search by field name
results = backends.search(
uuid=UUID('ffd668d3-57d9-404e-8366-0778af7aee61')
SearchTerm('uuid', 'eq', UUID('ffd668d3-57d9-404e-8366-0778af7aee61'))
)
assert len(list(results)) == 1
# Search by field name
results = backends.search(data=2)
results = backends.search(
SearchTerm('data', 'eq', 2)
)
assert len(list(results)) == 3
# Search by field name
results = backends.search(
uuid=UUID('ecb42cdb-b703-4562-86e1-45bd67a2ab1a'), data=2
SearchTerm('uuid', 'eq', UUID('ecb42cdb-b703-4562-86e1-45bd67a2ab1a')),
SearchTerm('data', 'eq', 2)
)
assert len(list(results)) == 1

results = backends.search(entry_type=Snapshot,)
results = backends.search(
SearchTerm('entry_type', 'eq', Snapshot)
)
assert len(list(results)) == 1

results = backends.search(entry_type=(Snapshot, Collection))
results = backends.search(
SearchTerm('entry_type', 'in', (Snapshot, Collection))
)
assert len(list(results)) == 2

results = backends.search(
SearchTerm('data', 'lt', 3)
)
assert len(list(results)) == 3

results = backends.search(
SearchTerm('data', 'gt', 3)
)
assert len(list(results)) == 1


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_fuzzy_search(backends: _Backend):
"""Check the 'like' operator against text and UUID attributes."""
# plain word used as the pattern
results = list(backends.search(
SearchTerm('description', 'like', 'motor'))
)
assert len(results) == 4

# pattern with a negative lookahead: "motor field " not followed by "PREC"
results = list(backends.search(
SearchTerm('description', 'like', 'motor field (?!PREC)'))
)
assert len(results) == 2

# pattern is a fragment of a UUID; presumably matched against the UUID's string
# form, as FilestoreBackend.compare does -- confirm for other backends
results = list(backends.search(
SearchTerm('uuid', 'like', '17cc6ebf'))
)
assert len(results) == 1


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_tag_search(backends: _Backend):
"""Check filtering entries by tag sets using the inclusive 'gt' operator."""
# 'gt' is documented as "greater than or equal to"; for Python sets ``>=`` is the
# superset test, so an empty target set is satisfied by any set of tags
results = list(backends.search(
SearchTerm('tags', 'gt', set())
))
assert len(results) == 2 # only the Collection and Snapshot have .tags

# local stand-in tag type used only by this test
class Tag(Flag):
T1 = auto()
T2 = auto()

# give the two entries different tag sets and persist the change
results[0].tags = {Tag.T1}
results[1].tags = {Tag.T1, Tag.T2}
backends.update_entry(results[0])
backends.update_entry(results[1])

# both entries carry T1
results = list(backends.search(
SearchTerm('tags', 'gt', {Tag.T1})
))
assert len(results) == 2

# only one entry carries both T1 and T2
results = list(backends.search(
SearchTerm('tags', 'gt', {Tag.T1, Tag.T2})
))
assert len(results) == 1


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_search_error(backends: _Backend):
"""Check that invalid search terms raise once the results are consumed."""
# 'like' with a non-string filter value is expected to raise TypeError
with pytest.raises(TypeError):
results = backends.search(
SearchTerm('data', 'like', 5)
)
# search is annotated as returning a generator, so list() forces evaluation
list(results)
# 'near' is not among the operators listed in _Backend.search
with pytest.raises(ValueError):
results = backends.search(
SearchTerm('data', 'near', 5)
)
list(results)


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_update_entry(backends: _Backend):
# grab an entry from the database and modify it.
entry = list(backends.search(
description='collection 1 defining some motor fields'
SearchTerm('description', 'eq', 'collection 1 defining some motor fields')
))[0]
old_uuid = entry.uuid

entry.description = 'new_description'
backends.update_entry(entry)
new_entry = list(backends.search(
description='new_description'
SearchTerm('description', 'eq', 'new_description')
))[0]
new_uuid = new_entry.uuid

Expand Down
13 changes: 13 additions & 0 deletions superscore/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest

from superscore.backends.core import SearchTerm
from superscore.backends.filestore import FilestoreBackend
from superscore.client import Client
from superscore.control_layers import EpicsData
Expand Down Expand Up @@ -136,3 +137,15 @@ def test_find_config(sscore_cfg: str):
# explicit SUPERSCORE_CFG env var supercedes XDG_CONFIG_HOME
os.environ['SUPERSCORE_CFG'] = 'other/cfg'
assert 'other/cfg' == Client.find_config()


def test_search(sample_client):
"""Check the client-side 'isclose' operator, whose value is (target, rel_tol, abs_tol)."""
# plain tuple form; zero tolerances collapse the range to exactly 4
results = list(sample_client.search(
('data', 'isclose', (4, 0, 0))
))
assert len(results) == 0

# SearchTerm built by keyword; the tolerances widen the range to [1, 7]
results = list(sample_client.search(
SearchTerm(operator='isclose', attr='data', value=(4, .5, 1))
))
assert len(results) == 4
Loading

0 comments on commit cdda3a5

Please sign in to comment.