Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Augment search methods #79

Merged
merged 11 commits into from
Sep 19, 2024
24 changes: 24 additions & 0 deletions docs/source/upcoming_release_notes/79-augment_search_methods.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
79 augment search methods
#########################

API Breaks
----------
- ``Client.search`` takes SearchTerms as ``*args`` rather than key-value pairs as ``**kwargs``
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The github diff rendered part of this as italics... strange

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It behaves weird with my vim highlighting too. Maybe the double ** is a parsing edge case?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image

It renders funny in sphinx too, the * ends up being a hyperlink to an id anchor? This is something we can fix up when we actually publish release notes though

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Later we can add backticks around these to force them to be literals


Features
--------
- regex search on Entry text fields
- filter Entries by tag
- filter Entries by attribute value

Bugfixes
--------
- N/A

Maintenance
-----------
- N/A

Contributors
------------
- shilorigins
28 changes: 25 additions & 3 deletions superscore/backends/core.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
"""
Base superscore data storage backend interface
"""
from typing import Generator
from collections.abc import Container, Generator
from typing import NamedTuple, Union
from uuid import UUID

from superscore.model import Entry, Root
from superscore.type_hints import AnyEpicsType

SearchTermValue = Union[AnyEpicsType, Container[AnyEpicsType], tuple[AnyEpicsType, ...]]
shilorigins marked this conversation as resolved.
Show resolved Hide resolved
SearchTermType = tuple[str, str, SearchTermValue]


class SearchTerm(NamedTuple):
    """
    A single search filter of the form (<attr>, <operator>, <value>).
    """
    # name of the Entry attribute (or a special keyword) the filter applies to
    attr: str
    # name of the comparison operator to apply
    operator: str
    # value the attribute is compared against; some operators take tuples
    value: SearchTermValue


class _Backend:
"""
Base class for data storage backend.
"""

def get_entry(self, meta_id: UUID) -> Entry:
"""
Get entry with ``meta_id``
Expand Down Expand Up @@ -40,8 +52,18 @@ def update_entry(self, entry: Entry) -> None:
"""
raise NotImplementedError

def search(self, **search_kwargs) -> Generator[Entry, None, None]:
"""Yield a Entry objects corresponding matching ``search_kwargs``"""
def search(self, *search_terms: SearchTermType) -> Generator[Entry, None, None]:
    """
    Yield Entry objects matching all ``search_terms``.

    Each SearchTerm has the format (<attr>, <operator>, <value>).  Some
    operators take tuples as values.

    The supported operators are:
    - eq (equals)
    - lt (less than or equal to)
    - gt (greater than or equal to)
    - in
    - like (fuzzy match, depends on type of value)

    Raises
    ------
    NotImplementedError
        always; this base-class method has no implementation
    """
    raise NotImplementedError

@property
Expand Down
82 changes: 52 additions & 30 deletions superscore/backends/filestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
import json
import logging
import os
import re
import shutil
from dataclasses import fields, replace
from typing import Any, Dict, Generator, Optional, Union
from uuid import UUID, uuid4

from apischema import deserialize, serialize

from superscore.backends.core import _Backend
from superscore.backends.core import SearchTermType, SearchTermValue, _Backend
from superscore.errors import BackendError
from superscore.model import Entry, Root
from superscore.type_hints import AnyEpicsType
from superscore.utils import build_abs_path

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -284,43 +286,63 @@ def delete_entry(self, entry: Entry) -> None:
with self._load_and_store_context() as db:
db.pop(entry.uuid, None)

def search(self, **search_kwargs) -> Generator[Entry, None, None]:
def search(self, *search_terms: SearchTermType) -> Generator[Entry, None, None]:
"""
Search for an entry that matches ``search_kwargs``.
Keys are attributes on `Entry` subclasses
Values can be either a single value to match or a tuple of valid values
Currently does not support partial matches.
Return entries that match all ``search_terms``.
Keys are attributes on `Entry` subclasses, or special keywords.
Values can be a single value or a tuple of values depending on operator.
"""
with self._load_and_store_context() as db:
for entry in db.values():
match_found = True
for key, value in search_kwargs.items():
# specific type handling, assuming is tuple
if key == "entry_type":
if not isinstance(entry, search_kwargs["entry_type"]):
match_found = False

elif key == "start_time":
if value > entry.creation_time:
match_found = False
elif key == "end_time":
if entry.creation_time > value:
match_found = False

conditions = []
for attr, op, target in search_terms:
# TODO: search for child pvs?

# plain key-value match
if attr == "entry_type":
conditions.append(isinstance(entry, target))
shilorigins marked this conversation as resolved.
Show resolved Hide resolved
else:
entry_value = getattr(entry, key, None)
if isinstance(value, tuple):
matched = entry_value in value
else:
matched = entry_value == value
try:
# check entry attribute by name
value = getattr(entry, attr)
conditions.append(self.compare(op, value, target))
except AttributeError:
shilorigins marked this conversation as resolved.
Show resolved Hide resolved
conditions.append(False)
if all(conditions):
yield entry

match_found = match_found and matched
@staticmethod
def compare(op: str, data: AnyEpicsType, target: SearchTermValue) -> bool:
"""
Return whether data and target satisfy the op comparator, typically durihg application
of a search filter. Possible values of op are detailed in _Backend.search

if match_found:
yield entry
Parameters
----------
op: str
one of the comparators that all backends must support, detailed in _Backend.search
data: AnyEpicsType | Tuple[AnyEpicsType]
data from an Entry that is being used to decide whether the Entry passes a filter
target: AnyEpicsType | Tuple[AnyEpicsType]
the filter value

Returns
-------
bool
whether data and target satisfy the op condition
"""
if op == "eq":
return data == target
elif op == "lt":
return data <= target
elif op == "gt":
return data >= target
elif op == "in":
return data in target
elif op == "like":
if isinstance(data, UUID):
data = str(data)
return re.search(target, data)
else:
raise ValueError(f"SearchTerm does not support operator \"{op}\"")

@contextlib.contextmanager
def _load_and_store_context(self) -> Generator[Dict[UUID, Any], None, None]:
Expand Down
25 changes: 21 additions & 4 deletions superscore/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from uuid import UUID

from superscore.backends import get_backend
from superscore.backends.core import _Backend
from superscore.backends.core import SearchTerm, SearchTermType, _Backend
from superscore.control_layers import ControlLayer, EpicsData
from superscore.control_layers.status import TaskStatus
from superscore.errors import CommunicationError
Expand Down Expand Up @@ -147,9 +147,26 @@ def find_config() -> Path:
# If found nothing
raise OSError("No superscore configuration file found. Check SUPERSCORE_CFG.")

def search(self, **post) -> Generator[Entry, None, None]:
"""Search by key-value pair. Can search by any field, including id"""
return self.backend.search(**post)
def search(self, *post: SearchTermType) -> Generator[Entry, None, None]:
    """
    Search backend for entries matching all SearchTerms in ``post``.  Can search by any
    field, plus some special keywords.  Backends support operators listed in _Backend.search.
    Some operators are supported in the UI / client and must be converted before being
    passed to the backend.

    The client-side ``isclose`` operator takes a ``(target, rel_tol, abs_tol)`` value and
    is rewritten as an inclusive ``gt`` / ``lt`` pair bounding
    ``target +/- (abs(target) * rel_tol + abs_tol)``.

    Parameters
    ----------
    *post : SearchTermType
        SearchTerm instances, or plain (attr, operator, value) tuples which are
        converted to SearchTerm

    Returns
    -------
    Generator[Entry, None, None]
        the result of the backend search for the converted search terms
    """
    new_search_terms = []
    for search_term in post:
        if not isinstance(search_term, SearchTerm):
            search_term = SearchTerm(*search_term)
        if search_term.operator == 'isclose':
            target, rel_tol, abs_tol = search_term.value
            # scale the relative tolerance by the magnitude of target; using the signed
            # value would invert the bounds (lower > upper) for negative targets
            lower = target - abs(target) * rel_tol - abs_tol
            upper = target + abs(target) * rel_tol + abs_tol
            new_search_terms.append(SearchTerm(search_term.attr, 'gt', lower))
            new_search_terms.append(SearchTerm(search_term.attr, 'lt', upper))
        else:
            new_search_terms.append(search_term)
    return self.backend.search(*new_search_terms)

def save(self, entry: Entry):
"""Save information in ``entry`` to database"""
Expand Down
95 changes: 86 additions & 9 deletions superscore/tests/test_backend.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from enum import Flag, auto
from uuid import UUID

import pytest

from superscore.backends.core import _Backend
from superscore.backends.core import SearchTerm, _Backend
from superscore.errors import (BackendError, EntryExistsError,
EntryNotFoundError)
from superscore.model import Collection, Parameter, Snapshot
Expand Down Expand Up @@ -73,42 +74,118 @@ def test_delete_entry(backends: _Backend):
def test_search_entry(backends: _Backend):
# Given an entry we know is in the backend
results = backends.search(
description='collection 1 defining some motor fields'
SearchTerm('description', 'eq', 'collection 1 defining some motor fields')
)
assert len(list(results)) == 1
# Search by field name
results = backends.search(
uuid=UUID('ffd668d3-57d9-404e-8366-0778af7aee61')
SearchTerm('uuid', 'eq', UUID('ffd668d3-57d9-404e-8366-0778af7aee61'))
)
assert len(list(results)) == 1
# Search by field name
results = backends.search(data=2)
results = backends.search(
SearchTerm('data', 'eq', 2)
)
assert len(list(results)) == 3
# Search by field name
results = backends.search(
uuid=UUID('ecb42cdb-b703-4562-86e1-45bd67a2ab1a'), data=2
SearchTerm('uuid', 'eq', UUID('ecb42cdb-b703-4562-86e1-45bd67a2ab1a')),
tangkong marked this conversation as resolved.
Show resolved Hide resolved
SearchTerm('data', 'eq', 2)
)
assert len(list(results)) == 1

results = backends.search(entry_type=Snapshot,)
results = backends.search(
SearchTerm('entry_type', 'eq', Snapshot)
)
assert len(list(results)) == 1

results = backends.search(entry_type=(Snapshot, Collection))
results = backends.search(
SearchTerm('entry_type', 'in', (Snapshot, Collection))
)
assert len(list(results)) == 2

results = backends.search(
SearchTerm('data', 'lt', 3)
)
assert len(list(results)) == 3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something I hadn't considered in the previous PRs: would it add value to check that the results match the searches in the test suite? Or is this not worth the effort? I'm not sure but I thought I'd mention it.

for res in results:
    assert res.data <= 3


results = backends.search(
SearchTerm('data', 'gt', 3)
)
assert len(list(results)) == 1


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_fuzzy_search(backends: _Backend):
    """Check the number of entries returned by ``like`` searches on text and uuid fields."""
    # (attribute, pattern, expected number of matching entries)
    cases = (
        ('description', 'motor', 4),
        ('description', 'motor field (?!PREC)', 2),
        ('uuid', '17cc6ebf', 1),
    )
    for attr, pattern, expected_count in cases:
        matches = list(backends.search(SearchTerm(attr, 'like', pattern)))
        assert len(matches) == expected_count


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_tag_search(backends: _Backend):
    """Check filtering entries by their ``tags`` attribute using the ``gt`` operator."""
    class Tag(Flag):
        T1 = auto()
        T2 = auto()

    def entries_tagged(required):
        # collect every entry whose tags satisfy the 'gt' comparison against ``required``
        return list(backends.search(SearchTerm('tags', 'gt', required)))

    tagged = entries_tagged(set())
    assert len(tagged) == 2  # only the Collection and Snapshot have .tags

    # give the two entries different tag sets and persist them
    first, second = tagged[0], tagged[1]
    first.tags = {Tag.T1}
    second.tags = {Tag.T1, Tag.T2}
    backends.update_entry(first)
    backends.update_entry(second)

    assert len(entries_tagged({Tag.T1})) == 2
    assert len(entries_tagged({Tag.T1, Tag.T2})) == 1


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_search_error(backends: _Backend):
    """Check that invalid search terms raise the expected exception types."""
    # (expected exception, search term that should trigger it)
    bad_terms = (
        (TypeError, SearchTerm('data', 'like', 5)),
        (ValueError, SearchTerm('data', 'near', 5)),
    )
    for expected_error, term in bad_terms:
        with pytest.raises(expected_error):
            # consume the results inside the context so errors raised while
            # iterating are captured as well
            list(backends.search(term))


@pytest.mark.parametrize('backends', [0], indirect=True)
def test_update_entry(backends: _Backend):
# grab an entry from the database and modify it.
entry = list(backends.search(
description='collection 1 defining some motor fields'
SearchTerm('description', 'eq', 'collection 1 defining some motor fields')
))[0]
old_uuid = entry.uuid

entry.description = 'new_description'
backends.update_entry(entry)
new_entry = list(backends.search(
description='new_description'
SearchTerm('description', 'eq', 'new_description')
))[0]
new_uuid = new_entry.uuid

Expand Down
13 changes: 13 additions & 0 deletions superscore/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest

from superscore.backends.core import SearchTerm
from superscore.backends.filestore import FilestoreBackend
from superscore.client import Client
from superscore.control_layers import EpicsData
Expand Down Expand Up @@ -136,3 +137,15 @@ def test_find_config(sscore_cfg: str):
# explicit SUPERSCORE_CFG env var supersedes XDG_CONFIG_HOME
os.environ['SUPERSCORE_CFG'] = 'other/cfg'
assert 'other/cfg' == Client.find_config()


def test_search(sample_client):
    """Check client-side ``isclose`` searches given as a plain tuple and as a SearchTerm."""
    # plain tuple form with zero tolerances
    exact_term = ('data', 'isclose', (4, 0, 0))
    assert len(list(sample_client.search(exact_term))) == 0

    # SearchTerm built from keyword arguments, with non-zero tolerances
    loose_term = SearchTerm(operator='isclose', attr='data', value=(4, .5, 1))
    assert len(list(sample_client.search(loose_term))) == 4
Loading