Skip to content

Commit

Permalink
Merge pull request #926 from CitrineInformatics/feature/pla-13437-add…
Browse files Browse the repository at this point in the history
…-gemd-query

Implement GemdQuery objects for table building
  • Loading branch information
kroenlein authored Mar 15, 2024
2 parents 3e19066 + ef55b55 commit af74642
Show file tree
Hide file tree
Showing 9 changed files with 442 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/citrine/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.0.2"
__version__ = "3.1.0"
Empty file.
161 changes: 161 additions & 0 deletions src/citrine/gemd_queries/criteria.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""Definitions for GemdQuery objects, and their sub-objects."""
from typing import List, Type

from gemd.enumeration.base_enumeration import BaseEnumeration

from citrine._serialization.serializable import Serializable
from citrine._serialization.polymorphic_serializable import PolymorphicSerializable
from citrine._serialization import properties
from citrine.gemd_queries.filter import PropertyFilterType

__all__ = ['MaterialClassification', 'TextSearchType',
'AndOperator', 'OrOperator',
'PropertiesCriteria', 'NameCriteria',
'MaterialRunClassificationCriteria', 'MaterialTemplatesCriteria'
]


class MaterialClassification(BaseEnumeration):
    """Classify a Material by where it is found within a Material History."""

    # Each member's value is the string used for that classification.
    ATOMIC_INGREDIENT = "atomic_ingredient"
    INTERMEDIATE_INGREDIENT = "intermediate_ingredient"
    TERMINAL_MATERIAL = "terminal_material"


class TextSearchType(BaseEnumeration):
    """The kind of text match a search should perform."""

    # Each member's value is the string used for that search style.
    EXACT = "exact"
    PREFIX = "prefix"
    SUFFIX = "suffix"
    SUBSTRING = "substring"


class Criteria(PolymorphicSerializable):
    """Abstract base for the conditions that can be applied when searching for materials."""

    @classmethod
    def get_type(cls, data) -> Type[Serializable]:
        """
        Return the concrete Criteria subclass selected by ``data['type']``.

        The lookup table is keyed by each candidate class's ``typ`` attribute.
        With a dict payload, a missing 'type' key or a value matching no
        candidate surfaces as a KeyError from the final lookup.

        """
        candidates: List[Type[Criteria]] = [
            AndOperator,
            OrOperator,
            PropertiesCriteria,
            NameCriteria,
            MaterialRunClassificationCriteria,
            MaterialTemplatesCriteria,
        ]
        by_type = {}
        for candidate in candidates:
            by_type[candidate.typ] = candidate
        return by_type[data['type']]


class AndOperator(Serializable['AndOperator'], Criteria):
    """
    Combine multiple criteria, requiring EACH to be true for a match.

    Parameters
    ----------
    criteria: List[Criteria]
        Conditions that every response must satisfy (i.e., joined with an AND).

    """

    criteria = properties.List(
        properties.Object(Criteria),
        "criteria"
    )
    # Serialized under 'type'; Criteria.get_type compares data['type'] to this attribute.
    typ = properties.String(
        'type',
        default="and_operator",
        deserializable=False
    )


class OrOperator(Serializable['OrOperator'], Criteria):
    """
    Combine multiple criteria, requiring ANY to be true for a match.

    Parameters
    ----------
    criteria: List[Criteria]
        Conditions of which at least one must match (i.e., joined with an OR).

    """

    criteria = properties.List(
        properties.Object(Criteria),
        "criteria"
    )
    # Serialized under 'type'; Criteria.get_type compares data['type'] to this attribute.
    typ = properties.String(
        'type',
        default="or_operator",
        deserializable=False
    )


class PropertiesCriteria(Serializable['PropertiesCriteria'], Criteria):
    """
    Look for materials with a particular Property and optionally Value types & ranges.

    Parameters
    ----------
    property_templates_filter: Set[UUID]
        The citrine IDs of the property templates that matches must reference.
    value_type_filter: Optional[PropertyFilterType]
        The value range that matches must conform to.

    """

    property_templates_filter = properties.Set(
        properties.UUID,
        "property_templates_filter"
    )
    value_type_filter = properties.Optional(
        properties.Object(PropertyFilterType),
        "value_type_filter"
    )
    # Serialized under 'type'; Criteria.get_type compares data['type'] to this attribute.
    typ = properties.String(
        'type',
        default="properties_criteria",
        deserializable=False
    )


class NameCriteria(Serializable['NameCriteria'], Criteria):
    """
    Look for materials with particular names.

    Parameters
    ----------
    name: str
        The name that returned objects must have.
    search_type: TextSearchType
        Which kind of string match to use (exact, substring, ...).

    """

    name = properties.String('name')
    search_type = properties.Enumeration(
        TextSearchType,
        'search_type'
    )
    # Serialized under 'type'; Criteria.get_type compares data['type'] to this attribute.
    typ = properties.String(
        'type',
        default="name_criteria",
        deserializable=False
    )


class MaterialRunClassificationCriteria(
    Serializable['MaterialRunClassificationCriteria'],
    Criteria
):
    """
    Look for materials with particular classification, defined by MaterialClassification.

    Parameters
    ----------
    classifications: Set[MaterialClassification]
        The classifications to match, based on where in a material history an object appears.

    """

    classifications = properties.Set(
        properties.Enumeration(MaterialClassification),
        'classifications'
    )
    # Serialized under 'type'; Criteria.get_type compares data['type'] to this attribute.
    typ = properties.String(
        'type', default="material_run_classification_criteria", deserializable=False
    )


class MaterialTemplatesCriteria(Serializable['MaterialTemplatesCriteria'], Criteria):
    """
    Look for materials with particular Material Templates and tags.

    This has a similar behavior to the old MaterialRunByTemplate Row definition.

    Parameters
    ----------
    material_templates_identifiers: Set[UUID]
        Which material templates to filter by.
    tag_filters: Set[str]
        Which tags to filter by.

    """

    material_templates_identifiers = properties.Set(
        properties.UUID, "material_templates_identifiers"
    )
    tag_filters = properties.Set(
        properties.String,
        'tag_filters'
    )
    # Serialized under 'type'; Criteria.get_type compares data['type'] to this attribute.
    # NOTE(review): the type string says "template" (singular) while the class and
    # field names say "templates" -- confirm this matches what the service emits.
    typ = properties.String(
        'type',
        default="material_template_criteria",
        deserializable=False
    )
75 changes: 75 additions & 0 deletions src/citrine/gemd_queries/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Definitions for GemdQuery objects, and their sub-objects."""
from typing import List, Type

from citrine._serialization.serializable import Serializable
from citrine._serialization.polymorphic_serializable import PolymorphicSerializable
from citrine._serialization import properties

__all__ = ['AllRealFilter', 'AllIntegerFilter', 'NominalCategoricalFilter']


class PropertyFilterType(PolymorphicSerializable):
    """Abstract base for the filters that constrain which property values match."""

    @classmethod
    def get_type(cls, data) -> Type[Serializable]:
        """
        Return the concrete filter subclass selected by ``data['type']``.

        The lookup table is keyed by each candidate class's ``typ`` attribute.
        With a dict payload, a missing 'type' key or a value matching no
        candidate surfaces as a KeyError from the final lookup.

        """
        candidates: List[Type[PropertyFilterType]] = [
            NominalCategoricalFilter,
            AllRealFilter,
            AllIntegerFilter,
        ]
        by_type = {}
        for candidate in candidates:
            by_type[candidate.typ] = candidate
        return by_type[data['type']]


class AllRealFilter(Serializable['AllRealFilter'], PropertyFilterType):
    """
    Filter for any real value that fits certain constraints.

    Parameters
    ----------
    lower: float
        The lower bound on this filter range.
    upper: float
        The upper bound on this filter range.
    unit: str
        The units associated with the floating point values for this filter.

    """

    lower = properties.Float("lower")
    upper = properties.Float("upper")
    unit = properties.String("unit")
    # Serialized under 'type'; PropertyFilterType.get_type compares data['type'] to it.
    typ = properties.String(
        'type',
        default="all_real_filter",
        deserializable=False
    )


class AllIntegerFilter(Serializable['AllIntegerFilter'], PropertyFilterType):
    """
    Filter for any integer value that fits certain constraints.

    Parameters
    ----------
    lower: float
        The lower bound on this filter range.
    upper: float
        The upper bound on this filter range.

    """

    # NOTE(review): both bounds are declared as Float properties even though the
    # filter targets integer values -- confirm this is what the service expects.
    lower = properties.Float("lower")
    upper = properties.Float("upper")
    # Serialized under 'type'; PropertyFilterType.get_type compares data['type'] to it.
    typ = properties.String(
        'type',
        default="all_integer_filter",
        deserializable=False
    )


class NominalCategoricalFilter(Serializable['NominalCategoricalFilter'], PropertyFilterType):
    """
    Filter based upon a fixed list of Categorical Values.

    Parameters
    ----------
    categories: Set[str]
        The categorical values that count as a match.

    """

    categories = properties.Set(
        properties.String,
        'categories'
    )
    # Serialized under 'type'; PropertyFilterType.get_type compares data['type'] to it.
    typ = properties.String(
        'type',
        default="nominal_categorical_filter",
        deserializable=False
    )
64 changes: 64 additions & 0 deletions src/citrine/gemd_queries/gemd_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Definitions for GemdQuery objects, and their sub-objects."""
from gemd.enumeration.base_enumeration import BaseEnumeration

from citrine._serialization.serializable import Serializable
from citrine._serialization import properties
from citrine.gemd_queries.criteria import Criteria


class GemdObjectType(BaseEnumeration):
    """The classes of GEMD object that a GemdQuery can be told to consider."""

    # Each member lists its canonical value first, followed by a synonym.
    # An old defect left some stored GemdQuery values with invalid enums;
    # the synonyms allow those invalid old values to be read, but not emitted.
    MEASUREMENT_TEMPLATE_TYPE = "measurement_template", "MEASUREMENT_TEMPLATE_TYPE"
    MATERIAL_TEMPLATE_TYPE = "material_template", "MATERIAL_TEMPLATE_TYPE"
    PROCESS_TEMPLATE_TYPE = "process_template", "PROCESS_TEMPLATE_TYPE"
    PROPERTY_TEMPLATE_TYPE = "property_template", "PROPERTY_TEMPLATE_TYPE"
    CONDITION_TEMPLATE_TYPE = "condition_template", "CONDITION_TEMPLATE_TYPE"
    PARAMETER_TEMPLATE_TYPE = "parameter_template", "PARAMETER_TEMPLATE_TYPE"
    PROCESS_RUN_TYPE = "process_run", "PROCESS_RUN_TYPE"
    PROCESS_SPEC_TYPE = "process_spec", "PROCESS_SPEC_TYPE"
    MATERIAL_RUN_TYPE = "material_run", "MATERIAL_RUN_TYPE"
    MATERIAL_SPEC_TYPE = "material_spec", "MATERIAL_SPEC_TYPE"
    INGREDIENT_RUN_TYPE = "ingredient_run", "INGREDIENT_RUN_TYPE"
    INGREDIENT_SPEC_TYPE = "ingredient_spec", "INGREDIENT_SPEC_TYPE"
    MEASUREMENT_RUN_TYPE = "measurement_run", "MEASUREMENT_RUN_TYPE"
    MEASUREMENT_SPEC_TYPE = "measurement_spec", "MEASUREMENT_SPEC_TYPE"


class GemdQuery(Serializable['GemdQuery']):
    """
    This describes what data objects to fetch (or graph of data objects).

    Parameters
    ----------
    criteria: List[Criteria]
        List of conditions all responses must satisfy (i.e., joined with an AND).
    datasets: Set[UUID]
        Set of datasets to look in for matching objects.
    object_types: Set[GemdObjectType]
        Classes of objects to consider when searching.
    schema_version: int
        What version of the query schema this package represents.

    """

    # NOTE(review): the list/set defaults below are created once, when the class
    # body runs. Whether the property layer copies them per instance is not
    # visible here -- confirm that in-place mutation (e.g. ``query.criteria.append``)
    # cannot leak between instances.
    criteria = properties.List(
        properties.Object(Criteria),
        "criteria",
        default=[]
    )
    datasets = properties.Set(
        properties.UUID,
        "datasets",
        default=set()
    )
    # By default every GemdObjectType member is included.
    object_types = properties.Set(
        properties.Enumeration(GemdObjectType),
        'object_types',
        default=set(GemdObjectType)
    )
    schema_version = properties.Integer(
        'schema_version',
        default=1
    )

    @classmethod
    def _pre_build(cls, data: dict) -> dict:
        """
        Check the payload's schema version before building.

        Returns ``data`` unchanged when its 'schema_version' entry equals 1.
        Raises ValueError for any other value, including a missing entry
        (which ``dict.get`` reports as None).

        """
        version = data.get('schema_version')
        if version == 1:
            return data
        raise ValueError(
            f"This version of the library only supports schema_version 1, not '{version}'"
        )
4 changes: 4 additions & 0 deletions src/citrine/resources/table_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from citrine._utils.functions import format_escaped_url
from citrine.resources.data_concepts import CITRINE_SCOPE, _make_link_by_uid
from citrine.resources.process_template import ProcessTemplate
from citrine.gemd_queries.gemd_query import GemdQuery
from citrine.gemtables.columns import Column, MeanColumn, IdentityColumn, OriginalUnitsColumn, \
ConcatColumn
from citrine.gemtables.rows import Row
Expand Down Expand Up @@ -53,6 +54,8 @@ class TableConfig(Resource["TableConfig"]):
List of row definitions that define the rows of the table
columns: list[Column]
Column definitions, which describe how the variables are shaped into the table
gemd_query: Optional[GemdQuery]
The query used to define the materials underpinning this table
"""

Expand All @@ -79,6 +82,7 @@ def _get_dups(lst: List) -> List:
variables = properties.List(properties.Object(Variable), "variables")
rows = properties.List(properties.Object(Row), "rows")
columns = properties.List(properties.Object(Column), "columns")
gemd_query = properties.Optional(properties.Object(GemdQuery), "gemd_query")

def __init__(self, name: str, *, description: str, datasets: List[UUID],
variables: List[Variable], rows: List[Row], columns: List[Column]):
Expand Down
Empty file added tests/gemd_query/__init__.py
Empty file.
45 changes: 45 additions & 0 deletions tests/gemd_query/test_gemd_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from uuid import uuid4
import pytest

from citrine.gemd_queries.criteria import PropertiesCriteria
from citrine.gemd_queries.filter import AllRealFilter
from citrine.gemd_queries.gemd_query import GemdQuery

from tests.utils.factories import GemdQueryDataFactory


def test_gemd_query_version():
    """A factory payload builds as-is, but is rejected once schema_version is set to 2."""
    accepted = GemdQueryDataFactory()
    assert GemdQuery.build(accepted) is not None

    rejected = GemdQueryDataFactory()
    rejected['schema_version'] = 2
    with pytest.raises(ValueError):
        GemdQuery.build(rejected)


def test_criteria_rebuild():
    """Round-trip a populated GemdQuery through dump() and build(), then compare fields."""
    real_filter = AllRealFilter()
    real_filter.unit = 'm'
    real_filter.lower = 0
    real_filter.upper = 1

    prop_criteria = PropertiesCriteria()
    prop_criteria.property_templates_filter = {uuid4()}
    prop_criteria.value_type_filter = real_filter

    query = GemdQuery()
    query.criteria.append(prop_criteria)
    query.datasets.add(uuid4())
    query.object_types = {'material_run'}

    rebuilt = GemdQuery.build(query.dump())

    assert len(query.criteria) == len(rebuilt.criteria)

    # Compare the single criterion and its nested value filter field by field.
    before, after = query.criteria[0], rebuilt.criteria[0]
    assert before.property_templates_filter == after.property_templates_filter
    for field in ('unit', 'lower', 'upper'):
        assert getattr(before.value_type_filter, field) == getattr(after.value_type_filter, field)

    # Compare the top-level query fields.
    for field in ('datasets', 'object_types', 'schema_version'):
        assert getattr(query, field) == getattr(rebuilt, field)
Loading

0 comments on commit af74642

Please sign in to comment.