From f50efa5c059c3dec1b1969bd1a8e828969604b9a Mon Sep 17 00:00:00 2001 From: Fabio Buso Date: Wed, 11 Nov 2020 13:45:04 +0100 Subject: [PATCH] On demand feature group append (#137) --- python/hsfs/core/feature_group_base.py | 68 +++++++++++++++----- python/hsfs/feature_group.py | 88 -------------------------- python/hsfs/feature_store.py | 2 +- 3 files changed, 54 insertions(+), 104 deletions(-) diff --git a/python/hsfs/core/feature_group_base.py b/python/hsfs/core/feature_group_base.py index 0c9894eedb..eccf13ca0d 100644 --- a/python/hsfs/core/feature_group_base.py +++ b/python/hsfs/core/feature_group_base.py @@ -24,51 +24,89 @@ def __init__(self, featurestore_id): ) def delete(self): - self._feature_group_base_engine.delete(self) + """Drop the entire feature group along with its feature data. + + !!! danger "Potentially dangerous operation" + This operation drops all metadata associated with **this version** of the + feature group **and** all the feature data in offline and online storage + associated with it. + + # Raises + `RestAPIError`. + """ + self._feature_group_engine.delete(self) def select_all(self): - """Select all features in the feature group and return a query object.""" + """Select all features in the feature group and return a query object. + + The query can be used to construct joins of feature groups or create a + training dataset immediately. + + # Returns + `Query`. A query object with all features of the feature group. + """ return query.Query( self._feature_store_name, self._feature_store_id, self, self._features ) def select(self, features=[]): + """Select a subset of features of the feature group and return a query object. + + The query can be used to construct joins of feature groups or create a training + dataset with a subset of features of the feature group. + + # Arguments + features: list, optional. A list of `Feature` objects or feature names as + strings to be selected, defaults to []. + + # Returns + `Query`: A query object with the selected features of the feature group. + """ return query.Query( self._feature_store_name, self._feature_store_id, self, features ) - def add_tag(self, name, value=None): + def add_tag(self, name: str, value: str = None): """Attach a name/value tag to a feature group. A tag can consist of a name only or a name/value pair. Tag names are unique identifiers. - :param name: name of the tag to be added - :type name: str - :param value: value of the tag to be added, defaults to None - :type value: str, optional + # Arguments + name: Name of the tag to be added. + value: Value of the tag to be added, defaults to `None`. + + # Raises + `RestAPIError`. """ self._feature_group_base_engine.add_tag(self, name, value) - def delete_tag(self, name): + def delete_tag(self, name: str): """Delete a tag from a feature group. Tag names are unique identifiers. - :param name: name of the tag to be removed - :type name: str + # Arguments + name: Name of the tag to be removed. + + # Raises + `RestAPIError`. """ self._feature_group_base_engine.delete_tag(self, name) - def get_tag(self, name=None): + def get_tag(self, name: str = None): """Get the tags of a feature group. Tag names are unique identifiers. Returns all tags if no tag name is specified. - :param name: name of the tag to get, defaults to None - :type name: str, optional - :return: list of tags as name/value pairs - :rtype: list of dict + # Arguments + name: Name of the tag to get, defaults to `None`. + + # Returns + `list[Tag]`. List of tags as name/value pairs. + + # Raises + `RestAPIError`. """ return self._feature_group_base_engine.get_tags(self, name) diff --git a/python/hsfs/feature_group.py b/python/hsfs/feature_group.py index 6250162943..d4d32bc6de 100644 --- a/python/hsfs/feature_group.py +++ b/python/hsfs/feature_group.py @@ -222,36 +222,6 @@ def show(self, n: int, online: Optional[bool] = False): ) return self.select_all().show(n, online) - def select_all(self): - """Select all features in the feature group and return a query object. - - The query can be used to construct joins of feature groups or create a - training dataset immediately. - - # Returns - `Query`. A query object with all features of the feature group. - """ - return query.Query( - self._feature_store_name, self._feature_store_id, self, self._features - ) - - def select(self, features=[]): - """Select a subset of features of the feature group and return a query object. - - The query can be used to construct joins of feature groups or create a training - dataset with a subset of features of the feature group. - - # Arguments - features: list, optional. A list of `Feature` objects or feature names as - strings to be selected, defaults to []. - - # Returns - `Query`: A query object with the selected features of the feature group. - """ - return query.Query( - self._feature_store_name, self._feature_store_id, self, features - ) - def save( self, features: Union[ @@ -383,19 +353,6 @@ def commit_details(self, limit: Optional[int] = None): """ return self._feature_group_engine.commit_details(self, limit) - def delete(self): - """Drop the entire feature group along with its feature data. - - !!! danger "Potentially dangerous operation" - This operation drops all metadata associated with **this version** of the - feature group **and** all the feature data in offline and online storage - associated with it. - - # Raises - `RestAPIError`. - """ - self._feature_group_engine.delete(self) - def commit_delete_record( self, delete_df: TypeVar("pyspark.sql.DataFrame"), # noqa: F821 @@ -499,51 +456,6 @@ def append_features(self, features): self._feature_group_engine.append_features(self, new_features) return self - def add_tag(self, name: str, value: str = None): - """Attach a name/value tag to a feature group. - - A tag can consist of a name only or a name/value pair. Tag names are - unique identifiers. - - # Arguments - name: Name of the tag to be added. - value: Value of the tag to be added, defaults to `None`. - - # Raises - `RestAPIError`. - """ - self._feature_group_engine.add_tag(self, name, value) - - def delete_tag(self, name: str): - """Delete a tag from a feature group. - - Tag names are unique identifiers. - - # Arguments - name: Name of the tag to be removed. - - # Raises - `RestAPIError`. - """ - self._feature_group_engine.delete_tag(self, name) - - def get_tag(self, name: str = None): - """Get the tags of a feature group. - - Tag names are unique identifiers. Returns all tags if no tag name is - specified. - - # Arguments - name: Name of the tag to get, defaults to `None`. - - # Returns - `list[Tag]`. List of tags as name/value pairs. - - # Raises - `RestAPIError`. - """ - return self._feature_group_engine.get_tags(self, name) - @classmethod def from_response_json(cls, json_dict): json_decamelized = humps.decamelize(json_dict) diff --git a/python/hsfs/feature_store.py b/python/hsfs/feature_store.py index 4e88b1b8b1..2033311d7f 100644 --- a/python/hsfs/feature_store.py +++ b/python/hsfs/feature_store.py @@ -114,7 +114,7 @@ def get_feature_group(self, name: str, version: int = None): name, version, feature_group_api.FeatureGroupApi.CACHED ) - def get_on_demand_feature_group(self, name: str, version: int =None): + def get_on_demand_feature_group(self, name: str, version: int = None): """Get a on-demand feature group entity from the feature store. Getting a on-demand feature group from the Feature Store means getting its