Skip to content

Commit

Permalink
[FSTORE-612] Improve API reference for feature monitoring
Browse files Browse the repository at this point in the history
  • Loading branch information
javierdlrm committed Feb 14, 2024
1 parent d5ee290 commit cf53db8
Show file tree
Hide file tree
Showing 16 changed files with 350 additions and 86 deletions.
77 changes: 76 additions & 1 deletion auto_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@
},
"api/embedding_index_api.md": {
"embedding_index": ["hsfs.embedding.EmbeddingIndex"],
"embedding_index_methods": keras_autodoc.get_methods("hsfs.embedding.EmbeddingIndex"),
"embedding_index_methods": keras_autodoc.get_methods(
"hsfs.embedding.EmbeddingIndex"
),
},
"api/spine_group_api.md": {
"fg": ["hsfs.feature_group.SpineGroup"],
Expand Down Expand Up @@ -256,6 +258,79 @@
"hsfs.core.explicit_provenance.Artifact"
),
},
"api/statistics_api.md": {
"statistics": ["hsfs.statistics.Statistics"],
"statistics_properties": keras_autodoc.get_properties(
"hsfs.statistics.Statistics"
),
},
"api/split_statistics_api.md": {
"split_statistics": ["hsfs.split_statistics.SplitStatistics"],
"split_statistics_properties": keras_autodoc.get_properties(
"hsfs.split_statistics.SplitStatistics"
),
},
"api/feature_descriptive_statistics_api.md": {
"feature_descriptive_statistics": [
"hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics"
],
"feature_descriptive_statistics_properties": keras_autodoc.get_properties(
"hsfs.core.feature_descriptive_statistics.FeatureDescriptiveStatistics"
),
},
"api/feature_monitoring_config_api.md": {
"feature_monitoring_config": [
"hsfs.core.feature_monitoring_config.FeatureMonitoringConfig"
],
"feature_monitoring_config_properties": keras_autodoc.get_properties(
"hsfs.core.feature_monitoring_config.FeatureMonitoringConfig"
),
"feature_monitoring_config_methods": keras_autodoc.get_methods(
"hsfs.core.feature_monitoring_config.FeatureMonitoringConfig",
exclude=[
"from_response_json",
"update_from_response_json",
"json",
"to_dict",
],
),
# from feature group
"feature_monitoring_config_creation_fg": [
"hsfs.feature_group.FeatureGroup.create_statistics_monitoring",
"hsfs.feature_group.FeatureGroup.create_feature_monitoring",
],
# from feature view
"feature_monitoring_config_creation_fv": [
"hsfs.feature_view.FeatureView.create_statistics_monitoring",
"hsfs.feature_view.FeatureView.create_feature_monitoring",
],
# retrieval
"feature_monitoring_config_retrieval_fg": [
"hsfs.feature_group.FeatureGroup.get_feature_monitoring_configs",
],
"feature_monitoring_config_retrieval_fv": [
"hsfs.feature_view.FeatureView.get_feature_monitoring_configs",
],
},
"api/feature_monitoring_result_api.md": {
"feature_monitoring_result": [
"hsfs.core.feature_monitoring_result.FeatureMonitoringResult"
],
"feature_monitoring_result_retrieval": [
"hsfs.core.feature_monitoring_config.FeatureMonitoringConfig.get_history"
],
"feature_monitoring_result_properties": keras_autodoc.get_properties(
"hsfs.core.feature_monitoring_result.FeatureMonitoringResult"
),
},
"api/feature_monitoring_window_config_api.md": {
"feature_monitoring_window_config": [
"hsfs.core.monitoring_window_config.MonitoringWindowConfig"
],
"feature_monitoring_window_config_properties": keras_autodoc.get_properties(
"hsfs.core.monitoring_window_config.MonitoringWindowConfig"
),
},
}

# Absolute path of the directory that contains this script.
hsfs_dir = pathlib.Path(__file__).resolve().parent
Expand Down
7 changes: 7 additions & 0 deletions docs/templates/api/feature_descriptive_statistics_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Feature Descriptive Statistics

{{feature_descriptive_statistics}}

## Properties

{{feature_descriptive_statistics_properties}}
27 changes: 27 additions & 0 deletions docs/templates/api/feature_monitoring_config_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Feature Monitoring Configuration

{{feature_monitoring_config}}

## Creation from Feature Group

{{feature_monitoring_config_creation_fg}}

## Creation from Feature View

{{feature_monitoring_config_creation_fv}}

## Retrieval from Feature Group

{{feature_monitoring_config_retrieval_fg}}

## Retrieval from Feature View

{{feature_monitoring_config_retrieval_fv}}

## Properties

{{feature_monitoring_config_properties}}

## Methods

{{feature_monitoring_config_methods}}
11 changes: 11 additions & 0 deletions docs/templates/api/feature_monitoring_result_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Feature Monitoring Result

{{feature_monitoring_result}}

## Retrieval

{{feature_monitoring_result_retrieval}}

## Properties

{{feature_monitoring_result_properties}}
7 changes: 7 additions & 0 deletions docs/templates/api/feature_monitoring_window_config_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Feature Monitoring Window Configuration

{{feature_monitoring_window_config}}

## Properties

{{feature_monitoring_window_config_properties}}
7 changes: 7 additions & 0 deletions docs/templates/api/split_statistics_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Split Statistics

{{split_statistics}}

## Properties

{{split_statistics_properties}}
7 changes: 7 additions & 0 deletions docs/templates/api/statistics_api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Statistics

{{statistics}}

## Properties

{{statistics_properties}}
2 changes: 1 addition & 1 deletion docs/templates/api/validation_report_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

{{validation_report_validate}}

## Fetch Validation Reports
## Retrieval

{{validation_report_get}}

Expand Down
8 changes: 8 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ nav:
- ValidationReport: generated/api/validation_report_api.md
- Job: generated/api/job.md
- Provenance Links: generated/api/links.md
- Statistics:
- Statistics: generated/api/statistics_api.md
- Split Statistics: generated/api/split_statistics_api.md
- Feature Descriptive Statistics: generated/api/feature_descriptive_statistics_api.md
- Feature Monitoring:
- Configuration: generated/api/feature_monitoring_config_api.md
- Result: generated/api/feature_monitoring_result_api.md
- Window: generated/api/feature_monitoring_window_config_api.md
- Contributing: CONTRIBUTING.md
- Community ↗: https://community.hopsworks.ai/

Expand Down
33 changes: 33 additions & 0 deletions python/hsfs/core/feature_descriptive_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,72 +214,105 @@ def id(self) -> Optional[int]:

@property
def feature_type(self) -> str:
    """Data type of the feature: one of Boolean, Fractional, Integral, or String."""
    return self._feature_type

@property
def feature_name(self) -> str:
    """Name of the feature described by these statistics."""
    return self._feature_name

@property
def count(self) -> int:
    """Number of values of the feature."""
    return self._count

@property
def completeness(self) -> Optional[float]:
    """Fraction of non-null values of the feature."""
    return self._completeness

@property
def num_non_null_values(self) -> Optional[int]:
    """Number of non-null values of the feature."""
    return self._num_non_null_values

@property
def num_null_values(self) -> Optional[int]:
    """Number of null values of the feature."""
    return self._num_null_values

@property
def approx_num_distinct_values(self) -> Optional[int]:
    """Approximate number of distinct values of the feature."""
    return self._approx_num_distinct_values

@property
def min(self) -> Optional[float]:
    """Minimum value of the feature."""
    return self._min

@property
def max(self) -> Optional[float]:
    """Maximum value of the feature."""
    return self._max

@property
def sum(self) -> Optional[float]:
    """Sum of all values of the feature."""
    return self._sum

@property
def mean(self) -> Optional[float]:
    """Mean of the feature values."""
    return self._mean

@property
def stddev(self) -> Optional[float]:
    """Standard deviation of the feature values."""
    return self._stddev

@property
def percentiles(self) -> Optional[Mapping[str, float]]:
    """Percentiles of the feature values."""
    # NOTE(review): annotated as a str -> float mapping; the key format is not
    # visible from this accessor -- confirm against the code that populates it.
    return self._percentiles

@property
def distinctness(self) -> Optional[float]:
    """Fraction of distinct values of a feature over the number of all its values. Distinct values occur at least once.

    !!! note "Example"
        $[a, a, b]$ contains two distinct values $a$ and $b$, so distinctness is $2/3$.
    """
    return self._distinctness

@property
def entropy(self) -> Optional[float]:
    """Entropy of the feature values.

    Entropy is a measure of the level of information contained in an event (feature value) when considering all possible events (all feature values).
    It is estimated using observed value counts as the negative sum of (value_count/total_count) * log(value_count/total_count).

    !!! note "Example"
        $[a, b, b, c, c]$ has three distinct values with counts $[1, 2, 2]$.
        Entropy is then $(-1/5*log(1/5)-2/5*log(2/5)-2/5*log(2/5)) = 1.055$
        (the value $1.055$ is obtained with the natural logarithm).
    """
    return self._entropy

@property
def uniqueness(self) -> Optional[float]:
    """Fraction of unique values of a feature over the number of all its values. Unique values occur exactly once.

    !!! note "Example"
        $[a, a, b]$ contains one unique value $b$, so uniqueness is $1/3$.
    """
    return self._uniqueness

@property
def exact_num_distinct_values(self) -> Optional[int]:
    """Exact number of distinct values of the feature."""
    return self._exact_num_distinct_values

@property
def extended_statistics(self) -> Optional[dict]:
    """Additional statistics computed on the feature values, such as histograms and correlations."""
    return self._extended_statistics
Loading

0 comments on commit cf53db8

Please sign in to comment.