From e37de2c789842c1f3a4063eed723a1f9365356e4 Mon Sep 17 00:00:00 2001
From: hectorcast-db
Date: Tue, 23 Jan 2024 14:25:37 +0100
Subject: [PATCH] Release v0.18.0 (#515)

Bugfixes:

* Fix Databricks OAuth M2M on Azure ([#513](https://github.com/databricks/databricks-sdk-py/pull/513)).

Other noteworthy changes:

* Use `[]` instead of `None` as default list value for deserialising responses ([#361](https://github.com/databricks/databricks-sdk-py/pull/361)).
* Support dev and staging workspaces ([#514](https://github.com/databricks/databricks-sdk-py/pull/514)).

API Changes:

 * Added `exists()` method for [w.tables](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/tables.html) workspace-level service.
 * Added [w.lakehouse_monitors](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/lakehouse_monitors.html) workspace-level service.
 * Added the following dataclasses: `databricks.sdk.service.catalog.CreateMonitor`, `databricks.sdk.service.catalog.DeleteLakehouseMonitorRequest`, `databricks.sdk.service.catalog.ExistsRequest`, `databricks.sdk.service.catalog.GetLakehouseMonitorRequest`, `databricks.sdk.service.catalog.MonitorCronSchedule`, `databricks.sdk.service.catalog.MonitorCronSchedulePauseStatus`, `databricks.sdk.service.catalog.MonitorCustomMetric`, `databricks.sdk.service.catalog.MonitorCustomMetricType`, `databricks.sdk.service.catalog.MonitorDataClassificationConfig`, `databricks.sdk.service.catalog.MonitorDestinations`, `databricks.sdk.service.catalog.MonitorInferenceLogProfileType`, `databricks.sdk.service.catalog.MonitorInferenceLogProfileTypeProblemType`, `databricks.sdk.service.catalog.MonitorInfo`, `databricks.sdk.service.catalog.MonitorInfoStatus`, `databricks.sdk.service.catalog.MonitorNotificationsConfig`, `databricks.sdk.service.catalog.MonitorTimeSeriesProfileType`, `databricks.sdk.service.catalog.TableExistsResponse` and `databricks.sdk.service.catalog.UpdateMonitor`.
 * Changed `create_obo_token()` method for [w.token_management](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/token_management.html) workspace-level service with new required argument order.
 * Changed `get()` method for [w.token_management](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/token_management.html) workspace-level service to return `databricks.sdk.service.settings.GetTokenResponse` dataclass.
 * Changed `lifetime_seconds` field for `databricks.sdk.service.settings.CreateOboTokenRequest` to no longer be required.
 * Added `databricks.sdk.service.settings.GetTokenResponse` dataclass.
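Usage example:

The sketch below is illustrative only and is not part of the generated SDK. It assumes a configured `WorkspaceClient` (credentials resolved from the environment or a config profile), and the catalog/schema/table names, assets directory, and service-principal application ID are placeholders chosen to show the new `w.tables.exists()` and `w.lakehouse_monitors` surfaces together with the changed `w.token_management` signatures.

```python
# Illustrative sketch only -- all resource names below are placeholders.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import MonitorTimeSeriesProfileType

w = WorkspaceClient()  # auth resolved from the environment or a config profile

# New in 0.18.0: check whether a table exists without fetching its metadata.
table = "main.sales.orders"  # placeholder three-level table name
if w.tables.exists(full_name=table).table_exists:
    # New in 0.18.0: Lakehouse Monitors workspace-level service.
    info = w.lakehouse_monitors.create(
        full_name=table,
        assets_dir=f"/Workspace/Shared/lakehouse_monitoring/{table}",  # placeholder
        output_schema_name="main.sales_monitoring",  # placeholder output schema
        time_series=MonitorTimeSeriesProfileType(granularities=["1 day"],
                                                 timestamp_col="event_ts"),
    )
    print(info.status, info.dashboard_id)

# Changed in 0.18.0: `lifetime_seconds` is now an optional keyword argument, and
# `get()` returns a `GetTokenResponse` wrapper instead of a bare `TokenInfo`.
obo = w.token_management.create_obo_token(
    application_id="00000000-0000-0000-0000-000000000000",  # placeholder SP app ID
    comment="example on-behalf token",
    lifetime_seconds=3600,
)
resp = w.token_management.get(obo.token_info.token_id)
print(resp.token_info.comment)
```

Existing callers that passed `lifetime_seconds` positionally to `create_obo_token()` should switch to the keyword form shown above, and callers of `w.token_management.get()` should read `token_info` from the returned `GetTokenResponse`.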
OpenAPI SHA: e05401ed5dd4974c5333d737ec308a7d451f749f, Date: 2024-01-23 --- .codegen/_openapi_sha | 2 +- CHANGELOG.md | 41 ++ databricks/sdk/__init__.py | 10 +- databricks/sdk/service/catalog.py | 777 +++++++++++++++++++++++++++- databricks/sdk/service/compute.py | 36 +- databricks/sdk/service/files.py | 18 +- databricks/sdk/service/iam.py | 6 +- databricks/sdk/service/jobs.py | 13 +- databricks/sdk/service/ml.py | 12 +- databricks/sdk/service/pipelines.py | 3 +- databricks/sdk/service/serving.py | 3 +- databricks/sdk/service/settings.py | 54 +- databricks/sdk/service/sql.py | 7 +- databricks/sdk/service/workspace.py | 6 +- databricks/sdk/version.py | 2 +- 15 files changed, 915 insertions(+), 75 deletions(-) diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index c9ce6cc2e..f705ffea6 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -0e0d4cbe87193e36c73b8b2be3b0dd0f1b013e00 \ No newline at end of file +e05401ed5dd4974c5333d737ec308a7d451f749f \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 64b3d33d1..007eca6c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,46 @@ # Version changelog +## 0.18.0 + +Bugfixes: + +* Fix Databricks OAuth M2M on Azure ([#513](https://github.com/databricks/databricks-sdk-py/pull/513)). + +Other noteworthy changes: + +* Use `[]` instead of `None` as default list value for deserialising responses ([#361](https://github.com/databricks/databricks-sdk-py/pull/361)). +* Support dev and staging workspaces ([#514](https://github.com/databricks/databricks-sdk-py/pull/514)). + +API Changes: + + * Added `exists()` method for [w.tables](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/tables.html) workspace-level service. + * Added [w.lakehouse_monitors](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/lakehouse_monitors.html) workspace-level service. + * Added the following dataclasses: + `databricks.sdk.service.catalog.CreateMonitor`, + `databricks.sdk.service.catalog.DeleteLakehouseMonitorRequest`, + `databricks.sdk.service.catalog.ExistsRequest`, + `databricks.sdk.service.catalog.GetLakehouseMonitorRequest`, + `databricks.sdk.service.catalog.MonitorCronSchedule`, + `databricks.sdk.service.catalog.MonitorCronSchedulePauseStatus`, + `databricks.sdk.service.catalog.MonitorCustomMetric`, + `databricks.sdk.service.catalog.MonitorCustomMetricType`, + `databricks.sdk.service.catalog.MonitorDataClassificationConfig`, + `databricks.sdk.service.catalog.MonitorDestinations`, + `databricks.sdk.service.catalog.MonitorInferenceLogProfileType`, + `databricks.sdk.service.catalog.MonitorInferenceLogProfileTypeProblemType`, + `databricks.sdk.service.catalog.MonitorInfo`, + `databricks.sdk.service.catalog.MonitorInfoStatus`, + `databricks.sdk.service.catalog.MonitorNotificationsConfig`, + `databricks.sdk.service.catalog.MonitorTimeSeriesProfileType`, + `databricks.sdk.service.catalog.TableExistsResponse` and + `databricks.sdk.service.catalog.UpdateMonitor`. + * Changed `create_obo_token()` method for [w.token_management](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/token_management.html) workspace-level service with new required argument order. + * Changed `get()` method for [w.token_management](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/token_management.html) workspace-level service to return `databricks.sdk.service.settings.GetTokenResponse` dataclass. 
+ * Changed `lifetime_seconds` field for `databricks.sdk.service.settings.CreateOboTokenRequest` to no longer be required. + * Added `databricks.sdk.service.settings.GetTokenResponse` dataclass. + +OpenAPI SHA: e05401ed5dd4974c5333d737ec308a7d451f749f, Date: 2024-01-23 + ## 0.17.0 * Use covariant type for `@retried(on=[...])` ([#486](https://github.com/databricks/databricks-sdk-py/pull/486)). diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index 5d345d381..13a97087c 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -12,8 +12,8 @@ ArtifactAllowlistsAPI, CatalogsAPI, ConnectionsAPI, ExternalLocationsAPI, FunctionsAPI, - GrantsAPI, MetastoresAPI, - ModelVersionsAPI, + GrantsAPI, LakehouseMonitorsAPI, + MetastoresAPI, ModelVersionsAPI, RegisteredModelsAPI, SchemasAPI, StorageCredentialsAPI, SystemSchemasAPI, @@ -173,6 +173,7 @@ def __init__(self, self._instance_profiles = InstanceProfilesAPI(self._api_client) self._ip_access_lists = IpAccessListsAPI(self._api_client) self._jobs = JobsAPI(self._api_client) + self._lakehouse_monitors = LakehouseMonitorsAPI(self._api_client) self._lakeview = LakeviewAPI(self._api_client) self._libraries = LibrariesAPI(self._api_client) self._metastores = MetastoresAPI(self._api_client) @@ -368,6 +369,11 @@ def jobs(self) -> JobsAPI: """The Jobs API allows you to create, edit, and delete jobs.""" return self._jobs + @property + def lakehouse_monitors(self) -> LakehouseMonitorsAPI: + """A monitor computes and monitors data or model quality metrics for a table over time.""" + return self._lakehouse_monitors + @property def lakeview(self) -> LakeviewAPI: """These APIs provide specific management operations for Lakeview dashboards.""" diff --git a/databricks/sdk/service/catalog.py b/databricks/sdk/service/catalog.py index 243723ec4..c1c068aff 100755 --- a/databricks/sdk/service/catalog.py +++ b/databricks/sdk/service/catalog.py @@ -1162,6 +1162,97 @@ def from_dict(cls, d: Dict[str, any]) -> CreateMetastoreAssignment: workspace_id=d.get('workspace_id', None)) +@dataclass +class CreateMonitor: + assets_dir: str + """The directory to store monitoring assets (e.g. dashboard, metric tables).""" + + output_schema_name: str + """Schema where output metric tables are created.""" + + baseline_table_name: Optional[str] = None + """Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table.""" + + custom_metrics: Optional[List[MonitorCustomMetric]] = None + """Custom metrics to compute on the monitored table. 
These can be aggregate metrics, derived + metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across + time windows).""" + + data_classification_config: Optional[MonitorDataClassificationConfig] = None + """The data classification config for the monitor.""" + + full_name: Optional[str] = None + """Full name of the table.""" + + inference_log: Optional[MonitorInferenceLogProfileType] = None + """Configuration for monitoring inference logs.""" + + notifications: Optional[List[MonitorNotificationsConfig]] = None + """The notification settings for the monitor.""" + + schedule: Optional[MonitorCronSchedule] = None + """The schedule for automatically updating and refreshing metric tables.""" + + skip_builtin_dashboard: Optional[bool] = None + """Whether to skip creating a default dashboard summarizing data quality metrics.""" + + slicing_exprs: Optional[List[str]] = None + """List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. + For high-cardinality columns, only the top 100 unique values by frequency will generate slices.""" + + snapshot: Optional[Any] = None + """Configuration for monitoring snapshot tables.""" + + time_series: Optional[MonitorTimeSeriesProfileType] = None + """Configuration for monitoring time series tables.""" + + warehouse_id: Optional[str] = None + """Optional argument to specify the warehouse for dashboard creation. If not specified, the first + running warehouse will be used.""" + + def as_dict(self) -> dict: + """Serializes the CreateMonitor into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.assets_dir is not None: body['assets_dir'] = self.assets_dir + if self.baseline_table_name is not None: body['baseline_table_name'] = self.baseline_table_name + if self.custom_metrics: body['custom_metrics'] = [v.as_dict() for v in self.custom_metrics] + if self.data_classification_config: + body['data_classification_config'] = self.data_classification_config.as_dict() + if self.full_name is not None: body['full_name'] = self.full_name + if self.inference_log: body['inference_log'] = self.inference_log.as_dict() + if self.notifications: body['notifications'] = [v.as_dict() for v in self.notifications] + if self.output_schema_name is not None: body['output_schema_name'] = self.output_schema_name + if self.schedule: body['schedule'] = self.schedule.as_dict() + if self.skip_builtin_dashboard is not None: + body['skip_builtin_dashboard'] = self.skip_builtin_dashboard + if self.slicing_exprs: body['slicing_exprs'] = [v for v in self.slicing_exprs] + if self.snapshot: body['snapshot'] = self.snapshot + if self.time_series: body['time_series'] = self.time_series.as_dict() + if self.warehouse_id is not None: body['warehouse_id'] = self.warehouse_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> CreateMonitor: + """Deserializes the CreateMonitor from a dictionary.""" + return cls(assets_dir=d.get('assets_dir', None), + baseline_table_name=d.get('baseline_table_name', None), + custom_metrics=_repeated_dict(d, 'custom_metrics', MonitorCustomMetric), + data_classification_config=_from_dict(d, 'data_classification_config', + MonitorDataClassificationConfig), + full_name=d.get('full_name', None), + inference_log=_from_dict(d, 'inference_log', MonitorInferenceLogProfileType), + notifications=_repeated_dict(d, 'notifications', MonitorNotificationsConfig), + 
output_schema_name=d.get('output_schema_name', None), + schedule=_from_dict(d, 'schedule', MonitorCronSchedule), + skip_builtin_dashboard=d.get('skip_builtin_dashboard', None), + slicing_exprs=d.get('slicing_exprs', None), + snapshot=d.get('snapshot', None), + time_series=_from_dict(d, 'time_series', MonitorTimeSeriesProfileType), + warehouse_id=d.get('warehouse_id', None)) + + @dataclass class CreateRegisteredModelRequest: catalog_name: str @@ -2731,6 +2822,342 @@ class ModelVersionInfoStatus(Enum): READY = 'READY' +@dataclass +class MonitorCronSchedule: + pause_status: Optional[MonitorCronSchedulePauseStatus] = None + """Whether the schedule is paused or not""" + + quartz_cron_expression: Optional[str] = None + """A cron expression using quartz syntax that describes the schedule for a job.""" + + timezone_id: Optional[str] = None + """A Java timezone id. The schedule for a job will be resolved with respect to this timezone.""" + + def as_dict(self) -> dict: + """Serializes the MonitorCronSchedule into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.pause_status is not None: body['pause_status'] = self.pause_status.value + if self.quartz_cron_expression is not None: + body['quartz_cron_expression'] = self.quartz_cron_expression + if self.timezone_id is not None: body['timezone_id'] = self.timezone_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorCronSchedule: + """Deserializes the MonitorCronSchedule from a dictionary.""" + return cls(pause_status=_enum(d, 'pause_status', MonitorCronSchedulePauseStatus), + quartz_cron_expression=d.get('quartz_cron_expression', None), + timezone_id=d.get('timezone_id', None)) + + +class MonitorCronSchedulePauseStatus(Enum): + """Whether the schedule is paused or not""" + + PAUSED = 'PAUSED' + UNPAUSED = 'UNPAUSED' + + +@dataclass +class MonitorCustomMetric: + definition: Optional[str] = None + """Jinja template for a SQL expression that specifies how to compute the metric. See [create metric + definition]. 
+ + [create metric definition]: https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition""" + + input_columns: Optional[List[str]] = None + """Columns on the monitored table to apply the custom metrics to.""" + + name: Optional[str] = None + """Name of the custom metric.""" + + output_data_type: Optional[str] = None + """The output type of the custom metric.""" + + type: Optional[MonitorCustomMetricType] = None + """The type of the custom metric.""" + + def as_dict(self) -> dict: + """Serializes the MonitorCustomMetric into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.definition is not None: body['definition'] = self.definition + if self.input_columns: body['input_columns'] = [v for v in self.input_columns] + if self.name is not None: body['name'] = self.name + if self.output_data_type is not None: body['output_data_type'] = self.output_data_type + if self.type is not None: body['type'] = self.type.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorCustomMetric: + """Deserializes the MonitorCustomMetric from a dictionary.""" + return cls(definition=d.get('definition', None), + input_columns=d.get('input_columns', None), + name=d.get('name', None), + output_data_type=d.get('output_data_type', None), + type=_enum(d, 'type', MonitorCustomMetricType)) + + +class MonitorCustomMetricType(Enum): + """The type of the custom metric.""" + + CUSTOM_METRIC_TYPE_AGGREGATE = 'CUSTOM_METRIC_TYPE_AGGREGATE' + CUSTOM_METRIC_TYPE_DERIVED = 'CUSTOM_METRIC_TYPE_DERIVED' + CUSTOM_METRIC_TYPE_DRIFT = 'CUSTOM_METRIC_TYPE_DRIFT' + MONITOR_STATUS_ERROR = 'MONITOR_STATUS_ERROR' + MONITOR_STATUS_FAILED = 'MONITOR_STATUS_FAILED' + + +@dataclass +class MonitorDataClassificationConfig: + enabled: Optional[bool] = None + """Whether data classification is enabled.""" + + def as_dict(self) -> dict: + """Serializes the MonitorDataClassificationConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.enabled is not None: body['enabled'] = self.enabled + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorDataClassificationConfig: + """Deserializes the MonitorDataClassificationConfig from a dictionary.""" + return cls(enabled=d.get('enabled', None)) + + +@dataclass +class MonitorDestinations: + email_addresses: Optional[List[str]] = None + """The list of email addresses to send the notification to.""" + + def as_dict(self) -> dict: + """Serializes the MonitorDestinations into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.email_addresses: body['email_addresses'] = [v for v in self.email_addresses] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorDestinations: + """Deserializes the MonitorDestinations from a dictionary.""" + return cls(email_addresses=d.get('email_addresses', None)) + + +@dataclass +class MonitorInferenceLogProfileType: + granularities: Optional[List[str]] = None + """List of granularities to use when aggregating data into time windows based on their timestamp.""" + + label_col: Optional[str] = None + """Column of the model label.""" + + model_id_col: Optional[str] = None + """Column of the model id or version.""" + + prediction_col: Optional[str] = None + """Column of the model prediction.""" + + prediction_proba_col: Optional[str] = None + """Column of the model prediction probabilities.""" + + problem_type: Optional[MonitorInferenceLogProfileTypeProblemType] = None + """Problem type 
the model aims to solve.""" + + timestamp_col: Optional[str] = None + """Column of the timestamp of predictions.""" + + def as_dict(self) -> dict: + """Serializes the MonitorInferenceLogProfileType into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.granularities: body['granularities'] = [v for v in self.granularities] + if self.label_col is not None: body['label_col'] = self.label_col + if self.model_id_col is not None: body['model_id_col'] = self.model_id_col + if self.prediction_col is not None: body['prediction_col'] = self.prediction_col + if self.prediction_proba_col is not None: body['prediction_proba_col'] = self.prediction_proba_col + if self.problem_type is not None: body['problem_type'] = self.problem_type.value + if self.timestamp_col is not None: body['timestamp_col'] = self.timestamp_col + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorInferenceLogProfileType: + """Deserializes the MonitorInferenceLogProfileType from a dictionary.""" + return cls(granularities=d.get('granularities', None), + label_col=d.get('label_col', None), + model_id_col=d.get('model_id_col', None), + prediction_col=d.get('prediction_col', None), + prediction_proba_col=d.get('prediction_proba_col', None), + problem_type=_enum(d, 'problem_type', MonitorInferenceLogProfileTypeProblemType), + timestamp_col=d.get('timestamp_col', None)) + + +class MonitorInferenceLogProfileTypeProblemType(Enum): + """Problem type the model aims to solve.""" + + PROBLEM_TYPE_CLASSIFICATION = 'PROBLEM_TYPE_CLASSIFICATION' + PROBLEM_TYPE_REGRESSION = 'PROBLEM_TYPE_REGRESSION' + + +@dataclass +class MonitorInfo: + assets_dir: Optional[str] = None + """The directory to store monitoring assets (e.g. dashboard, metric tables).""" + + baseline_table_name: Optional[str] = None + """Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table.""" + + custom_metrics: Optional[List[MonitorCustomMetric]] = None + """Custom metrics to compute on the monitored table. These can be aggregate metrics, derived + metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across + time windows).""" + + dashboard_id: Optional[str] = None + """The ID of the generated dashboard.""" + + data_classification_config: Optional[MonitorDataClassificationConfig] = None + """The data classification config for the monitor.""" + + drift_metrics_table_name: Optional[str] = None + """The full name of the drift metrics table. Format: + __catalog_name__.__schema_name__.__table_name__.""" + + inference_log: Optional[MonitorInferenceLogProfileType] = None + """Configuration for monitoring inference logs.""" + + latest_monitor_failure_msg: Optional[str] = None + """The latest failure message of the monitor (if any).""" + + monitor_version: Optional[str] = None + """The version of the monitor config (e.g. 1,2,3). If negative, the monitor may be corrupted.""" + + notifications: Optional[List[MonitorNotificationsConfig]] = None + """The notification settings for the monitor.""" + + output_schema_name: Optional[str] = None + """Schema where output metric tables are created.""" + + profile_metrics_table_name: Optional[str] = None + """The full name of the profile metrics table. 
Format: + __catalog_name__.__schema_name__.__table_name__.""" + + schedule: Optional[MonitorCronSchedule] = None + """The schedule for automatically updating and refreshing metric tables.""" + + slicing_exprs: Optional[List[str]] = None + """List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. + For high-cardinality columns, only the top 100 unique values by frequency will generate slices.""" + + snapshot: Optional[Any] = None + """Configuration for monitoring snapshot tables.""" + + status: Optional[MonitorInfoStatus] = None + """The status of the monitor.""" + + table_name: Optional[str] = None + """The full name of the table to monitor. Format: __catalog_name__.__schema_name__.__table_name__.""" + + time_series: Optional[MonitorTimeSeriesProfileType] = None + """Configuration for monitoring time series tables.""" + + def as_dict(self) -> dict: + """Serializes the MonitorInfo into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.assets_dir is not None: body['assets_dir'] = self.assets_dir + if self.baseline_table_name is not None: body['baseline_table_name'] = self.baseline_table_name + if self.custom_metrics: body['custom_metrics'] = [v.as_dict() for v in self.custom_metrics] + if self.dashboard_id is not None: body['dashboard_id'] = self.dashboard_id + if self.data_classification_config: + body['data_classification_config'] = self.data_classification_config.as_dict() + if self.drift_metrics_table_name is not None: + body['drift_metrics_table_name'] = self.drift_metrics_table_name + if self.inference_log: body['inference_log'] = self.inference_log.as_dict() + if self.latest_monitor_failure_msg is not None: + body['latest_monitor_failure_msg'] = self.latest_monitor_failure_msg + if self.monitor_version is not None: body['monitor_version'] = self.monitor_version + if self.notifications: body['notifications'] = [v.as_dict() for v in self.notifications] + if self.output_schema_name is not None: body['output_schema_name'] = self.output_schema_name + if self.profile_metrics_table_name is not None: + body['profile_metrics_table_name'] = self.profile_metrics_table_name + if self.schedule: body['schedule'] = self.schedule.as_dict() + if self.slicing_exprs: body['slicing_exprs'] = [v for v in self.slicing_exprs] + if self.snapshot: body['snapshot'] = self.snapshot + if self.status is not None: body['status'] = self.status.value + if self.table_name is not None: body['table_name'] = self.table_name + if self.time_series: body['time_series'] = self.time_series.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorInfo: + """Deserializes the MonitorInfo from a dictionary.""" + return cls(assets_dir=d.get('assets_dir', None), + baseline_table_name=d.get('baseline_table_name', None), + custom_metrics=_repeated_dict(d, 'custom_metrics', MonitorCustomMetric), + dashboard_id=d.get('dashboard_id', None), + data_classification_config=_from_dict(d, 'data_classification_config', + MonitorDataClassificationConfig), + drift_metrics_table_name=d.get('drift_metrics_table_name', None), + inference_log=_from_dict(d, 'inference_log', MonitorInferenceLogProfileType), + latest_monitor_failure_msg=d.get('latest_monitor_failure_msg', None), + monitor_version=d.get('monitor_version', None), + notifications=_repeated_dict(d, 'notifications', MonitorNotificationsConfig), + 
output_schema_name=d.get('output_schema_name', None), + profile_metrics_table_name=d.get('profile_metrics_table_name', None), + schedule=_from_dict(d, 'schedule', MonitorCronSchedule), + slicing_exprs=d.get('slicing_exprs', None), + snapshot=d.get('snapshot', None), + status=_enum(d, 'status', MonitorInfoStatus), + table_name=d.get('table_name', None), + time_series=_from_dict(d, 'time_series', MonitorTimeSeriesProfileType)) + + +class MonitorInfoStatus(Enum): + """The status of the monitor.""" + + MONITOR_STATUS_ACTIVE = 'MONITOR_STATUS_ACTIVE' + MONITOR_STATUS_DELETE_PENDING = 'MONITOR_STATUS_DELETE_PENDING' + MONITOR_STATUS_ERROR = 'MONITOR_STATUS_ERROR' + MONITOR_STATUS_FAILED = 'MONITOR_STATUS_FAILED' + MONITOR_STATUS_PENDING = 'MONITOR_STATUS_PENDING' + + +@dataclass +class MonitorNotificationsConfig: + on_failure: Optional[MonitorDestinations] = None + """Who to send notifications to on monitor failure.""" + + def as_dict(self) -> dict: + """Serializes the MonitorNotificationsConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.on_failure: body['on_failure'] = self.on_failure.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorNotificationsConfig: + """Deserializes the MonitorNotificationsConfig from a dictionary.""" + return cls(on_failure=_from_dict(d, 'on_failure', MonitorDestinations)) + + +@dataclass +class MonitorTimeSeriesProfileType: + granularities: Optional[List[str]] = None + """List of granularities to use when aggregating data into time windows based on their timestamp.""" + + timestamp_col: Optional[str] = None + """The timestamp column. This must be timestamp types or convertible to timestamp types using the + pyspark to_timestamp function.""" + + def as_dict(self) -> dict: + """Serializes the MonitorTimeSeriesProfileType into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.granularities: body['granularities'] = [v for v in self.granularities] + if self.timestamp_col is not None: body['timestamp_col'] = self.timestamp_col + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> MonitorTimeSeriesProfileType: + """Deserializes the MonitorTimeSeriesProfileType from a dictionary.""" + return cls(granularities=d.get('granularities', None), timestamp_col=d.get('timestamp_col', None)) + + @dataclass class NamedTableConstraint: name: str @@ -3386,6 +3813,23 @@ def from_dict(cls, d: Dict[str, any]) -> TableDependency: return cls(table_full_name=d.get('table_full_name', None)) +@dataclass +class TableExistsResponse: + table_exists: Optional[bool] = None + """Whether the table exists or not.""" + + def as_dict(self) -> dict: + """Serializes the TableExistsResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.table_exists is not None: body['table_exists'] = self.table_exists + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> TableExistsResponse: + """Deserializes the TableExistsResponse from a dictionary.""" + return cls(table_exists=d.get('table_exists', None)) + + @dataclass class TableInfo: access_point: Optional[str] = None @@ -3917,6 +4361,85 @@ def from_dict(cls, d: Dict[str, any]) -> UpdateModelVersionRequest: version=d.get('version', None)) +@dataclass +class UpdateMonitor: + assets_dir: str + """The directory to store monitoring assets (e.g. 
dashboard, metric tables).""" + + output_schema_name: str + """Schema where output metric tables are created.""" + + baseline_table_name: Optional[str] = None + """Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table.""" + + custom_metrics: Optional[List[MonitorCustomMetric]] = None + """Custom metrics to compute on the monitored table. These can be aggregate metrics, derived + metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across + time windows).""" + + data_classification_config: Optional[MonitorDataClassificationConfig] = None + """The data classification config for the monitor.""" + + full_name: Optional[str] = None + """Full name of the table.""" + + inference_log: Optional[MonitorInferenceLogProfileType] = None + """Configuration for monitoring inference logs.""" + + notifications: Optional[List[MonitorNotificationsConfig]] = None + """The notification settings for the monitor.""" + + schedule: Optional[MonitorCronSchedule] = None + """The schedule for automatically updating and refreshing metric tables.""" + + slicing_exprs: Optional[List[str]] = None + """List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. + For high-cardinality columns, only the top 100 unique values by frequency will generate slices.""" + + snapshot: Optional[Any] = None + """Configuration for monitoring snapshot tables.""" + + time_series: Optional[MonitorTimeSeriesProfileType] = None + """Configuration for monitoring time series tables.""" + + def as_dict(self) -> dict: + """Serializes the UpdateMonitor into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.assets_dir is not None: body['assets_dir'] = self.assets_dir + if self.baseline_table_name is not None: body['baseline_table_name'] = self.baseline_table_name + if self.custom_metrics: body['custom_metrics'] = [v.as_dict() for v in self.custom_metrics] + if self.data_classification_config: + body['data_classification_config'] = self.data_classification_config.as_dict() + if self.full_name is not None: body['full_name'] = self.full_name + if self.inference_log: body['inference_log'] = self.inference_log.as_dict() + if self.notifications: body['notifications'] = [v.as_dict() for v in self.notifications] + if self.output_schema_name is not None: body['output_schema_name'] = self.output_schema_name + if self.schedule: body['schedule'] = self.schedule.as_dict() + if self.slicing_exprs: body['slicing_exprs'] = [v for v in self.slicing_exprs] + if self.snapshot: body['snapshot'] = self.snapshot + if self.time_series: body['time_series'] = self.time_series.as_dict() + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> UpdateMonitor: + """Deserializes the UpdateMonitor from a dictionary.""" + return cls(assets_dir=d.get('assets_dir', None), + baseline_table_name=d.get('baseline_table_name', None), + custom_metrics=_repeated_dict(d, 'custom_metrics', MonitorCustomMetric), + data_classification_config=_from_dict(d, 'data_classification_config', + MonitorDataClassificationConfig), + full_name=d.get('full_name', None), + inference_log=_from_dict(d, 'inference_log', MonitorInferenceLogProfileType), + notifications=_repeated_dict(d, 'notifications', MonitorNotificationsConfig), + output_schema_name=d.get('output_schema_name', None), + 
schedule=_from_dict(d, 'schedule', MonitorCronSchedule), + slicing_exprs=d.get('slicing_exprs', None), + snapshot=d.get('snapshot', None), + time_series=_from_dict(d, 'time_series', MonitorTimeSeriesProfileType)) + + @dataclass class UpdatePermissions: changes: Optional[List[PermissionsChange]] = None @@ -5597,6 +6120,231 @@ def update(self, return PermissionsList.from_dict(res) +class LakehouseMonitorsAPI: + """A monitor computes and monitors data or model quality metrics for a table over time. It generates metrics + tables and a dashboard that you can use to monitor table health and set alerts. + + Most write operations require the user to be the owner of the table (or its parent schema or parent + catalog). Viewing the dashboard, computed metrics, or monitor configuration only requires the user to have + **SELECT** privileges on the table (along with **USE_SCHEMA** and **USE_CATALOG**).""" + + def __init__(self, api_client): + self._api = api_client + + def create(self, + full_name: str, + assets_dir: str, + output_schema_name: str, + *, + baseline_table_name: Optional[str] = None, + custom_metrics: Optional[List[MonitorCustomMetric]] = None, + data_classification_config: Optional[MonitorDataClassificationConfig] = None, + inference_log: Optional[MonitorInferenceLogProfileType] = None, + notifications: Optional[List[MonitorNotificationsConfig]] = None, + schedule: Optional[MonitorCronSchedule] = None, + skip_builtin_dashboard: Optional[bool] = None, + slicing_exprs: Optional[List[str]] = None, + snapshot: Optional[Any] = None, + time_series: Optional[MonitorTimeSeriesProfileType] = None, + warehouse_id: Optional[str] = None) -> MonitorInfo: + """Create a table monitor. + + Creates a new monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog, have **USE_SCHEMA** on the + table's parent schema, and have **SELECT** access on the table 2. have **USE_CATALOG** on the table's + parent catalog, be an owner of the table's parent schema, and have **SELECT** access on the table. 3. + have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on + the table's parent schema - be an owner of the table. + + Workspace assets, such as the dashboard, will be created in the workspace where this call was made. + + :param full_name: str + Full name of the table. + :param assets_dir: str + The directory to store monitoring assets (e.g. dashboard, metric tables). + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorCustomMetric`] (optional) + Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLogProfileType` (optional) + Configuration for monitoring inference logs. + :param notifications: List[:class:`MonitorNotificationsConfig`] (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. 
+ :param skip_builtin_dashboard: bool (optional) + Whether to skip creating a default dashboard summarizing data quality metrics. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: Any (optional) + Configuration for monitoring snapshot tables. + :param time_series: :class:`MonitorTimeSeriesProfileType` (optional) + Configuration for monitoring time series tables. + :param warehouse_id: str (optional) + Optional argument to specify the warehouse for dashboard creation. If not specified, the first + running warehouse will be used. + + :returns: :class:`MonitorInfo` + """ + body = {} + if assets_dir is not None: body['assets_dir'] = assets_dir + if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name + if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] + if data_classification_config is not None: + body['data_classification_config'] = data_classification_config.as_dict() + if inference_log is not None: body['inference_log'] = inference_log.as_dict() + if notifications is not None: body['notifications'] = [v.as_dict() for v in notifications] + if output_schema_name is not None: body['output_schema_name'] = output_schema_name + if schedule is not None: body['schedule'] = schedule.as_dict() + if skip_builtin_dashboard is not None: body['skip_builtin_dashboard'] = skip_builtin_dashboard + if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] + if snapshot is not None: body['snapshot'] = snapshot + if time_series is not None: body['time_series'] = time_series.as_dict() + if warehouse_id is not None: body['warehouse_id'] = warehouse_id + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', + f'/api/2.1/unity-catalog/tables/{full_name}/monitor', + body=body, + headers=headers) + return MonitorInfo.from_dict(res) + + def delete(self, full_name: str): + """Delete a table monitor. + + Deletes a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. + + Additionally, the call must be made from the workspace where the monitor was created. + + Note that the metric tables and dashboard will not be deleted as part of this call; those assets must + be manually cleaned up (if desired). + + :param full_name: str + Full name of the table. + + + """ + + headers = {} + self._api.do('DELETE', f'/api/2.1/unity-catalog/tables/{full_name}/monitor', headers=headers) + + def get(self, full_name: str) -> MonitorInfo: + """Get a table monitor. + + Gets a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema. 3. have the following + permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent + schema - **SELECT** privilege on the table. 
+ + The returned information includes configuration values, as well as information on assets created by + the monitor. Some information (e.g., dashboard) may be filtered out if the caller is in a different + workspace than where the monitor was created. + + :param full_name: str + Full name of the table. + + :returns: :class:`MonitorInfo` + """ + + headers = {'Accept': 'application/json', } + res = self._api.do('GET', f'/api/2.1/unity-catalog/tables/{full_name}/monitor', headers=headers) + return MonitorInfo.from_dict(res) + + def update(self, + full_name: str, + assets_dir: str, + output_schema_name: str, + *, + baseline_table_name: Optional[str] = None, + custom_metrics: Optional[List[MonitorCustomMetric]] = None, + data_classification_config: Optional[MonitorDataClassificationConfig] = None, + inference_log: Optional[MonitorInferenceLogProfileType] = None, + notifications: Optional[List[MonitorNotificationsConfig]] = None, + schedule: Optional[MonitorCronSchedule] = None, + slicing_exprs: Optional[List[str]] = None, + snapshot: Optional[Any] = None, + time_series: Optional[MonitorTimeSeriesProfileType] = None) -> MonitorInfo: + """Update a table monitor. + + Updates a monitor for the specified table. + + The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the + table's parent catalog and be an owner of the table's parent schema 3. have the following permissions: + - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an + owner of the table. + + Additionally, the call must be made from the workspace where the monitor was created, and the caller + must be the original creator of the monitor. + + Certain configuration fields, such as output asset identifiers, cannot be updated. + + :param full_name: str + Full name of the table. + :param assets_dir: str + The directory to store monitoring assets (e.g. dashboard, metric tables). + :param output_schema_name: str + Schema where output metric tables are created. + :param baseline_table_name: str (optional) + Name of the baseline table from which drift metrics are computed from. Columns in the monitored + table should also be present in the baseline table. + :param custom_metrics: List[:class:`MonitorCustomMetric`] (optional) + Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics + (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + :param data_classification_config: :class:`MonitorDataClassificationConfig` (optional) + The data classification config for the monitor. + :param inference_log: :class:`MonitorInferenceLogProfileType` (optional) + Configuration for monitoring inference logs. + :param notifications: List[:class:`MonitorNotificationsConfig`] (optional) + The notification settings for the monitor. + :param schedule: :class:`MonitorCronSchedule` (optional) + The schedule for automatically updating and refreshing metric tables. + :param slicing_exprs: List[str] (optional) + List of column expressions to slice data with for targeted analysis. The data is grouped by each + expression independently, resulting in a separate slice for each predicate and its complements. For + high-cardinality columns, only the top 100 unique values by frequency will generate slices. + :param snapshot: Any (optional) + Configuration for monitoring snapshot tables. 
+ :param time_series: :class:`MonitorTimeSeriesProfileType` (optional) + Configuration for monitoring time series tables. + + :returns: :class:`MonitorInfo` + """ + body = {} + if assets_dir is not None: body['assets_dir'] = assets_dir + if baseline_table_name is not None: body['baseline_table_name'] = baseline_table_name + if custom_metrics is not None: body['custom_metrics'] = [v.as_dict() for v in custom_metrics] + if data_classification_config is not None: + body['data_classification_config'] = data_classification_config.as_dict() + if inference_log is not None: body['inference_log'] = inference_log.as_dict() + if notifications is not None: body['notifications'] = [v.as_dict() for v in notifications] + if output_schema_name is not None: body['output_schema_name'] = output_schema_name + if schedule is not None: body['schedule'] = schedule.as_dict() + if slicing_exprs is not None: body['slicing_exprs'] = [v for v in slicing_exprs] + if snapshot is not None: body['snapshot'] = snapshot + if time_series is not None: body['time_series'] = time_series.as_dict() + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('PUT', + f'/api/2.1/unity-catalog/tables/{full_name}/monitor', + body=body, + headers=headers) + return MonitorInfo.from_dict(res) + + class MetastoresAPI: """A metastore is the top-level container of objects in Unity Catalog. It stores data assets (tables and views) and the permissions that govern access to them. Databricks account admins can create metastores and @@ -6868,13 +7616,34 @@ def delete(self, full_name: str): headers = {'Accept': 'application/json', } self._api.do('DELETE', f'/api/2.1/unity-catalog/tables/{full_name}', headers=headers) + def exists(self, full_name: str) -> TableExistsResponse: + """Get boolean reflecting if table exists. + + Gets if a table exists in the metastore for a specific catalog and schema. The caller must satisfy one + of the following requirements: * Be a metastore admin * Be the owner of the parent catalog * Be the + owner of the parent schema and have the USE_CATALOG privilege on the parent catalog * Have the + **USE_CATALOG** privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema, + and either be the table owner or have the SELECT privilege on the table. * Have BROWSE privilege on + the parent catalog * Have BROWSE privilege on the parent schema. + + :param full_name: str + Full name of the table. + + :returns: :class:`TableExistsResponse` + """ + + headers = {'Accept': 'application/json', } + res = self._api.do('GET', f'/api/2.1/unity-catalog/tables/{full_name}/exists', headers=headers) + return TableExistsResponse.from_dict(res) + def get(self, full_name: str, *, include_delta_metadata: Optional[bool] = None) -> TableInfo: """Get a table. - Gets a table from the metastore for a specific catalog and schema. The caller must be a metastore - admin, be the owner of the table and have the **USE_CATALOG** privilege on the parent catalog and the - **USE_SCHEMA** privilege on the parent schema, or be the owner of the table and have the **SELECT** - privilege on it as well. + Gets a table from the metastore for a specific catalog and schema. 
The caller must satisfy one of the + following requirements: * Be a metastore admin * Be the owner of the parent catalog * Be the owner of + the parent schema and have the USE_CATALOG privilege on the parent catalog * Have the **USE_CATALOG** + privilege on the parent catalog and the **USE_SCHEMA** privilege on the parent schema, and either be + the table owner or have the SELECT privilege on the table. :param full_name: str Full name of the table. diff --git a/databricks/sdk/service/compute.py b/databricks/sdk/service/compute.py index 60e92256c..816f0db3a 100755 --- a/databricks/sdk/service/compute.py +++ b/databricks/sdk/service/compute.py @@ -357,8 +357,7 @@ class ClusterAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -1132,8 +1131,7 @@ class ClusterPolicyAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -2053,7 +2051,8 @@ class CreatePolicy: """Additional human-readable description of the cluster policy.""" libraries: Optional[List[Library]] = None - """A list of libraries to be installed on the next cluster restart that uses this policy.""" + """A list of libraries to be installed on the next cluster restart that uses this policy. The + maximum number of libraries is 500.""" max_clusters_per_user: Optional[int] = None """Max number of clusters per user that can be active using this policy. If not present, there is @@ -2732,7 +2731,8 @@ class EditPolicy: """Additional human-readable description of the cluster policy.""" libraries: Optional[List[Library]] = None - """A list of libraries to be installed on the next cluster restart that uses this policy.""" + """A list of libraries to be installed on the next cluster restart that uses this policy. The + maximum number of libraries is 500.""" max_clusters_per_user: Optional[int] = None """Max number of clusters per user that can be active using this policy. If not present, there is @@ -3632,8 +3632,7 @@ class InstancePoolAccessControlRequest: """Permission level""" service_principal_name: Optional[str] = None - """Application ID of an active service principal. Setting this field requires the - `servicePrincipal/user` role.""" + """application ID of a service principal""" user_name: Optional[str] = None """name of the user""" @@ -4740,7 +4739,8 @@ class Policy: be deleted, and their policy families cannot be changed.""" libraries: Optional[List[Library]] = None - """A list of libraries to be installed on the next cluster restart that uses this policy.""" + """A list of libraries to be installed on the next cluster restart that uses this policy. The + maximum number of libraries is 500.""" max_clusters_per_user: Optional[int] = None """Max number of clusters per user that can be active using this policy. If not present, there is @@ -5426,9 +5426,9 @@ class ClusterPoliciesAPI: policies have ACLs that limit their use to specific users and groups. With cluster policies, you can: - Auto-install cluster libraries on the next restart by listing them in - the policy's "libraries" field. 
- Limit users to creating clusters with the prescribed settings. - - Simplify the user interface, enabling more users to create clusters, by fixing and hiding some fields. - - Manage costs by setting limits on attributes that impact the hourly rate. + the policy's "libraries" field (Public Preview). - Limit users to creating clusters with the prescribed + settings. - Simplify the user interface, enabling more users to create clusters, by fixing and hiding some + fields. - Manage costs by setting limits on attributes that impact the hourly rate. Cluster policy permissions limit which policies a user can select in the Policy drop-down when the user creates a cluster: - A user who has unrestricted cluster create permission can select the Unrestricted @@ -5465,7 +5465,8 @@ def create(self, :param description: str (optional) Additional human-readable description of the cluster policy. :param libraries: List[:class:`Library`] (optional) - A list of libraries to be installed on the next cluster restart that uses this policy. + A list of libraries to be installed on the next cluster restart that uses this policy. The maximum + number of libraries is 500. :param max_clusters_per_user: int (optional) Max number of clusters per user that can be active using this policy. If not present, there is no max limit. @@ -5541,7 +5542,8 @@ def edit(self, :param description: str (optional) Additional human-readable description of the cluster policy. :param libraries: List[:class:`Library`] (optional) - A list of libraries to be installed on the next cluster restart that uses this policy. + A list of libraries to be installed on the next cluster restart that uses this policy. The maximum + number of libraries is 500. :param max_clusters_per_user: int (optional) Max number of clusters per user that can be active using this policy. If not present, there is no max limit. @@ -5798,7 +5800,9 @@ def wait_get_cluster_terminated( def change_owner(self, cluster_id: str, owner_username: str): """Change cluster owner. - Change the owner of the cluster. You must be an admin to perform this operation. + Change the owner of the cluster. You must be an admin and the cluster must be terminated to perform + this operation. The service principal application ID can be supplied as an argument to + `owner_username`. :param cluster_id: str @@ -7092,7 +7096,7 @@ def list(self) -> Iterator[GlobalInitScriptDetails]: Get a list of all global init scripts for this workspace. This returns all properties for each script but **not** the script contents. To retrieve the contents of a script, use the [get a global init - script](#operation/get-script) operation. + script](:method:globalinitscripts/get) operation. :returns: Iterator over :class:`GlobalInitScriptDetails` """ diff --git a/databricks/sdk/service/files.py b/databricks/sdk/service/files.py index 29fb916f2..c1c3c184b 100755 --- a/databricks/sdk/service/files.py +++ b/databricks/sdk/service/files.py @@ -234,7 +234,7 @@ def from_dict(cls, d: Dict[str, any]) -> Put: @dataclass class ReadResponse: bytes_read: Optional[int] = None - """The number of bytes read (could be less than `length` if we hit end of file). This refers to + """The number of bytes read (could be less than ``length`` if we hit end of file). This refers to number of bytes read in unencoded version (response data is base64-encoded).""" data: Optional[str] = None @@ -264,9 +264,9 @@ def add_block(self, handle: int, data: str): """Append data block. Appends a block of data to the stream specified by the input handle. 
If the handle does not exist,
-        this call will throw an exception with `RESOURCE_DOES_NOT_EXIST`.
+        this call will throw an exception with ``RESOURCE_DOES_NOT_EXIST``.
 
-        If the block of data exceeds 1 MB, this call will throw an exception with `MAX_BLOCK_SIZE_EXCEEDED`.
+        If the block of data exceeds 1 MB, this call will throw an exception with ``MAX_BLOCK_SIZE_EXCEEDED``.
 
         :param handle: int
           The handle on an open stream.
@@ -285,7 +285,7 @@ def close(self, handle: int):
         """Close the stream.
 
         Closes the stream specified by the input handle. If the handle does not exist, this call throws an
-        exception with `RESOURCE_DOES_NOT_EXIST`.
+        exception with ``RESOURCE_DOES_NOT_EXIST``.
 
         :param handle: int
           The handle on an open stream.
@@ -302,12 +302,12 @@ def create(self, path: str, *, overwrite: Optional[bool] = None) -> CreateRespon
         Opens a stream to write to a file and returns a handle to this stream. There is a 10 minute idle
         timeout on this handle. If a file or directory already exists on the given path and __overwrite__ is
-        set to `false`, this call throws an exception with `RESOURCE_ALREADY_EXISTS`.
+        set to false, this call will throw an exception with ``RESOURCE_ALREADY_EXISTS``.
 
         A typical workflow for file upload would be:
 
-        1. Issue a `create` call and get a handle. 2. Issue one or more `add-block` calls with the handle you
-        have. 3. Issue a `close` call with the handle you have.
+        1. Issue a ``create`` call and get a handle. 2. Issue one or more ``add-block`` calls with the handle
+        you have. 3. Issue a ``close`` call with the handle you have.
 
         :param path: str
           The path of the new file. The path should be the absolute DBFS path.
@@ -423,7 +423,7 @@ def move(self, source_path: str, destination_path: str):
         Moves a file from one location to another location within DBFS. If the source file does not exist,
         this call throws an exception with `RESOURCE_DOES_NOT_EXIST`. If a file already exists in the
         destination path, this call throws an exception with `RESOURCE_ALREADY_EXISTS`. If the given source
-        path is a directory, this call always recursively moves all files.",
+        path is a directory, this call always recursively moves all files.
 
         :param source_path: str
           The source path of the file or directory. The path should be the absolute DBFS path.
@@ -477,7 +477,7 @@ def read(self, path: str, *, length: Optional[int] = None, offset: Optional[int]
         1 MB, this call throws an exception with `MAX_READ_SIZE_EXCEEDED`.
 
         If `offset + length` exceeds the number of bytes in a file, it reads the contents until the end of
-        file.",
+        file.
 
         :param path: str
           The path of the file to read. The path should be the absolute DBFS path.
diff --git a/databricks/sdk/service/iam.py b/databricks/sdk/service/iam.py
index e162771f2..5a4131f19 100755
--- a/databricks/sdk/service/iam.py
+++ b/databricks/sdk/service/iam.py
@@ -23,8 +23,7 @@ class AccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
@@ -456,8 +455,7 @@ class PasswordAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
diff --git a/databricks/sdk/service/jobs.py b/databricks/sdk/service/jobs.py
index 7aae7b5bc..e7fef2de3 100755
--- a/databricks/sdk/service/jobs.py
+++ b/databricks/sdk/service/jobs.py
@@ -973,8 +973,7 @@ class JobAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
@@ -3844,8 +3843,9 @@ class SubmitTask:
 
     existing_cluster_id: Optional[str] = None
     """If existing_cluster_id, the ID of an existing cluster that is used for all runs of this task.
-    When running tasks on an existing cluster, you may need to manually restart the cluster if it
-    stops responding. We suggest running jobs on new clusters for greater reliability."""
+    Only all-purpose clusters are supported. When running tasks on an existing cluster, you may need
+    to manually restart the cluster if it stops responding. We suggest running jobs on new clusters
+    for greater reliability."""
 
     health: Optional[JobsHealthRules] = None
     """An optional set of health rules that can be defined for this job."""
@@ -3998,8 +3998,9 @@ class Task:
 
     existing_cluster_id: Optional[str] = None
     """If existing_cluster_id, the ID of an existing cluster that is used for all runs of this task.
-    When running tasks on an existing cluster, you may need to manually restart the cluster if it
-    stops responding. We suggest running jobs on new clusters for greater reliability."""
+    Only all-purpose clusters are supported. When running tasks on an existing cluster, you may need
+    to manually restart the cluster if it stops responding. We suggest running jobs on new clusters
+    for greater reliability."""
 
     health: Optional[JobsHealthRules] = None
     """An optional set of health rules that can be defined for this job."""
diff --git a/databricks/sdk/service/ml.py b/databricks/sdk/service/ml.py
index 58410ac9e..ff2da8655 100755
--- a/databricks/sdk/service/ml.py
+++ b/databricks/sdk/service/ml.py
@@ -882,8 +882,7 @@ class ExperimentAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
@@ -2085,8 +2084,7 @@ class RegisteredModelAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
    """name of the user"""
@@ -3594,7 +3592,8 @@ def delete_runs(self,
         """Delete runs by creation time.
 
         Bulk delete runs in an experiment that were created prior to or at the specified timestamp. Deletes at
-        most max_runs per request.
+        most max_runs per request. To call this API from a Databricks Notebook in Python, you can use the
+        client code snippet on https://learn.microsoft.com/en-us/azure/databricks/mlflow/runs#bulk-delete.
 
         :param experiment_id: str
           The ID of the experiment containing the runs to delete.
@@ -4068,7 +4067,8 @@ def restore_runs(self,
         """Restore runs by deletion time.
 
         Bulk restore runs in an experiment that were deleted no earlier than the specified timestamp. Restores
-        at most max_runs per request.
+        at most max_runs per request. To call this API from a Databricks Notebook in Python, you can use the
+        client code snippet on https://learn.microsoft.com/en-us/azure/databricks/mlflow/runs#bulk-restore.
 
         :param experiment_id: str
           The ID of the experiment containing the runs to restore.
diff --git a/databricks/sdk/service/pipelines.py b/databricks/sdk/service/pipelines.py
index c7ae23298..6ef8efa73 100755
--- a/databricks/sdk/service/pipelines.py
+++ b/databricks/sdk/service/pipelines.py
@@ -724,8 +724,7 @@ class PipelineAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
diff --git a/databricks/sdk/service/serving.py b/databricks/sdk/service/serving.py
index b0d8e7f5d..b5cbdb702 100755
--- a/databricks/sdk/service/serving.py
+++ b/databricks/sdk/service/serving.py
@@ -1939,8 +1939,7 @@ class ServingEndpointAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
diff --git a/databricks/sdk/service/settings.py b/databricks/sdk/service/settings.py
index 352263ccd..341108cf6 100755
--- a/databricks/sdk/service/settings.py
+++ b/databricks/sdk/service/settings.py
@@ -90,15 +90,17 @@ def from_dict(cls, d: Dict[str, any]) -> CreateNetworkConnectivityConfigRequest:
 
 @dataclass
 class CreateOboTokenRequest:
+    """Configuration details for creating on-behalf tokens."""
+
     application_id: str
     """Application ID of the service principal."""
 
-    lifetime_seconds: int
-    """The number of seconds before the token expires."""
-
     comment: Optional[str] = None
     """Comment that describes the purpose of the token."""
 
+    lifetime_seconds: Optional[int] = None
+    """The number of seconds before the token expires."""
+
     def as_dict(self) -> dict:
         """Serializes the CreateOboTokenRequest into a dictionary suitable for use as a JSON request body."""
         body = {}
@@ -117,6 +119,8 @@ def from_dict(cls, d: Dict[str, any]) -> CreateOboTokenRequest:
 
 @dataclass
 class CreateOboTokenResponse:
+    """An on-behalf token was successfully created for the service principal."""
+
     token_info: Optional[TokenInfo] = None
 
     token_value: Optional[str] = None
@@ -454,6 +458,24 @@ def from_dict(cls, d: Dict[str, any]) -> GetTokenPermissionLevelsResponse:
         return cls(permission_levels=_repeated_dict(d, 'permission_levels', TokenPermissionsDescription))
 
 
+@dataclass
+class GetTokenResponse:
+    """Token with specified Token ID was successfully returned."""
+
+    token_info: Optional[TokenInfo] = None
+
+    def as_dict(self) -> dict:
+        """Serializes the GetTokenResponse into a dictionary suitable for use as a JSON request body."""
+        body = {}
+        if self.token_info: body['token_info'] = self.token_info.as_dict()
+        return body
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, any]) -> GetTokenResponse:
+        """Deserializes the GetTokenResponse from a dictionary."""
+        return cls(token_info=_from_dict(d, 'token_info', TokenInfo))
+
+
 @dataclass
 class IpAccessListInfo:
     """Definition of an IP Access list"""
@@ -601,7 +623,10 @@ def from_dict(cls, d: Dict[str, any]) -> ListPublicTokensResponse:
 
 @dataclass
 class ListTokensResponse:
+    """Tokens were successfully returned."""
+
     token_infos: Optional[List[TokenInfo]] = None
+    """Token metadata of each user-created token in the workspace"""
 
     def as_dict(self) -> dict:
         """Serializes the ListTokensResponse into a dictionary suitable for use as a JSON request body."""
@@ -1082,8 +1107,7 @@ class TokenAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
@@ -2217,19 +2241,19 @@ def __init__(self, api_client):
 
     def create_obo_token(self,
                          application_id: str,
-                         lifetime_seconds: int,
                          *,
-                         comment: Optional[str] = None) -> CreateOboTokenResponse:
+                         comment: Optional[str] = None,
+                         lifetime_seconds: Optional[int] = None) -> CreateOboTokenResponse:
         """Create on-behalf token.
 
         Creates a token on behalf of a service principal.
 
         :param application_id: str
           Application ID of the service principal.
-        :param lifetime_seconds: int
-          The number of seconds before the token expires.
         :param comment: str (optional)
          Comment that describes the purpose of the token.
+        :param lifetime_seconds: int (optional)
+          The number of seconds before the token expires.
 
         :returns: :class:`CreateOboTokenResponse`
         """
@@ -2255,10 +2279,10 @@ def delete(self, token_id: str):
 
         """
 
-        headers = {}
+        headers = {'Accept': 'application/json', }
         self._api.do('DELETE', f'/api/2.0/token-management/tokens/{token_id}', headers=headers)
 
-    def get(self, token_id: str) -> TokenInfo:
+    def get(self, token_id: str) -> GetTokenResponse:
         """Get token info.
 
         Gets information about a token, specified by its ID.
@@ -2266,12 +2290,12 @@ def get(self, token_id: str) -> TokenInfo:
         :param token_id: str
           The ID of the token to get.
 
-        :returns: :class:`TokenInfo`
+        :returns: :class:`GetTokenResponse`
         """
 
         headers = {'Accept': 'application/json', }
         res = self._api.do('GET', f'/api/2.0/token-management/tokens/{token_id}', headers=headers)
-        return TokenInfo.from_dict(res)
+        return GetTokenResponse.from_dict(res)
 
     def get_permission_levels(self) -> GetTokenPermissionLevelsResponse:
         """Get token permission levels.
@@ -2301,13 +2325,13 @@ def get_permissions(self) -> TokenPermissions:
 
     def list(self,
              *,
-             created_by_id: Optional[str] = None,
+             created_by_id: Optional[int] = None,
             created_by_username: Optional[str] = None) -> Iterator[TokenInfo]:
         """List all tokens.
 
         Lists all tokens associated with the specified workspace or user.
 
-        :param created_by_id: str (optional)
+        :param created_by_id: int (optional)
           User ID of the user that created the token.
         :param created_by_username: str (optional)
           Username of the user that created the token.
diff --git a/databricks/sdk/service/sql.py b/databricks/sdk/service/sql.py
index e0d0b0acf..7eacd64e3 100755
--- a/databricks/sdk/service/sql.py
+++ b/databricks/sdk/service/sql.py
@@ -346,7 +346,6 @@ def from_dict(cls, d: Dict[str, any]) -> ChannelInfo:
 
 
 class ChannelName(Enum):
-    """Name of the channel"""
 
     CHANNEL_NAME_CURRENT = 'CHANNEL_NAME_CURRENT'
     CHANNEL_NAME_CUSTOM = 'CHANNEL_NAME_CUSTOM'
@@ -3441,8 +3440,7 @@ class WarehouseAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
@@ -4350,6 +4348,9 @@ def list(self,
 
         Gets a list of queries. Optionally, this list can be filtered by a search term.
 
+        ### **Warning: Calling this API concurrently 10 or more times could result in throttling, service
+        degradation, or a temporary ban.**
+
         :param order: str (optional)
           Name of query attribute to order by. Default sort order is ascending. Append a dash (`-`) to order
           descending instead.
diff --git a/databricks/sdk/service/workspace.py b/databricks/sdk/service/workspace.py
index a5277a7b2..cf5dacc3a 100755
--- a/databricks/sdk/service/workspace.py
+++ b/databricks/sdk/service/workspace.py
@@ -721,8 +721,7 @@ class RepoAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
@@ -1101,8 +1100,7 @@ class WorkspaceObjectAccessControlRequest:
     """Permission level"""
 
     service_principal_name: Optional[str] = None
-    """Application ID of an active service principal. Setting this field requires the
-    `servicePrincipal/user` role."""
+    """application ID of a service principal"""
 
     user_name: Optional[str] = None
     """name of the user"""
diff --git a/databricks/sdk/version.py b/databricks/sdk/version.py
index 435d64bd6..5ec52a922 100644
--- a/databricks/sdk/version.py
+++ b/databricks/sdk/version.py
@@ -1 +1 @@
-__version__ = '0.17.0'
+__version__ = '0.18.0'
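
As context for the reworded `create`/`add-block`/`close` docstrings touched above, here is a minimal sketch of that three-step streaming upload through `w.dbfs`. It assumes the block passed to `add_block()` is base64-encoded, as in the underlying DBFS REST API, and uses a hypothetical target path; it is illustrative only and not part of the patch.

```python
import base64

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # host and credentials resolved from the environment

# 1. Issue a create call and get a handle (a 10 minute idle timeout applies).
handle = w.dbfs.create("/tmp/sdk-upload-example.txt", overwrite=True).handle

# 2. Issue one or more add-block calls with that handle.
#    Each block is assumed to be base64-encoded and at most 1 MB,
#    otherwise the service raises MAX_BLOCK_SIZE_EXCEEDED.
payload = b"hello from the streaming upload workflow"
w.dbfs.add_block(handle, base64.b64encode(payload).decode())

# 3. Issue a close call with the handle.
w.dbfs.close(handle)
```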
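Similarly, a minimal sketch of the reshaped `w.token_management` surface in this release: `lifetime_seconds` is now an optional keyword argument to `create_obo_token()`, `get()` returns a `GetTokenResponse` whose `token_info` carries the `TokenInfo`, and `created_by_id` on `list()` is typed as `int`. The service principal application ID below is a placeholder.

```python
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# lifetime_seconds moved behind the keyword-only marker and is now optional.
obo = w.token_management.create_obo_token(
    application_id="00000000-0000-0000-0000-000000000000",  # placeholder service principal ID
    comment="example OBO token",
    lifetime_seconds=3600,
)

# get() now returns GetTokenResponse; the TokenInfo sits under .token_info.
token = w.token_management.get(obo.token_info.token_id)
print(token.token_info.comment)

# created_by_id is now an int filter.
for info in w.token_management.list(created_by_id=token.token_info.created_by_id):
    print(info.token_id)

w.token_management.delete(obo.token_info.token_id)
```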