diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py index edb9b7b8bd5264..6102e6d61a8bd1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py @@ -13,6 +13,7 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.emitter.mce_builder import make_group_urn, make_user_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext @@ -51,6 +52,7 @@ OriginTypeClass, StatusClass, ) +from datahub.utilities.lossy_collections import LossyList logger = logging.getLogger(__name__) @@ -132,11 +134,7 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): description="regex patterns for groups to include in ingestion.", ) - # If enabled, report will contain names of filtered users and groups. - filtered_tracking: bool = Field( - default=True, - description="If enabled, report will contain names of filtered users and groups.", - ) + _remove_filtered_tracking = pydantic_removed_field("filtered_tracking") # Optional: Whether to mask sensitive information from workunit ID's. On by default. mask_group_id: bool = Field( @@ -156,14 +154,10 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): @dataclass class AzureADSourceReport(StaleEntityRemovalSourceReport): - filtered: List[str] = field(default_factory=list) - filtered_tracking: bool = field(default=True, repr=False) - filtered_count: int = field(default=0) + filtered: LossyList[str] = field(default_factory=LossyList) def report_filtered(self, name: str) -> None: - self.filtered_count += 1 - if self.filtered_tracking: - self.filtered.append(name) + self.filtered.append(name) # Source that extracts Azure AD users, groups and group memberships using Microsoft Graph REST API @@ -266,9 +260,7 @@ def create(cls, config_dict, ctx): def __init__(self, config: AzureADConfig, ctx: PipelineContext): super().__init__(config, ctx) self.config = config - self.report = AzureADSourceReport( - filtered_tracking=self.config.filtered_tracking - ) + self.report = AzureADSourceReport() session = requests.Session() retries = Retry( total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]