6
6
from functools import reduce
7
7
from typing import Any , Dict , List , Mapping , Match , Optional , Union , cast
8
8
9
+ from datahub .configuration .common import ConfigModel
9
10
from datahub .emitter import mce_builder
10
- from datahub .emitter .mce_builder import OwnerType
11
+ from datahub .emitter .mce_builder import (
12
+ OwnerType ,
13
+ make_user_urn ,
14
+ validate_ownership_type ,
15
+ )
11
16
from datahub .metadata .schema_classes import (
12
17
AuditStampClass ,
13
18
InstitutionalMemoryClass ,
@@ -83,6 +88,36 @@ class Constants:
83
88
SEPARATOR = "separator"
84
89
85
90
91
+ class _MappingOwner (ConfigModel ):
92
+ owner : str
93
+ owner_type : str = OwnershipTypeClass .DATAOWNER
94
+
95
+
96
+ class _DatahubProps (ConfigModel ):
97
+ owners : List [Union [str , _MappingOwner ]]
98
+
99
+ def make_owner_category_list (self ) -> List [Dict ]:
100
+ res = []
101
+ for owner in self .owners :
102
+ if isinstance (owner , str ):
103
+ owner_id = owner
104
+ owner_category = OwnershipTypeClass .DATAOWNER
105
+ else :
106
+ owner_id = owner .owner
107
+ owner_category = owner .owner_type
108
+ owner_id = make_user_urn (owner_id )
109
+ owner_category , owner_category_urn = validate_ownership_type (owner_category )
110
+
111
+ res .append (
112
+ {
113
+ "urn" : owner_id ,
114
+ "category" : owner_category ,
115
+ "categoryUrn" : owner_category_urn ,
116
+ }
117
+ )
118
+ return res
119
+
120
+
86
121
class OperationProcessor :
87
122
"""
88
123
A general class that processes a dictionary of properties and operations defined on it.
@@ -128,7 +163,7 @@ def __init__(
128
163
self .owner_source_type = owner_source_type
129
164
self .match_nested_props = match_nested_props
130
165
131
- def process (self , raw_props : Mapping [str , Any ]) -> Dict [str , Any ]:
166
+ def process (self , raw_props : Mapping [str , Any ]) -> Dict [str , Any ]: # noqa: C901
132
167
# Defining the following local variables -
133
168
# operations_map - the final resulting map when operations are processed.
134
169
# Against each operation the values to be applied are stored.
@@ -137,9 +172,35 @@ def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:
137
172
# operation config: map which contains the parameters to carry out that operation.
138
173
# For e.g for add_tag operation config will have the tag value.
139
174
# operation_type: the type of operation (add_tag, add_term, etc.)
140
- aspect_map : Dict [str , Any ] = {} # map of aspect name to aspect object
175
+
176
+ # Process the special "datahub" property, which supports tags, terms, and owners.
177
+ operations_map : Dict [str , list ] = {}
178
+ try :
179
+ datahub_prop = raw_props .get ("datahub" )
180
+ if datahub_prop and isinstance (datahub_prop , dict ):
181
+ if datahub_prop .get ("tags" ):
182
+ # Note that tags get converted to urns later because we need to support the tag prefix.
183
+ tags = datahub_prop ["tags" ]
184
+ operations_map .setdefault (Constants .ADD_TAG_OPERATION , []).extend (
185
+ tags
186
+ )
187
+
188
+ if datahub_prop .get ("terms" ):
189
+ terms = datahub_prop ["terms" ]
190
+ operations_map .setdefault (Constants .ADD_TERM_OPERATION , []).extend (
191
+ mce_builder .make_term_urn (term ) for term in terms
192
+ )
193
+
194
+ if datahub_prop .get ("owners" ):
195
+ owners = _DatahubProps .parse_obj_allow_extras (datahub_prop )
196
+ operations_map .setdefault (Constants .ADD_OWNER_OPERATION , []).extend (
197
+ owners .make_owner_category_list ()
198
+ )
199
+ except Exception as e :
200
+ logger .error (f"Error while processing datahub property: { e } " )
201
+
202
+ # Process the actual directives.
141
203
try :
142
- operations_map : Dict [str , Union [set , list ]] = {}
143
204
for operation_key in self .operation_defs :
144
205
operation_type = self .operation_defs .get (operation_key , {}).get (
145
206
Constants .OPERATION
@@ -177,42 +238,36 @@ def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:
177
238
isinstance (operation , list )
178
239
and operation_type == Constants .ADD_OWNER_OPERATION
179
240
):
180
- operation_value_list = operations_map .get (
181
- operation_type , list ()
182
- )
183
- cast (List , operation_value_list ).extend (
241
+ operations_map .setdefault (operation_type , []).extend (
184
242
operation
185
- ) # cast to silent the lint
186
- operations_map [operation_type ] = operation_value_list
243
+ )
187
244
188
245
elif isinstance (operation , (str , list )):
189
- operations_value_set = operations_map .get (
190
- operation_type , set ()
246
+ operations_map .setdefault (operation_type , []).extend (
247
+ operation
248
+ if isinstance (operation , list )
249
+ else [operation ]
191
250
)
192
- if isinstance (operation , list ):
193
- operations_value_set .update (operation ) # type: ignore
194
- else :
195
- operations_value_set .add (operation ) # type: ignore
196
- operations_map [operation_type ] = operations_value_set
197
251
else :
198
- operations_value_list = operations_map .get (
199
- operation_type , list ()
252
+ operations_map .setdefault ( operation_type , []). append (
253
+ operation
200
254
)
201
- operations_value_list .append (operation ) # type: ignore
202
- operations_map [operation_type ] = operations_value_list
203
- aspect_map = self .convert_to_aspects (operations_map )
204
255
except Exception as e :
205
256
logger .error (f"Error while processing operation defs over raw_props: { e } " )
257
+
258
+ aspect_map : Dict [str , Any ] = {} # map of aspect name to aspect object
259
+ try :
260
+ aspect_map = self .convert_to_aspects (operations_map )
261
+ except Exception as e :
262
+ logger .error (f"Error while converting operations map to aspects: { e } " )
206
263
return aspect_map
207
264
208
- def convert_to_aspects (
209
- self , operation_map : Dict [str , Union [set , list ]]
210
- ) -> Dict [str , Any ]:
265
+ def convert_to_aspects (self , operation_map : Dict [str , list ]) -> Dict [str , Any ]:
211
266
aspect_map : Dict [str , Any ] = {}
212
267
213
268
if Constants .ADD_TAG_OPERATION in operation_map :
214
269
tag_aspect = mce_builder .make_global_tag_aspect_with_tag_list (
215
- sorted (operation_map [Constants .ADD_TAG_OPERATION ])
270
+ sorted (set ( operation_map [Constants .ADD_TAG_OPERATION ]) )
216
271
)
217
272
218
273
aspect_map [Constants .ADD_TAG_OPERATION ] = tag_aspect
@@ -240,7 +295,7 @@ def convert_to_aspects(
240
295
241
296
if Constants .ADD_TERM_OPERATION in operation_map :
242
297
term_aspect = mce_builder .make_glossary_terms_aspect_from_urn_list (
243
- sorted (operation_map [Constants .ADD_TERM_OPERATION ])
298
+ sorted (set ( operation_map [Constants .ADD_TERM_OPERATION ]) )
244
299
)
245
300
aspect_map [Constants .ADD_TERM_OPERATION ] = term_aspect
246
301
@@ -319,12 +374,7 @@ def get_operation_value(
319
374
operation_config .get (Constants .OWNER_CATEGORY )
320
375
or OwnershipTypeClass .DATAOWNER
321
376
)
322
- owner_category_urn : Optional [str ] = None
323
- if owner_category .startswith ("urn:li:" ):
324
- owner_category_urn = owner_category
325
- owner_category = OwnershipTypeClass .DATAOWNER
326
- else :
327
- owner_category = owner_category .upper ()
377
+ owner_category , owner_category_urn = validate_ownership_type (owner_category )
328
378
329
379
if self .strip_owner_email_id :
330
380
owner_ids = [
0 commit comments