26
26
from datahub .ingestion .source .aws .sagemaker_processors .job_classes import (
27
27
AutoMlJobInfo ,
28
28
CompilationJobInfo ,
29
- EdgePackagingJobInfo ,
30
29
HyperParameterTuningJobInfo ,
31
30
LabelingJobInfo ,
32
31
ProcessingJobInfo ,
53
52
"JobInfo" ,
54
53
AutoMlJobInfo ,
55
54
CompilationJobInfo ,
56
- EdgePackagingJobInfo ,
57
55
HyperParameterTuningJobInfo ,
58
56
LabelingJobInfo ,
59
57
ProcessingJobInfo ,
65
63
class JobType (Enum ):
66
64
AUTO_ML = "auto_ml"
67
65
COMPILATION = "compilation"
68
- EDGE_PACKAGING = "edge_packaging"
69
66
HYPER_PARAMETER_TUNING = "hyper_parameter_tuning"
70
67
LABELING = "labeling"
71
68
PROCESSING = "processing"
@@ -78,7 +75,6 @@ class JobType(Enum):
78
75
job_type_to_info : Mapping [JobType , Any ] = {
79
76
JobType .AUTO_ML : AutoMlJobInfo (),
80
77
JobType .COMPILATION : CompilationJobInfo (),
81
- JobType .EDGE_PACKAGING : EdgePackagingJobInfo (),
82
78
JobType .HYPER_PARAMETER_TUNING : HyperParameterTuningJobInfo (),
83
79
JobType .LABELING : LabelingJobInfo (),
84
80
JobType .PROCESSING : ProcessingJobInfo (),
@@ -416,23 +412,20 @@ def process_auto_ml_job(self, job: Dict[str, Any]) -> SageMakerJob:
416
412
"""
417
413
Process outputs from Boto3 describe_auto_ml_job()
418
414
419
- See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.describe_auto_ml_job
415
+ See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/describe_auto_ml_job_v2.html
420
416
"""
421
417
422
418
JOB_TYPE = JobType .AUTO_ML
423
419
424
420
input_datasets = {}
425
-
426
- for input_config in job .get ("InputDataConfig" , []):
421
+ for input_config in job .get ("AutoMLJobInputDataConfig" , []):
427
422
input_data = input_config .get ("DataSource" , {}).get ("S3DataSource" )
428
-
429
423
if input_data is not None and "S3Uri" in input_data :
430
424
input_datasets [make_s3_urn (input_data ["S3Uri" ], self .env )] = {
431
425
"dataset_type" : "s3" ,
432
426
"uri" : input_data ["S3Uri" ],
433
427
"datatype" : input_data .get ("S3DataType" ),
434
428
}
435
-
436
429
output_datasets = {}
437
430
438
431
output_s3_path = job .get ("OutputDataConfig" , {}).get ("S3OutputPath" )
@@ -448,6 +441,18 @@ def process_auto_ml_job(self, job: Dict[str, Any]) -> SageMakerJob:
448
441
JOB_TYPE ,
449
442
)
450
443
444
+ metrics : Dict [str , Any ] = {}
445
+ # Get job metrics from CandidateMetrics
446
+ candidate_metrics = (
447
+ job .get ("BestCandidate" , {})
448
+ .get ("CandidateProperties" , {})
449
+ .get ("CandidateMetrics" , [])
450
+ )
451
+ if candidate_metrics :
452
+ metrics = {
453
+ metric ["MetricName" ]: metric ["Value" ] for metric in candidate_metrics
454
+ }
455
+
451
456
model_containers = job .get ("BestCandidate" , {}).get ("InferenceContainers" , [])
452
457
453
458
for model_container in model_containers :
@@ -456,7 +461,7 @@ def process_auto_ml_job(self, job: Dict[str, Any]) -> SageMakerJob:
456
461
if model_data_url is not None :
457
462
job_key = JobKey (job_snapshot .urn , JobDirection .TRAINING )
458
463
459
- self .update_model_image_jobs (model_data_url , job_key )
464
+ self .update_model_image_jobs (model_data_url , job_key , metrics = metrics )
460
465
461
466
return SageMakerJob (
462
467
job_name = job_name ,
@@ -515,83 +520,6 @@ def process_compilation_job(self, job: Dict[str, Any]) -> SageMakerJob:
515
520
output_datasets = output_datasets ,
516
521
)
517
522
518
- def process_edge_packaging_job (
519
- self ,
520
- job : Dict [str , Any ],
521
- ) -> SageMakerJob :
522
- """
523
- Process outputs from Boto3 describe_edge_packaging_job()
524
-
525
- See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.describe_edge_packaging_job
526
- """
527
-
528
- JOB_TYPE = JobType .EDGE_PACKAGING
529
-
530
- name : str = job ["EdgePackagingJobName" ]
531
- arn : str = job ["EdgePackagingJobArn" ]
532
-
533
- output_datasets = {}
534
-
535
- model_artifact_s3_uri : Optional [str ] = job .get ("ModelArtifact" )
536
- output_s3_uri : Optional [str ] = job .get ("OutputConfig" , {}).get (
537
- "S3OutputLocation"
538
- )
539
-
540
- if model_artifact_s3_uri is not None :
541
- output_datasets [make_s3_urn (model_artifact_s3_uri , self .env )] = {
542
- "dataset_type" : "s3" ,
543
- "uri" : model_artifact_s3_uri ,
544
- }
545
-
546
- if output_s3_uri is not None :
547
- output_datasets [make_s3_urn (output_s3_uri , self .env )] = {
548
- "dataset_type" : "s3" ,
549
- "uri" : output_s3_uri ,
550
- }
551
-
552
- # from docs: "The name of the SageMaker Neo compilation job that is used to locate model artifacts that are being packaged."
553
- compilation_job_name : Optional [str ] = job .get ("CompilationJobName" )
554
-
555
- output_jobs = set ()
556
- if compilation_job_name is not None :
557
- # globally unique job name
558
- full_job_name = ("compilation" , compilation_job_name )
559
-
560
- if full_job_name in self .name_to_arn :
561
- output_jobs .add (
562
- make_sagemaker_job_urn (
563
- "compilation" ,
564
- compilation_job_name ,
565
- self .name_to_arn [full_job_name ],
566
- self .env ,
567
- )
568
- )
569
- else :
570
- self .report .report_warning (
571
- name ,
572
- f"Unable to find ARN for compilation job { compilation_job_name } produced by edge packaging job { arn } " ,
573
- )
574
-
575
- job_snapshot , job_name , job_arn = self .create_common_job_snapshot (
576
- job ,
577
- JOB_TYPE ,
578
- f"https://{ self .aws_region } .console.aws.amazon.com/sagemaker/home?region={ self .aws_region } #/edge-packaging-jobs/{ job ['EdgePackagingJobName' ]} " ,
579
- )
580
-
581
- if job .get ("ModelName" ) is not None :
582
- job_key = JobKey (job_snapshot .urn , JobDirection .DOWNSTREAM )
583
-
584
- self .update_model_name_jobs (job ["ModelName" ], job_key )
585
-
586
- return SageMakerJob (
587
- job_name = job_name ,
588
- job_arn = job_arn ,
589
- job_type = JOB_TYPE ,
590
- job_snapshot = job_snapshot ,
591
- output_datasets = output_datasets ,
592
- output_jobs = output_jobs ,
593
- )
594
-
595
523
def process_hyper_parameter_tuning_job (
596
524
self ,
597
525
job : Dict [str , Any ],
0 commit comments