Fix ServiceLab ingestion lambda regex (#5687)

The lambda has failed to ingest new data since Sep 3rd, and it turns out that the regex it uses to extract the benchmark metadata no longer matches (I will check which change is responsible for that, but it's likely something on the ServiceLab side). Specifically, the `retry` field in the filename that ServiceLab returns is not there anymore. Before, we had `pytorch/benchmarks/dynamo/manifold/..._1.a-tmp694bm90e.csv`; now it is just `pytorch/benchmarks/dynamo/manifold/....a-tmphjxk9w2x.csv`. So, I updated the regex to make this field optional. This is not needed anywhere in OSS atm.

### Testing

Run the lambda with a sample key on S3 `pytorch/benchmarks/dynamo/manifold/4500202979/4500315921/cudagraphs_dynamic-BERT_pytorch-training-performance-benchmark_torchbench_run_bert_pytorch_training.benchmark_torchbench_run_bert_pytorch_training.4500315921.a-tmphjxk9w2x.csv` and confirm that the record is inserted into CH.

---------

Co-authored-by: Sergii Dymchenko <[email protected]>
pytorch · Sep 21, 2024 · 0a955fb · 0a955fb
1 parent 0d90e09
commit 0a955fb
Showing 1 changed file with 31 additions and 1 deletion.
diff --git a/aws/lambda/servicelab-ingestor/lambda_function.py b/aws/lambda/servicelab-ingestor/lambda_function.py
@@ -26,7 +26,7 @@
 )
 
 METADATA_REGEX = re.compile(
-    r"pytorch/benchmarks/dynamo/manifold/(?P<experiment_id>\d+)/(?P<trial_id>\d+)/(?P<compiler>\w+)-(?P<model>\w+)-(?P<mode>\w+)-(?P<benchmark_type>\w+)-\w+\.\w+\.\d+_(?P<retry>\d+)\.(?P<experiment_type>\w+)-\w+\.csv"
+    r"pytorch/benchmarks/dynamo/manifold/(?P<experiment_id>\d+)/(?P<trial_id>\d+)/(?P<compiler>\w+)-(?P<model>\w+)-(?P<mode>\w+)-(?P<benchmark_type>\w+)-\w+\.\w+\.\d+_?(?P<retry>\d+)?\.(?P<experiment_type>\w+)-\w+\.csv"
 )
 
 
@@ -63,6 +63,7 @@ def extract_metadata(record: Any) -> Dict[str, Any]:
     key = extract_key(record)
     m = re.match(METADATA_REGEX, key)
     if not m:
+        print(f"Failed to extract metadata from {key}")
         return {}
 
     return {
@@ -123,6 +124,7 @@ def upsert_document(record: Any) -> None:
 if os.getenv("DEBUG", "0") == "1":
     mock_body = {
         "Records": [
+            # A mock example with the original retry field in ServiceLab result CSV
             {
                 "eventVersion": "2.1",
                 "eventSource": "aws:s3",
@@ -149,6 +151,34 @@ def upsert_document(record: Any) -> None:
                         "sequencer": "0066BF8659A2FDB5EE",
                     },
                 },
+            },
+            # A mock example without the retry field (it started to happen since Sep 3rd 2024)
+            {
+                "eventVersion": "2.1",
+                "eventSource": "aws:s3",
+                "awsRegion": "us-east-1",
+                "eventTime": "2024-08-19T15:20:02.000Z",
+                "eventName": "ObjectCreated:Put",
+                "userIdentity": {
+                    "principalId": "AWS:AROAUPVRELQNILZ34DHTP:hyperloop_worker@svc"
+                },
+                "requestParameters": {"sourceIPAddress": ""},
+                "responseElements": {"x-amz-request-id": "", "x-amz-id-2": ""},
+                "s3": {
+                    "s3SchemaVersion": "1.0",
+                    "configurationId": "deebdf19-9805-4e91-8b87-fcc7c1197872",
+                    "bucket": {
+                        "name": "ossci-benchmarks",
+                        "ownerIdentity": {"principalId": "A30JR6FIYKGDQS"},
+                        "arn": "arn:aws:s3:::ossci-benchmarks",
+                    },
+                    "object": {
+                        "key": "pytorch/benchmarks/dynamo/manifold/4500202979/4500315921/cudagraphs_dynamic-BERT_pytorch-training-performance-benchmark_torchbench_run_bert_pytorch_training.benchmark_torchbench_run_bert_pytorch_training.4500315921.a-tmphjxk9w2x.csv",
+                        "size": 310,
+                        "eTag": "cb5cc0599d7a8283606316f2ff58b49c",
+                        "sequencer": "0066BF8659A2FDB5EE",
+                    },
+                },
             }
         ]
     }