save benchmarks in results and use for aggregate coverage calculation #650

Merged
merged 3 commits on Sep 25, 2024
data_prep/introspector.py (1 addition, 1 deletion)

@@ -1049,7 +1049,7 @@ def _parse_arguments() -> argparse.Namespace:
       args.max_functions,
       args.target_oracle)
   if benchmarks:
-    benchmarklib.Benchmark.to_yaml(benchmarks, args.out)
+    benchmarklib.Benchmark.to_yaml(benchmarks, outdir=args.out)
   else:
     logger.error('Nothing found for %s', args.project)
     sys.exit(1)

experiment/benchmark.py (10 additions, 5 deletions)

@@ -43,7 +43,10 @@ class Benchmark:
   """Represents a benchmark."""

   @classmethod
-  def to_yaml(cls, benchmarks: list[Benchmark], outdir: str = './'):
+  def to_yaml(cls,
+              benchmarks: list[Benchmark],
+              outdir: str = './',
+              out_basename: str = ''):
     """Converts and saves selected fields of a benchmark to a YAML file."""
     # Register the custom representer
     yaml.add_representer(str, quoted_string_presenter)

@@ -69,8 +72,9 @@ def to_yaml(cls, benchmarks: list[Benchmark], outdir: str = './'):
           'params': benchmark.params
       })

-    with open(os.path.join(outdir, f'{benchmarks[0].project}.yaml'),
-              'w') as file:
+    if not out_basename:
+      out_basename = f'{benchmarks[0].project}.yaml'
+    with open(os.path.join(outdir, out_basename), 'w') as file:
       yaml.dump(result, file, default_flow_style=False, width=sys.maxsize)

   @classmethod

@@ -79,9 +83,10 @@ def from_yaml(cls, benchmark_path: str) -> List:
     benchmarks = []
     with open(benchmark_path, 'r') as benchmark_file:
       data = yaml.safe_load(benchmark_file)
+    if not data:
+      return []

-    project_name = os.path.splitext(os.path.basename(benchmark_path))[0]
+    project_name = data.get('project', '')
     use_context = data.get('use_context', False)
     use_project_examples = data.get('use_project_examples', True)
     cppify_headers = data.get('cppify_headers', False)

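Taken together, these changes let callers choose the output file name and make from_yaml read the project name from the 'project' field inside the YAML instead of from the file name. A minimal usage sketch, not part of this PR: the input path and output directory below are hypothetical, and Benchmark is assumed to be the class from experiment/benchmark.py.

from experiment.benchmark import Benchmark

# Load an existing benchmark set; from_yaml now returns [] for an empty file
# and takes the project name from the 'project' field in the YAML.
benchmarks = Benchmark.from_yaml('benchmark-sets/all/libpng.yaml')  # hypothetical path

if benchmarks:
  # out_basename overrides the default f'{project}.yaml' file name, so a
  # result directory can always use a fixed 'benchmark.yaml'.
  Benchmark.to_yaml(benchmarks,
                    outdir='results/output-libpng-demo',  # hypothetical
                    out_basename='benchmark.yaml')
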
helper/update_comp_benchmarks.py (1 addition, 1 deletion)

@@ -77,7 +77,7 @@ def main():
     if b.function_name in functions:
       selected_bms.append(b)

-  Benchmark.to_yaml(selected_bms, target_path)
+  Benchmark.to_yaml(selected_bms, outdir=target_path)
   logging.info('Updated %s', file_name)


run_all_experiments.py (34 additions, 9 deletions)

@@ -18,6 +18,7 @@
 import json
 import logging
 import os
+import re
 import sys
 import time
 import traceback

@@ -103,7 +104,7 @@ def generate_benchmarks(args: argparse.Namespace) -> None:
     benchmarks = introspector.populate_benchmarks_using_introspector(
         project, project_lang, args.generate_benchmarks_max, benchmark_oracles)
     if benchmarks:
-      benchmarklib.Benchmark.to_yaml(benchmarks, benchmark_dir)
+      benchmarklib.Benchmark.to_yaml(benchmarks, outdir=benchmark_dir)


 def prepare_experiment_targets(

@@ -368,18 +369,38 @@ def _process_total_coverage_gain() -> dict[str, dict[str, Any]]:

   # Load all the textcov dirs
   for benchmark_dir in os.listdir(WORK_DIR):
-    try:
-      project = '-'.join(benchmark_dir.split('-')[1:-1])
-    except:
+    if not os.path.isdir(os.path.join(WORK_DIR, benchmark_dir)):
       continue

+    result_benchmark_used_path = os.path.join(
+        os.path.join(WORK_DIR, benchmark_dir, 'benchmark.yaml'))
+    if not os.path.isfile(result_benchmark_used_path):
+      continue
+
+    project_name = ''
+    ignore_patterns = []
+
+    benchmark_used = benchmarklib.Benchmark.from_yaml(
+        result_benchmark_used_path)
+    if not benchmark_used:
+      logger.info('Did not find benchmark for %s', benchmark_dir)
+      try:
+        project_name = '-'.join(benchmark_dir.split('-')[1:-1])
+      except:
+        continue
+    else:
+      logger.info('Found benchmark for %s', benchmark_dir)
+      project_name = benchmark_used[0].project
+      target_basename = os.path.basename(benchmark_used[0].target_path)
+      ignore_patterns = [re.compile(r'^' + re.escape(target_basename) + ':')]
+
     coverage_reports = os.path.join(WORK_DIR, benchmark_dir,
                                     'code-coverage-reports')
     if not os.path.isdir(coverage_reports):
       continue

-    if project not in textcov_dict:
-      textcov_dict[project] = []
+    if project_name not in textcov_dict:
+      textcov_dict[project_name] = []
     for sample in os.listdir(coverage_reports):
       summary = os.path.join(coverage_reports, sample, 'textcov')
       if not os.path.isdir(summary):

@@ -388,13 +409,17 @@ def _process_total_coverage_gain() -> dict[str, dict[str, Any]]:
       for textcov_file in os.listdir(summary):
         if textcov_file.endswith('.covreport'):
           with open(os.path.join(summary, textcov_file), 'rb') as f:
-            textcov_dict[project].append(textcov.Textcov.from_file(f))
+            textcov_dict[project_name].append(
+                textcov.Textcov.from_file(
+                    f, ignore_function_patterns=ignore_patterns))
         elif textcov_file == 'all_cov.json':
           with open(os.path.join(summary, textcov_file)) as f:
-            textcov_dict[project].append(textcov.Textcov.from_python_file(f))
+            textcov_dict[project_name].append(
+                textcov.Textcov.from_python_file(f))
         elif textcov_file == 'jacoco.xml':
           with open(os.path.join(summary, textcov_file)) as f:
-            textcov_dict[project].append(textcov.Textcov.from_jvm_file(f))
+            textcov_dict[project_name].append(textcov.Textcov.from_jvm_file(f))

   if not textcov_dict:
     return {}

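The ignore pattern built above is meant to drop coverage of functions that live in the generated fuzz target itself, so the harness's own code does not inflate the project-wide aggregate. A small illustration of what that pattern matches; the target path and covreport function names below are made up for the example.

import os
import re

# Hypothetical generated fuzz target path for one benchmark result.
target_path = '/src/libpng/01.c'
target_basename = os.path.basename(target_path)  # '01.c'
ignore = re.compile(r'^' + re.escape(target_basename) + ':')

# Function records prefixed with the target's basename are skipped when
# aggregating coverage; everything else is kept.
print(bool(ignore.match('01.c:LLVMFuzzerTestOneInput')))  # True  -> ignored
print(bool(ignore.match('pngread.c:png_read_info')))      # False -> kept
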
run_one_experiment.py (5 additions, 0 deletions)

@@ -331,6 +331,11 @@ def run(benchmark: Benchmark, model: models.LLM, args: argparse.Namespace,
   """Generates code via LLM, and evaluates them."""
   model.cloud_setup()

+  # Save the benchmark in the working base
+  Benchmark.to_yaml([benchmark],
+                    outdir=work_dirs.base,
+                    out_basename='benchmark.yaml')
+
   if args.agent:
     # TODO(dongge): Make this default when it is ready.
     return _fuzzing_pipelines(benchmark, model, args, work_dirs)

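This saved file is what closes the loop: each experiment writes benchmark.yaml into its result directory, and _process_total_coverage_gain() in run_all_experiments.py reads it back to recover the project name and target path. A minimal sketch of that consumption; the result directory name is hypothetical, and Benchmark is assumed to be the class from experiment/benchmark.py.

import os

from experiment.benchmark import Benchmark

result_dir = 'results/output-libpng-png_read_info'  # hypothetical
saved = os.path.join(result_dir, 'benchmark.yaml')
if os.path.isfile(saved):
  loaded = Benchmark.from_yaml(saved)
  if loaded:
    # The aggregate-coverage pass uses exactly these two fields.
    print(loaded[0].project, loaded[0].target_path)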