save benchmarks in results and use for aggregate coverage calculation #650

Merged
merged 3 commits on Sep 25, 2024
data_prep/introspector.py (1 addition, 1 deletion)

@@ -1049,7 +1049,7 @@ def _parse_arguments() -> argparse.Namespace:
       args.max_functions,
       args.target_oracle)
   if benchmarks:
-    benchmarklib.Benchmark.to_yaml(benchmarks, args.out)
+    benchmarklib.Benchmark.to_yaml(benchmarks, outdir=args.out)
   else:
     logger.error('Nothing found for %s', args.project)
     sys.exit(1)

experiment/benchmark.py (10 additions, 5 deletions)

@@ -43,7 +43,10 @@ class Benchmark:
   """Represents a benchmark."""

   @classmethod
-  def to_yaml(cls, benchmarks: list[Benchmark], outdir: str = './'):
+  def to_yaml(cls,
+              benchmarks: list[Benchmark],
+              outdir: str = './',
+              out_basename: str = ''):
     """Converts and saves selected fields of a benchmark to a YAML file."""
     # Register the custom representer
     yaml.add_representer(str, quoted_string_presenter)

@@ -69,8 +72,9 @@ def to_yaml(cls, benchmarks: list[Benchmark], outdir: str = './'):
           'params': benchmark.params
       })

-    with open(os.path.join(outdir, f'{benchmarks[0].project}.yaml'),
-              'w') as file:
+    if not out_basename:
+      out_basename = f'{benchmarks[0].project}.yaml'
+    with open(os.path.join(outdir, out_basename), 'w') as file:
       yaml.dump(result, file, default_flow_style=False, width=sys.maxsize)

   @classmethod

@@ -79,9 +83,10 @@ def from_yaml(cls, benchmark_path: str) -> List:
     benchmarks = []
     with open(benchmark_path, 'r') as benchmark_file:
       data = yaml.safe_load(benchmark_file)
+    if not data:
+      return []

-    project_name = os.path.splitext(os.path.basename(benchmark_path))[0]
+    project_name = data.get('project', '')
     use_context = data.get('use_context', False)
     use_project_examples = data.get('use_project_examples', True)
     cppify_headers = data.get('cppify_headers', False)

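Taken together, these changes let callers choose the output file name and make from_yaml read the project name from the 'project' field inside the YAML instead of from the file name. A minimal usage sketch, not part of this PR: the input path and output directory below are hypothetical, and Benchmark is assumed to be the class from experiment/benchmark.py.

from experiment.benchmark import Benchmark

# Load an existing benchmark set; from_yaml now returns [] for an empty file
# and takes the project name from the 'project' field in the YAML.
benchmarks = Benchmark.from_yaml('benchmark-sets/all/libpng.yaml')  # hypothetical path

if benchmarks:
  # out_basename overrides the default f'{project}.yaml' file name, so a
  # result directory can always use a fixed 'benchmark.yaml'.
  Benchmark.to_yaml(benchmarks,
                    outdir='results/output-libpng-demo',  # hypothetical
                    out_basename='benchmark.yaml')
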
helper/update_comp_benchmarks.py (1 addition, 1 deletion)

@@ -77,7 +77,7 @@ def main():
     if b.function_name in functions:
       selected_bms.append(b)

-  Benchmark.to_yaml(selected_bms, target_path)
+  Benchmark.to_yaml(selected_bms, outdir=target_path)
   logging.info('Updated %s', file_name)


run_all_experiments.py (34 additions, 9 deletions)

@@ -18,6 +18,7 @@
 import json
 import logging
 import os
+import re
 import sys
 import time
 import traceback

@@ -103,7 +104,7 @@ def generate_benchmarks(args: argparse.Namespace) -> None:
     benchmarks = introspector.populate_benchmarks_using_introspector(
         project, project_lang, args.generate_benchmarks_max, benchmark_oracles)
     if benchmarks:
-      benchmarklib.Benchmark.to_yaml(benchmarks, benchmark_dir)
+      benchmarklib.Benchmark.to_yaml(benchmarks, outdir=benchmark_dir)


 def prepare_experiment_targets(

@@ -368,18 +369,38 @@ def _process_total_coverage_gain() -> dict[str, dict[str, Any]]:

   # Load all the textcov dirs
   for benchmark_dir in os.listdir(WORK_DIR):
-    try:
-      project = '-'.join(benchmark_dir.split('-')[1:-1])
-    except:
+    if not os.path.isdir(os.path.join(WORK_DIR, benchmark_dir)):
       continue

+    result_benchmark_used_path = os.path.join(
+        os.path.join(WORK_DIR, benchmark_dir, 'benchmark.yaml'))
+    if not os.path.isfile(result_benchmark_used_path):
+      continue
+
+    project_name = ''
+    ignore_patterns = []
+
+    benchmark_used = benchmarklib.Benchmark.from_yaml(
+        result_benchmark_used_path)
+    if not benchmark_used:
+      logger.info('Did not find benchmark for %s', benchmark_dir)
+      try:
+        project_name = '-'.join(benchmark_dir.split('-')[1:-1])
+      except:
+        continue
+    else:
+      logger.info('Found benchmark for %s', benchmark_dir)
+      project_name = benchmark_used[0].project
+      target_basename = os.path.basename(benchmark_used[0].target_path)
+      ignore_patterns = [re.compile(r'^' + re.escape(target_basename) + ':')]
+
     coverage_reports = os.path.join(WORK_DIR, benchmark_dir,
                                     'code-coverage-reports')
     if not os.path.isdir(coverage_reports):
       continue

-    if project not in textcov_dict:
-      textcov_dict[project] = []
+    if project_name not in textcov_dict:
+      textcov_dict[project_name] = []
     for sample in os.listdir(coverage_reports):
       summary = os.path.join(coverage_reports, sample, 'textcov')
       if not os.path.isdir(summary):

@@ -388,13 +409,17 @@ def _process_total_coverage_gain() -> dict[str, dict[str, Any]]:
       for textcov_file in os.listdir(summary):
         if textcov_file.endswith('.covreport'):
           with open(os.path.join(summary, textcov_file), 'rb') as f:
-            textcov_dict[project].append(textcov.Textcov.from_file(f))
+            textcov_dict[project_name].append(
+                textcov.Textcov.from_file(
+                    f, ignore_function_patterns=ignore_patterns))
         elif textcov_file == 'all_cov.json':
           with open(os.path.join(summary, textcov_file)) as f:
-            textcov_dict[project].append(textcov.Textcov.from_python_file(f))
+            textcov_dict[project_name].append(
+                textcov.Textcov.from_python_file(f))
         elif textcov_file == 'jacoco.xml':
           with open(os.path.join(summary, textcov_file)) as f:
-            textcov_dict[project].append(textcov.Textcov.from_jvm_file(f))
+            textcov_dict[project_name].append(textcov.Textcov.from_jvm_file(f))

   if not textcov_dict:
     return {}

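The ignore pattern built above is meant to drop coverage of functions that live in the generated fuzz target itself, so the harness's own code does not inflate the project-wide aggregate. A small illustration of what that pattern matches; the target path and covreport function names below are made up for the example.

import os
import re

# Hypothetical generated fuzz target path for one benchmark result.
target_path = '/src/libpng/01.c'
target_basename = os.path.basename(target_path)  # '01.c'
ignore = re.compile(r'^' + re.escape(target_basename) + ':')

# Function records prefixed with the target's basename are skipped when
# aggregating coverage; everything else is kept.
print(bool(ignore.match('01.c:LLVMFuzzerTestOneInput')))  # True  -> ignored
print(bool(ignore.match('pngread.c:png_read_info')))      # False -> kept
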
run_one_experiment.py (5 additions, 0 deletions)

@@ -331,6 +331,11 @@ def run(benchmark: Benchmark, model: models.LLM, args: argparse.Namespace,
   """Generates code via LLM, and evaluates them."""
   model.cloud_setup()

+  # Save the benchmark in the working base
+  Benchmark.to_yaml([benchmark],
+                    outdir=work_dirs.base,
+                    out_basename='benchmark.yaml')
+
   if args.agent:
     # TODO(dongge): Make this default when it is ready.
     return _fuzzing_pipelines(benchmark, model, args, work_dirs)

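This saved file is what closes the loop: each experiment writes benchmark.yaml into its result directory, and _process_total_coverage_gain() in run_all_experiments.py reads it back to recover the project name and target path. A minimal sketch of that consumption; the result directory name is hypothetical, and Benchmark is assumed to be the class from experiment/benchmark.py.

import os

from experiment.benchmark import Benchmark

result_dir = 'results/output-libpng-png_read_info'  # hypothetical
saved = os.path.join(result_dir, 'benchmark.yaml')
if os.path.isfile(saved):
  loaded = Benchmark.from_yaml(saved)
  if loaded:
    # The aggregate-coverage pass uses exactly these two fields.
    print(loaded[0].project, loaded[0].target_path)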