Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
alex28sh committed Oct 21, 2024
2 parents 3c26518 + 32677c5 commit 169a48a
Show file tree
Hide file tree
Showing 442 changed files with 240,249 additions and 219 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ run.sh
**/.pytest_cache
run_nagini.py
/tmp
results
results/*
!results/archive
/log_tries/
/log_tries/*
.direnv
scripts/processed
scripts/processed/*
*dump.txt
2 changes: 1 addition & 1 deletion benches/HumanEval-Dafny
Submodule HumanEval-Dafny updated 160 files
9 changes: 9 additions & 0 deletions benches/HumanEval/002_truncate/truncate.prompt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def truncate_number(number: float) -> float:
""" Given a positive floating point number, it can be decomposed into
and integer part (largest integer smaller than given number) and decimals
(leftover part always smaller than 1).

Return the decimal part of the number.
>>> truncate_number(3.5)
0.5
"""
23 changes: 16 additions & 7 deletions gui/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@ fn add_common_arguments<'a>(
token: &str,
settings: &Settings,
) -> &'a mut Command {
let bench_type = if settings.incremental_run {
String::from("validating")
} else {
settings.bench_type.to_string()
};
if settings.do_filter {
cmd.args(["--filter-by-ext", &settings.filter_by_ext]);
}
Expand All @@ -72,10 +67,24 @@ fn add_common_arguments<'a>(
.args(["--insert-conditions-mode", "llm-single-step"])
.args(["--llm-profile", settings.llm_profile.as_grazie()])
.args(["--grazie-token", token])
.args(["--bench-type", &bench_type])
.args(["--bench-type", &settings.bench_type.to_string()])
.args(["--tries", &make_tries(&settings.tries)])
.args(["--retries", &make_retries(&settings.retries)])
.args(["--verifier-timeout", &make_timeout(&settings.timeout)])
.args(["--verifier-timeout", &make_timeout(&settings.timeout)]);

if settings.incremental_run && settings.ignore_failed {
cmd.arg("--ignore-failed");
}
if settings.remove_conditions {
cmd.arg("--remove-conditions");
}
if settings.remove_implementations {
cmd.arg("--remove-implementations");
}
if settings.include_text_descriptions {
cmd.arg("--include-text-descriptions");
}
cmd
}

fn parse_output(output: Output) -> (String, String) {
Expand Down
27 changes: 15 additions & 12 deletions gui/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,18 @@ fn main() -> eframe::Result {
"Verified codegen",
options,
Box::new(|cc| {
let settings = should_restore()
let mut settings: Settings = should_restore()
.then(|| {
cc.storage.and_then(|storage| {
let settings = storage.get_string("settings_json")?;
serde_json::from_str(&settings).ok()
})
})
.flatten()
.unwrap_or_default();
.unwrap_or_else(Settings::from_env);
if let Ok(token) = std::env::var("GRAZIE_JWT_TOKEN") {
settings.grazie_token = token;
}
let state = AppState {
settings,
..Default::default()
Expand Down Expand Up @@ -152,11 +155,12 @@ struct AppState {
log: Arc<RwLock<Option<String>>>,
}

#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
enum LLMProfile {
GPT4o,
GPT4Turbo,
Claude3Opus,
#[default]
Claude35Sonnet,
}

Expand Down Expand Up @@ -191,7 +195,7 @@ impl Display for LLMProfile {
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct Settings {
grazie_token: String,
llm_profile: LLMProfile,
Expand All @@ -208,26 +212,25 @@ struct Settings {
do_filter: bool,
filter_by_ext: String,
incremental_run: bool,
ignore_failed: bool,
remove_conditions: bool,
remove_implementations: bool,
include_text_descriptions: bool,
}

impl Default for Settings {
fn default() -> Self {
impl Settings {
fn from_env() -> Self {
Self {
grazie_token: std::env::var("GRAZIE_JWT_TOKEN").unwrap_or_default(),
llm_profile: LLMProfile::GPT4o,
verifier_command: std::env::var("VERIFIER_COMMAND").unwrap_or_default(),
generate_command: String::from("verified-cogen"),
use_poetry: std::env::var("USE_POETRY").unwrap_or_default() == "1",
prompts_directory: std::env::var("PROMPTS_DIRECTORY").unwrap_or_default(),
tries: String::from("1"),
retries: String::from("0"),
bench_type: BenchMode::Invariants,
file_mode: FileMode::SingleFile,
runs: String::from("1"),
timeout: String::from("60"),
incremental_run: false,
do_filter: false,
filter_by_ext: String::new(),
..Settings::default()
}
}
}
Expand Down
36 changes: 21 additions & 15 deletions gui/src/ui/run.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{fs::File, io::Read, path::PathBuf, sync::Arc};
use std::{collections::HashMap, fs::File, io::Read, path::PathBuf, sync::Arc};

use eframe::egui::{TextEdit, Ui};

Expand Down Expand Up @@ -111,20 +111,26 @@ impl AppState {
false => APP_DIRS.cache_dir().join("total_cnt.json"),
};

File::open(results_path)
.expect("results are not where they should be")
.read_to_string(&mut results_contents)
.expect("failed read");

if let Ok(mut results) = run_results.write() {
*results = Some(
serde_json::from_str(&results_contents)
.expect("results must contain a valid json"),
);
file_count.store(
cnt.expect("should be dir"),
std::sync::atomic::Ordering::SeqCst,
);
if File::open(results_path)
.ok()
.and_then(|mut file| {
file.read_to_string(&mut results_contents).ok()
})
.is_some()
{
if let Ok(mut results) = run_results.write() {
*results = Some({
let result: HashMap<String, f64> =
serde_json::from_str(&results_contents)
.expect("results must contain a valid json");

result.into_iter().filter(|(_, v)| *v != -1.0).collect()
});
file_count.store(
cnt.expect("should be dir"),
std::sync::atomic::Ordering::SeqCst,
);
}
}
}
}
Expand Down
24 changes: 20 additions & 4 deletions gui/src/ui/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,12 @@ impl AppState {
self.settings.file_mode = FileMode::Directory;
}
ui.add_space(2.0);
if !self.settings.incremental_run {
ui.horizontal(|ui| {
ui.label("Bench mode:");
ui.columns(2, |cols| {
let [left_ui, right_ui] = cols else {
panic!("should have 2 columns")
};
left_ui.label("Bench mode:");
left_ui.horizontal(|ui| {
egui::ComboBox::from_id_source("bench-mode-select")
.selected_text(format!("{}", self.settings.bench_type.name()))
.show_ui(ui, |ui| {
Expand All @@ -51,8 +54,21 @@ impl AppState {
);
}
});
if self.settings.incremental_run {
ui.checkbox(&mut self.settings.ignore_failed, "Ignore failed");
}
});
}

right_ui.checkbox(&mut self.settings.remove_conditions, "Remove conditions");
right_ui.checkbox(
&mut self.settings.remove_implementations,
"Remove implementations",
);
right_ui.checkbox(
&mut self.settings.include_text_descriptions,
"Include text descriptions",
);
});

ui.separator();
ui.horizontal(|ui| {
Expand Down
4 changes: 4 additions & 0 deletions prompts/humaneval-dafny-cot-onestep/ask_for_fixed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The following errors occurred during verification:
{error}

Please fix the error by adding, removing or modifying the invariants and/or assertions and return the fixed program.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
There are still some errors:
{error}

Could you please fix them?
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
Let's go over the provided program step by step.
First, we'll identify what each function does and what its input and output are.

1. The function `sum` takes a `seq<int>` as input and returns an `int` as output. Looking at it's body, we see that it takes the first element of the sequence, adds it to the sum of the rest of the elements, and returns the result.

2. The lemma `sum_prop` states that given that length of a sequence is greater than 0, the sum of the sequence is equal to the sum of first `|s| - 1` elements plus the last element.

3. The method `sum_loop` takes a `seq<int>` as input and returns an `int` as output. It has an accumulator variable `s` that starts at 0. It then loops through the sequence, adding each element to the accumulator. As we see, it uses a `while` loop, so one of the invariants dafny requires is that `0 <= i <= |s|`.

Now, let's try to prove the `sum_loop` method. We'll start by adding the `0 <= i <= |s|` invariant to the while loop:
```
while (i < |numbers|)
invariant 0 <= i <= |numbers|
```

Next, we need to use the `sum_prop` lemma to prove that `s == sum(numbers[0..i])`. To do this, we'll need to prove that adding `numbers[i]` to `sum(numbers[0..i])` results in `sum(numbers[0..i + 1])`. That's exactly what `sum_prop(numbers[0..i + 1])` does. For dafny to be able to ensure that, we'll need to prove that `numbers[0..i + 1][..i] == numbers[0..i]`. This is an automatic proof that dafny can do, so we don't need to add any extra proof steps, let's just add an assertion:
```
assert numbers[0..i + 1][..i] == numbers[0..i];
```

Finally, we'll add the `s == sum(numbers[0..i])` invariant to the while loop:
```
while (i < |numbers|)
invariant 0 <= i <= |numbers|
invariant s = sum(numbers[0..i])
```

Now, we can use the `sum_prop` lemma to prove that `s == sum(numbers[0..i])`:

```
assert numbers[0..i + 1][..i] == numbers[0..i];
sum_prop(numbers[0..i + 1]);
assert sum(numbers[0..i + 1]) == sum(numbers[0..i]) + numbers[i];
```

Now, the proof is complete:

<answer>
function sum(s: seq<int>) : int {
if |s| == 0 then 0 else s[0] + sum(s[1..])
}

lemma sum_prop(s: seq<int>)
requires |s| > 0
ensures sum(s) == sum(s[..|s| - 1]) + s[ |s| - 1 ]
{
if (|s| > 1) {
assert (s[1..][..|s[1..]| - 1]) == s[1..|s| - 1];
}
}
method sum_loop(numbers: seq<int>) returns (s: int)
ensures s == sum(numbers)
{
s := 0;
var i := 0;
while (i < |numbers|)
invariant 0 <= i <= |numbers|
invariant s == sum(numbers[0..i])
{
s := s + numbers[i];

assert numbers[0..i + 1][..i] == numbers[0..i];
sum_prop(numbers[0..i + 1]);
assert sum(numbers[0..i + 1]) == sum(numbers[0..i]) + numbers[i];

i := i + 1;
}
}
</answer>
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Rewrite the following dafny program, adding correct invariants into the loops and adding assertions in nessesary places.
Do not change the code, only add invariants and assertions. Don't remove any helper functions, they are there to help you.
Explain all your reasoning in detail before answering. Put the answer in <answer></answer> tags without markdown backticks.
The program:

```dafny
function sum(s: seq<int>) : int {
if |s| == 0 then 0 else s[0] + sum(s[1..])
}

lemma sum_prop(s: seq<int>)
requires |s| > 0
ensures sum(s) == sum(s[..|s| - 1]) + s[ |s| - 1 ]
{
if (|s| > 1) {
assert (s[1..][..|s[1..]| - 1]) == s[1..|s| - 1];
}
}
method sum_loop(numbers: seq<int>) returns (s: int)
ensures s == sum(numbers)
{
s := 0;
var i := 0;
while (i < |numbers|)
{
s := s + numbers[i];

i := i + 1;
}
}
```
5 changes: 5 additions & 0 deletions prompts/humaneval-dafny-cot-onestep/rewrite/question.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Rewrite the following dafny program, adding correct invariants into the loops and adding assertions in nessesary places.
Do not change the code, only add invariants and assertions. Don't remove any helper functions, they are there to help you.
Explain all your reasoning in detail before answering. Put the answer in <answer></answer> tags without markdown backticks.
The program:
{program}
3 changes: 3 additions & 0 deletions prompts/humaneval-dafny-cot-onestep/sys.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
You are an expert in Dafny.
You will be given tasks dealing with Dafny programs including precise annotations.
You respond only with code blocks.
3 changes: 3 additions & 0 deletions prompts/humaneval-dafny-cot-onestep/timeout.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The verifier timed out during the verification.
This usually means that the provided invariants were too broad or were difficult to check.
Could you please try to improve the invariants and try again?
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The following errors occurred during verification:
{error}

Please fix the error by adding, removing or modifying the invariants and/or assertions and return the fixed program.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
There are still some errors:
{error}

Could you please fix them?
Loading

0 comments on commit 169a48a

Please sign in to comment.