Open
Description
With StarCoder, I observe a pass@1 score of 58.9 on the humanevalfixdocs-python task, instead of the 43.5 reported in the OctoCoder paper.
Script used:
accelerate launch main.py \
--model $MODEL_DIR \
--tasks humanevalfixdocs-python \
--do_sample True \
--temperature 0.2 \
--n_samples 20 \
--batch_size 1 \
--allow_code_execution \
--save_generations \
--trust_remote_code \
--prompt starcodercommit \
--save_generations_path $MODEL_DIR/generations_humanevalfixdocspython_starcodercommit_prompt.json \
--metric_output_path $MODEL_DIR/evaluation_humanevalfixdocspython_starcodercommit_prompt.json \
--max_length_generation 2048 \
--precision fp16
Results:
{
"humanevalfixdocs-python": {
"pass@1": 0.589329268292683,
"pass@10": 0.6989868047455075
},
"config": {
"prefix": "",
"do_sample": true,
"temperature": 0.2,
"top_k": 0,
"top_p": 0.95,
"n_samples": 20,
"eos": "<|endoftext|>",
"seed": 0,
"model": "starcoder",
"modeltype": "causal",
"peft_model": null,
"revision": null,
"use_auth_token": false,
"trust_remote_code": true,
"tasks": "humanevalfixdocs-python",
"instruction_tokens": null,
"batch_size": 1,
"max_length_generation": 2048,
"precision": "fp16",
"load_in_8bit": false,
"load_in_4bit": false,
"limit": null,
"limit_start": 0,
"postprocess": true,
"allow_code_execution": true,
"generation_only": false,
"load_generations_path": null,
"load_data_path": null,
"metric_output_path": "starcoder/evaluation_humanevalfixdocspython_starcodercommit_sample_prompt.json",
"save_generations": true,
"save_generations_path": "starcoder/generations_humanevalfixdocspython_starcodercommit_sample_prompt.json",
"save_references": false,
"prompt": "starcodercommit",
"max_memory_per_gpu": null,
"check_references": false
}
}
CC: @Muennighoff
Metadata
Assignees
Labels
No labels