Skip to content

Commit

Permalink
Added prompts with different lengths.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 655310563
  • Loading branch information
The gemma.cpp Authors authored and copybara-github committed Sep 4, 2024
1 parent 9661b81 commit 043bcf8
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 7 deletions.
5 changes: 4 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,10 @@ cc_binary(

cc_binary(
name = "benchmarks",
srcs = ["evals/benchmarks.cc"],
srcs = [
"evals/benchmarks.cc",
"evals/prompts.h",
],
deps = [
":benchmark_helper",
"@benchmark//:benchmark",
Expand Down
40 changes: 34 additions & 6 deletions evals/benchmarks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include "benchmark/benchmark.h"
#include "evals/benchmark_helper.h"
#include "evals/prompts.h"

namespace gcpp {

Expand All @@ -42,26 +43,53 @@ void RunPrompt(const std::string& original_prompt, benchmark::State& state) {
state.SetItemsProcessed(total_tokens);
}

// Benchmark body for a single prompt.
//
// For every iteration of `state`, a fresh copy of `original_prompt` is handed
// to s_env->QueryModel(). The second element of the returned pair (presumably
// the number of tokens processed) is summed across iterations and reported
// through state.SetItemsProcessed() once the loop ends. When
// s_env->Verbosity() is non-zero, each response is echoed to stdout between
// '|' delimiters.
//
// NOTE(review): despite the name, this function takes no name argument —
// confirm the intended signature.
void RunPromptWithName(const std::string& original_prompt,
                       benchmark::State& state) {
  size_t tokens_seen = 0;
  for (auto _ : state) {
    // Start every iteration from an unmodified copy of the prompt.
    std::string query = original_prompt;
    auto [reply, token_count] = s_env->QueryModel(query);
    const bool verbose = s_env->Verbosity() != 0;
    if (verbose) {
      fprintf(stdout, "|%s|\n", reply.c_str());
    }
    tokens_seen += token_count;
  }

  state.SetItemsProcessed(tokens_seen);
}

} // namespace gcpp

// Benchmark entry point for the short prompt; forwards to gcpp::RunPrompt.
// NOTE(review): two RunPrompt calls are visible here, one with the inline
// literal and one with ShortPrompt(). This looks like a removed/added line
// pair from the diff view rather than an intentional double run — confirm.
static void BM_short_prompt(benchmark::State& state) {
gcpp::RunPrompt("What is the capital of Spain?", state);
gcpp::RunPrompt(ShortPrompt(), state);
}

// Benchmark entry point for the factuality prompt; forwards to
// gcpp::RunPrompt.
// NOTE(review): two RunPrompt calls are visible here, one with the inline
// literal and one with FactualityPrompt(). This looks like a removed/added
// line pair from the diff view rather than an intentional double run — confirm.
static void BM_factuality_prompt(benchmark::State& state) {
gcpp::RunPrompt("How does an inkjet printer work?", state);
gcpp::RunPrompt(FactualityPrompt(), state);
}

// Benchmark entry point for the creative-writing prompt; forwards to
// gcpp::RunPrompt.
// NOTE(review): two RunPrompt calls are visible here, one with the inline
// literal and one with CreativePrompt(). This looks like a removed/added
// line pair from the diff view rather than an intentional double run — confirm.
static void BM_creative_prompt(benchmark::State& state) {
gcpp::RunPrompt("Tell me a story about a magical bunny and their TRS-80.",
state);
gcpp::RunPrompt(CreativePrompt(), state);
}

// Benchmark entry point for the coding prompt; forwards to gcpp::RunPrompt.
// NOTE(review): two RunPrompt calls are visible here, one with the inline
// literal and one with CodingPrompt(). This looks like a removed/added line
// pair from the diff view rather than an intentional double run — confirm.
static void BM_coding_prompt(benchmark::State& state) {
gcpp::RunPrompt("Write a python program to generate a fibonacci sequence.",
state);
gcpp::RunPrompt(CodingPrompt(), state);
}

// Benchmark entry point parameterised by prompt length: the benchmark's first
// argument (state.range(0)) is passed to GetPrompt() and the prompt it returns
// is run through gcpp::RunPrompt.
static void BM_diff_length_prompt(benchmark::State& state) {
  // GetPrompt() takes an int, so the conversion from range(0) is spelled out.
  const int requested_length = static_cast<int>(state.range(0));
  const char* const prompt = GetPrompt(requested_length);
  gcpp::RunPrompt(prompt, state);
}

// Registers BM_diff_length_prompt with one argument per prompt length
// (32, 64, 128, 256, 512). Results use real (wall-clock) time in
// milliseconds, and every configuration is repeated three times.
BENCHMARK(BM_diff_length_prompt)
    ->Unit(benchmark::kMillisecond)
    ->UseRealTime()
    ->Repetitions(3)
    ->Arg(32)
    ->Arg(64)
    ->Arg(128)
    ->Arg(256)
    ->Arg(512);

BENCHMARK(BM_short_prompt)
->Iterations(3)
->Unit(benchmark::kMillisecond)
Expand Down
119 changes: 119 additions & 0 deletions evals/prompts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#ifndef PROMPTS_H // Include guard to prevent multiple inclusions
#define PROMPTS_H

// Prompts for different tasks
// Returns the short benchmark prompt (a one-sentence question) as a
// NUL-terminated string literal.
static const char* ShortPrompt() {
  const char* const prompt = "What is the capital of Spain?";
  return prompt;
}

// Returns the factuality benchmark prompt (a "how does it work" question) as
// a NUL-terminated string literal.
static const char* FactualityPrompt() {
  const char* const prompt = "How does an inkjet printer work?";
  return prompt;
}

// Returns the creative-writing benchmark prompt (a story request) as a
// NUL-terminated string literal.
static const char* CreativePrompt() {
  const char* const prompt =
      "Tell me a story about a magical bunny and their TRS-80.";
  return prompt;
}

// Returns the coding benchmark prompt (a request for a small program) as a
// NUL-terminated string literal.
static const char* CodingPrompt() {
  const char* const prompt =
      "Write a python program to generate a fibonacci sequence.";
  return prompt;
}

// The input prompts, each named by its token length.

// Returns the prompt that GetPrompt() serves for length 32. Per the file's
// naming scheme the number is the prompt's token length (tokenizer not
// visible here — confirm). The text deliberately stops mid-sentence.
static const char* Prompt32() {
  const char* const prompt =
      "Once upon a time, there existed a little girl who liked to have "
      "adventures. She wanted to go to places and";
  return prompt;
}

// Returns the prompt that GetPrompt() serves for length 64. Per the file's
// naming scheme the number is the prompt's token length (tokenizer not
// visible here — confirm). The text deliberately stops mid-sentence and ends
// with a trailing space.
static const char* Prompt64() {
  const char* const prompt =
      "It is done, and submitted. You can play 'Survival of the Tastiest' "
      "on Android, and on the web. Playing on the web works, but you have "
      "to simulate multiple touch for table moving and that can be a bit "
      "confusing. There is a lot ";
  return prompt;
}

// Returns the prompt that GetPrompt() serves for length 128. Per the file's
// naming scheme the number is the prompt's token length (tokenizer not
// visible here — confirm). The text is reproduced verbatim, spelling
// included, because editing it would presumably change the token count; it
// deliberately stops mid-word.
static const char* Prompt128() {
  const char* const prompt =
      "It's done, and submitted. You can play 'Survival of the Tastiest' on "
      "Android, and on the web. Playing on the web works, but you have to "
      "simulate multiple touch for table moving and that can be a bit "
      "confusing. There is a lot I'd like to talk about. I will go through"
      " every topic, insted of making the typical what went right/wrong list"
      ". Concept Working over the theme was probably one of the hardest "
      "tasks which I had to face. Originally, I had an idea of what kind of "
      "game I wanted to develop, gamep";
  return prompt;
}

// Returns the prompt that GetPrompt() serves for length 256. Per the file's
// naming scheme the number is the prompt's token length (tokenizer not
// visible here — confirm). The text is reproduced verbatim, spelling
// included, because editing it would presumably change the token count; it
// deliberately stops mid-sentence and ends with a trailing space.
static const char* Prompt256() {
  const char* const prompt =
      "It is done, and submitted. You can play 'Survival of the Tastiest' on"
      " Android, and on the web. Playing on the web works, but you have to "
      "simulate multiple touch for table moving and that can be a bit "
      "confusing. There is a lot I'd like to talk about. I will go through "
      "every topic, insted of making the typical what went right/wrong list."
      " Concept Working over the theme was probably one of the hardest tasks"
      " which I had to face. Originally, I had an idea of what kind of game "
      "I wanted to develop, gameplay wise - something with a lot of "
      "enemies/actors, simple graphics, maybe set in space, controlled from "
      "a top-down view. I was confident that I could fit any theme around "
      "it. In the end, the problem with a theme like 'Evolution' in a game "
      "is that evolution is unassisted. It happens through several seemingly"
      " random mutations over time, with the most apt permutation surviving."
      " This genetic car simulator is, in my opinion, a great example of "
      "actual evolution of a species facing a challenge. But is it a game? "
      "In a game, you need to control something to reach an objective. This "
      "could be a character, a ";
  return prompt;
}

// Returns the prompt that GetPrompt() serves for length 512. Per the file's
// naming scheme the number is the prompt's token length (tokenizer not
// visible here — confirm). The text is reproduced verbatim, spelling
// included, because editing it would presumably change the token count; it
// deliberately stops mid-sentence and ends with a trailing space.
static const char* Prompt512() {
  const char* const prompt =
      "It is done, and submitted. You can play 'Survival of the Tastiest'"
      " on Android, and on the web. Playing on the web works, but you have to"
      " simulate multiple touch for table moving and that can be a bit"
      " confusing. There is a lot I'd like to talk about. I will go through"
      " every topic, instead of making the typical what went right/wrong list."
      " Concept Working over the theme was probably one of the hardest tasks"
      " which I had to face. Originally, I had an idea of what kind of game I"
      " wanted to develop, gameplay wise - something with a lot of"
      " enemies/actors, simple graphics, maybe set in space, controlled from"
      " a top-down view. I was confident that I could fit any theme around"
      " it. In the end, the problem with a theme like 'Evolution' in a game"
      " is that evolution is unassisted. It happens through several seemingly"
      " random mutations over time, with the most apt permutation surviving."
      " This genetic car simulator is, in my opinion, a great example of"
      " actual evolution of a species facing a challenge. But is it a game?"
      " In a game, you need to control something to reach an objective. That"
      " control goes against what evolution is supposed to be like. If you"
      " allow the user to pick how to evolve something, it's not evolution"
      " anymore - it's the equivalent of intelligent design, the fable"
      " invented by creationists to combat the idea of evolution. Being"
      " agnostic and a Pastafarian, that's not something that rubbed me the"
      " right way. Hence, my biggest dillema when deciding what to create was"
      " not with what I wanted to create, but with what I did not. I didn't"
      " want to create an 'intelligent design' simulator and wrongly call it"
      " evolution. This is a problem, of course, every other contestant also"
      " had to face. And judging by the entries submitted, not many managed"
      " to work around it. I'd say the only real solution was through the use"
      " of artificial selection, somehow. So far, I have not seen any entry"
      " using this at its core gameplay. Alas, this is just a fun competition"
      " and after a while I decided not to be as strict with the game idea,"
      " and allowed myself to pick whatever I thought would work out. My"
      " initial idea was to create something where humanity tried to evolve"
      " to a next level but had some kind of foe trying to stop them from"
      " doing so. I kind of had this image of human souls flying in space"
      " towards ";
  return prompt;
}

// Maps a requested prompt length to the matching fixed prompt.
//
// `length` must be one of 32, 64, 128, 256 or 512 to select the corresponding
// PromptNN() text; any other value falls back to ShortPrompt().
static const char* GetPrompt(int length) {
  struct PromptEntry {
    int length;
    const char* (*text)();
  };
  static constexpr PromptEntry kPromptsByLength[] = {
      {32, Prompt32},   {64, Prompt64},   {128, Prompt128},
      {256, Prompt256}, {512, Prompt512},
  };

  for (const PromptEntry& entry : kPromptsByLength) {
    if (entry.length == length) {
      return entry.text();
    }
  }
  // Unsupported lengths get the short prompt.
  return ShortPrompt();
}

#endif

0 comments on commit 043bcf8

Please sign in to comment.