From 78b8e1605a646f4d5f89787d30088d04935e8ea1 Mon Sep 17 00:00:00 2001 From: zichao Date: Sat, 18 Nov 2023 17:29:57 -0600 Subject: [PATCH 1/2] update link --- docs/assets/js/benchmark_tasks.js | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/docs/assets/js/benchmark_tasks.js b/docs/assets/js/benchmark_tasks.js index 7ad3770..99fb0f9 100644 --- a/docs/assets/js/benchmark_tasks.js +++ b/docs/assets/js/benchmark_tasks.js @@ -17,6 +17,7 @@ function showText(buttonId) { code = "CountSavory"; property = "Navigation, Perception, Ask Human, Loops, Arithmetic, Commonsense Reasoning"; statenum = 4; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/CountSavory.py"; } else if(buttonId === "ET") { prompt1 = "Go to the elevator. Wait until someone shows up and ask them if they are here for the tour. If yes, welcome them to the university, tell them to follow you, and take them to the main conference room. If not, wait for the next person. When you get to the conference room, say you have arrived at the conference room and also say enjoy your visit here!" prompt2 = "Go to the elevator, and wait until someone shows up. Ask them if they are here for the tour. If yes, welcome them to the university, tell them to follow you, and take them to the main conference room. If not, wait for the next person. When you get to the conference room, say you have arrived at the conference room and wish them an enjoyable visit." 
@@ -26,6 +27,7 @@ function showText(buttonId) { code = "ElevatorTour"; property = "Navigation, Perception, Ask Human, Loops"; statenum = 3; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/ElevatorTour.py"; } else if(buttonId === "FB") { prompt1 = "Check where I left my backpack in all of the conference rooms and bring it back to me."; prompt2 = "Search all the conference rooms for my backpack and return it to me."; @@ -35,6 +37,7 @@ function showText(buttonId) { code = "FindBackpack"; property = "Navigation, Perception, Manipulation, Loops"; statenum = 3; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/FindBackpack.py"; } else if(buttonId === "GD") { prompt1 = "Go to the lobby, and ask if the visitor would like a bottle of water, iced tea, or lemonade. Bring what they ask for from the kitchen."; prompt2 = "Ask any visitor in the lobby if they would like a bottle of water, iced tea, or lemonade. Bring what they ask for from the kitchen."; @@ -44,6 +47,7 @@ function showText(buttonId) { code = "GetDrink"; property = "Navigation, Perception, Ask Human, Manipulation"; statenum = 3; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/GetDrink.py"; } else if(buttonId === "GC") { prompt1 = "Compile a list of ingredients for grilled cheese. Go to Zarko’s office, and ask him which of them he has. Come back and tell me what he does not have."; prompt2 = "Create a list of ingredients needed to make grilled cheese. Visit Zarko's office and inquire which of them he possesses. 
Return and inform me of the items he does not have."; @@ -53,6 +57,7 @@ function showText(buttonId) { code = "GrilledCheese"; property = "Navigation, Perception, Ask Human, Manipulation, Commonsense Reasoning"; statenum = 1; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/GrilledCheese.py"; } else if(buttonId === "HL") { prompt1 = "Go to every office, and if there is anyone there, ask if they'd like a chocolate, caramel, or gummy. Come back and tell me how many of each we need to buy."; prompt2 = "Go to every office, and if there is a person there, ask them if they'd like a chocolate, caramel, or gummy. Come back and tell me how many of each we need to buy."; @@ -62,6 +67,7 @@ function showText(buttonId) { code = "HalloweenList"; property = "Navigation, Perception, Ask Human, Loops, Arithmetic" statenum = 3; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/HalloweenList.py"; } else if(buttonId === "HS") { prompt1 = "Go to every office, and if there is anyone there, ask them how many from 0 to 4 of the following treats they’d like: chocolate, caramel, and gummy. Come back and tell me how many of each we need to buy."; prompt2 = "For each office, check if there is anyone there. If there is, ask the person to choose how many (0-4) of each of the following they'd like: caramel, chocolate, or gummy. Come back and tell me how many of each we need to buy."; @@ -71,6 +77,7 @@ function showText(buttonId) { code = "HalloweenShopping"; property = "Navigation, Perception, Ask Human, Loops, Arithmetic"; statenum = 5; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/HalloweenShopping.py"; } else if(buttonId === "LB") { prompt1 = "Ask if Alice and Bob in their offices are up for lunch. If yes, tell them that we'll meet in the lobby in 5 minutes. 
Come back and tell me who all are joining for lunch."; prompt2 = "Ask Alice in her office if she is up for lunch, and if yes, tell her that we will meet in the lobby in 5 minutes. Do the same for Bob. Come back and tell me who all are joining for lunch."; @@ -80,6 +87,7 @@ function showText(buttonId) { code = "LunchBreak"; property = "Navigation, Perception, Ask Human"; statenum = 4; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/LunchBreak.py"; } else if(buttonId === "LT") { prompt1 = "Go to Jill's office and ask her if she'd like to go for lunch tomorrow. If yes, ask her at what time (offer her some reasonable options). Come back and tell me what she said."; prompt2 = "Go to Jill’s office and ask her whether she's free to go for lunch tomorrow. If yes, ask her when (give her some reasonable times), and come back and tell me what she said."; @@ -89,6 +97,7 @@ function showText(buttonId) { code = "LunchTime"; property = "Navigation, Perception, Ask Human, Commonsense Reasoning"; statenum = 2; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/LunchTime.py"; } else if(buttonId === "MD") { prompt1 = "Pick up a mail bin from the mail room. Go to every office and ask if they have any mail they'd like delivered. If so, ask them to put it in the mail bin. Finally, meet me in the mail room with all the mail. Place the mail bin back and tell me which offices they are from."; prompt2 = "Go to the mail room to pick a mail bin. Ask at every office if they have any mail to deliver. If they reply yes, ask them to put the mail in the bin. Return to the mail room with the mail. 
Tell me at the mail room which offices these mail are from and put the mail bin back."; @@ -98,6 +107,7 @@ function showText(buttonId) { code = "MailDelivery"; property = "Navigation, Perception, Manipulation, Ask Human, Loops"; statenum = 4; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/MailDelivery.py"; } else if(buttonId === "MM") { prompt1 = "Ask Sally in her office if she wants to go to the cinema with Mark. Go to Mark's office and tell him Sally’s answer. If Sally says yes, ask Mark whether he wants to leave at 4PM, 5PM, or 6PM - then go tell Sally what time Mark is leaving."; prompt2 = "Ask Sally if she wants to go to the cinema with Mark. Tell Mark what Sally said, and if Sally said yes, ask Mark whether he wants to leave at 4PM, 5PM, or 6PM, and then go tell Sally what time Mark is leaving."; @@ -107,6 +117,7 @@ function showText(buttonId) { code = "MovieMessenger"; property = "Navigation, Perception, Ask Human"; statenum = 3; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/MovieMessenger.py"; } else if(buttonId === "SG") { prompt1 = "Go to every lab and if there is a person there, say Good Day!"; prompt2 = "Visit each lab and if someone is present, greet them with a pleasant Good Day!"; @@ -116,6 +127,7 @@ function showText(buttonId) { code = "SayGoodDay"; property = "Navigation, Perception, Ask Human, Loops"; statenum = 4; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/SayGoodDay.py"; } else if(buttonId === "ST") { prompt1 = "The thermostat is set to 72 degrees. Go to Arjun’s office and ask him if he’d like it to be warmer or colder. Come back and tell me what temperature I should set it to."; prompt2 = "Ask Arjun in his office if he would like it to be warmer or colder. The thermostat is currently set to 72 degrees. 
Come back and tell me what temperature I should set the thermostat to based on what Arjun says."; @@ -125,6 +137,7 @@ function showText(buttonId) { code = "SetTemperature"; property = "Navigation, Perception, Ask Human, Commonsense Reasoning"; statenum = 2; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/SetTemperature.py"; } else if(buttonId === "SD") { prompt1 = "Check every printer room for a stapler, and deliver a stapler from the supply room to every printer room without a stapler."; prompt2 = "Go to every printer room and check if there is a stapler. For each visited room that does not have a stapler, deliver a stapler from the supply room to that room."; @@ -134,6 +147,7 @@ function showText(buttonId) { code = "StaplerDelivery"; property = "Navigation, Perception, Manipulation, Loops"; statenum = 3; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/StaplerDelivery.py"; } else if(buttonId === "SS") { prompt1 = "Check every printer room for a stapler, and come back and tell me which ones do not have a stapler."; prompt2 = "Go to every printer room and check to see if there is a stapler. Come back and tell me which printer rooms do not have a stapler."; @@ -143,6 +157,7 @@ function showText(buttonId) { code = "StaplerSuppy"; property = "Navigation, Perception, Loops"; statenum = 3; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/StaplerSupply.py"; } else if(buttonId === "WP") { prompt1 = "Go to the main entrance, and wait for someone to show up. Ask them if they feel it is hot out. Repeat this until you have 10 responses, and then come back and tell me what percent of people think it is hot out."; prompt2 = "Go to the main entrance. When someone shows up, ask them if they feel that it’s hot out. Do this until you collect ten people’s responses. 
Come back and tell me which percent of people think it’s hot out."; @@ -152,6 +167,7 @@ function showText(buttonId) { code = "WeatherPoll"; property = "Navigation, Perception, Ask Human, Loops, Arithmetic"; statenum = 5; + url = "https://github.com/ut-amrl/codebotler/blob/production/benchmark/tasks/WeatherPoll.py"; } document.getElementById("code1").innerHTML = code + "-1"; @@ -168,12 +184,10 @@ function showText(buttonId) { document.getElementById("property").innerHTML = property; document.getElementById("statenum").innerHTML = statenum; + document.getElementById("benchmark-url").href = url; handle_plot(buttonId); handle_video(buttonId); - - url = "https://github.com/ut-amrl/codebotler-dev/blob/main/benchmark/tasks/CountSavory.py"; - document.getElementById("benchmark-url").href = url; } showText("CS") From 8721a0cc22452285bba8a76f0625d1b4b6a6e1f2 Mon Sep 17 00:00:00 2001 From: zichao Date: Tue, 21 Nov 2023 09:57:57 -0600 Subject: [PATCH 2/2] update paper ref --- docs/index.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index c9bc914..1298c36 100644 --- a/docs/index.md +++ b/docs/index.md @@ -21,7 +21,7 @@ order: 1 - +
Paper
@@ -308,3 +308,14 @@ These LLMs are evaluated on the RoboEval benchma +#### Citation +```bibtex +@misc{hu2023deploying, + title={Deploying and Evaluating LLMs to Program Service Mobile Robots}, + author={Zichao Hu and Francesca Lucchetti and Claire Schlesinger and Yash Saxena and Anders Freeman and Sadanand Modak and Arjun Guha and Joydeep Biswas}, + year={2023}, + eprint={2311.11183}, + archivePrefix={arXiv}, + primaryClass={cs.RO} +} +``` \ No newline at end of file