Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add action message #79

Merged
merged 15 commits into from
Oct 4, 2024
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ See the API Reference below for more detail on the `act()`, `observe()`, and `ex

#### `extract()`

`extract()` grabs structured text from the current page using [zod](https://github.com/colinhacks/zod) and [instructor](https://github.com/instructor-ai/instructor-js).
`extract()` grabs structured text from the current page using [zod](https://github.com/colinhacks/zod).
Given instructions and `schema`, you will receive structured data. Unlike some extraction libraries, stagehand can extract any information on a page, not just the main article contents.

#### `observe()`
Expand Down
113 changes: 93 additions & 20 deletions evals/index.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,38 @@ const wikipedia = async () => {
};
};


/**
 * Eval: asks Stagehand to perform an action that has no matching element on
 * the Home Depot home page and checks that `act()` reports the failure.
 *
 * @returns `true` when `act()` resolves to an object with exactly the expected
 * `success`, `message` and `action` fields; `false` when the result differs or
 * any step inside the `try` throws.
 */
const nonsense_action = async () => {
  const stagehand = new Stagehand({
    env: "LOCAL",
    verbose: 1,
    debugDom: true,
    headless: true,
  });
  await stagehand.init();

  try {
    await stagehand.page.goto("https://www.homedepot.com/");
    await stagehand.waitForSettledDom();

    const result = await stagehand.act({ action: "click on the first banana" });
    console.log("result", result);

    const expectedResult: Record<string, unknown> = {
      success: false,
      message: "Action not found on the current page after checking all chunks.",
      action: "click on the first banana",
    };

    if (result === null || result === undefined) {
      return false;
    }

    // Compare entry by entry instead of comparing JSON strings: serialized
    // output depends on property insertion order, so an equal result whose
    // keys were assigned in a different order would otherwise fail the eval.
    const actualEntries = Object.entries(result);
    const hasSameKeyCount =
      actualEntries.length === Object.keys(expectedResult).length;

    return (
      hasSameKeyCount &&
      actualEntries.every(
        ([key, value]) =>
          Object.prototype.hasOwnProperty.call(expectedResult, key) &&
          expectedResult[key] === value,
      )
    );
  } catch (error: unknown) {
    // Anything can be thrown, so narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error in nonsense_action function: ${reason}`);
    return false;
  } finally {
    // Always release the browser context, on success and on failure alike.
    await stagehand.context.close();
  }
};

const costar = async () => {
const stagehand = new Stagehand({
env,
Expand All @@ -274,7 +306,7 @@ const costar = async () => {

await stagehand.act({ action: "click on the first article" });

await stagehand.act({ action: "find the footer of the page" });
await stagehand.act({ action: "click on the learn more button for the first job" });

await stagehand.waitForSettledDom();
const articleTitle = await stagehand.extract({
Expand Down Expand Up @@ -324,9 +356,8 @@ const google_jobs = async () => {

await stagehand.act({ action: "click on the search button" });

await stagehand.act({
action: "click on the learn more button for the first job",
});
// NOTE: "click on the first Learn More button" does not work: the "Learn more" span is not clickable, and the <a href> element comes after it
await stagehand.act({ action: "click on the first job link" });

const jobDetails = await stagehand.extract({
instruction:
Expand All @@ -353,22 +384,17 @@ const google_jobs = async () => {

console.log("Job Details:", jobDetails);

const isJobDetailsValid =
jobDetails &&
Object.values(jobDetails).every(
(value) =>
value !== null &&
value !== undefined &&
value !== "" &&
(typeof value !== "object" ||
Object.values(value).every(
(v) =>
v !== null &&
v !== undefined &&
v !== "" &&
(typeof v === "number" || typeof v === "string"),
)),
);
const isJobDetailsValid = jobDetails &&
Object.values(jobDetails).every(value =>
value !== null &&
value !== undefined &&
(typeof value !== 'object' || Object.values(value).every(v =>
v !== null &&
v !== undefined &&
(typeof v === 'number' || typeof v === 'string')
))
);


await stagehand.context.close();

Expand All @@ -377,6 +403,51 @@ const google_jobs = async () => {
return { _success: isJobDetailsValid, jobDetails };
};

/**
 * Eval: on homedepot.com, searches for gas grills, opens the best-selling
 * one, opens its Product Details, and extracts the Primary Burner BTU.
 *
 * @returns `true` when the extraction yields at least one `productSpecs`
 * entry; `false` when it yields none or any step inside the `try` throws.
 */
const homedepot = async () => {
  const stagehand = new Stagehand({
    env: "LOCAL",
    verbose: 1,
    debugDom: true,
    // Headless unless HEADLESS is set to the string "false".
    headless: process.env.HEADLESS !== "false",
  });
  await stagehand.init();

  try {
    await stagehand.page.goto("https://www.homedepot.com/");
    await stagehand.waitForSettledDom();

    await stagehand.act({ action: "search for gas grills" });
    await stagehand.waitForSettledDom();

    await stagehand.act({ action: "click on the best selling gas grill" });
    await stagehand.waitForSettledDom();

    await stagehand.act({ action: "click on the Product Details" });
    await stagehand.waitForSettledDom();

    await stagehand.act({ action: "find the Primary Burner BTU" });
    await stagehand.waitForSettledDom();

    const productSpecs = await stagehand.extract({
      instruction: "Extract the Primary Burner BTU of the product",
      schema: z.object({
        productSpecs: z
          .array(
            z.object({
              burnerBTU: z.string().describe("Primary Burner BTU"),
            }),
          )
          .describe("Gas grill Primary Burner BTU"),
      }),
      modelName: "gpt-4o-2024-08-06",
    });
    console.log("The gas grill primary burner BTU is:", productSpecs);

    // Passes only when the extraction returned a non-empty productSpecs list;
    // a missing result or missing list counts as zero entries.
    return (productSpecs?.productSpecs?.length ?? 0) > 0;
  } catch (error: unknown) {
    // Anything can be thrown, so narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error in homedepot function: ${reason}`);
    return false;
  } finally {
    // Always release the browser context, on success and on failure alike.
    await stagehand.context.close();
  }
};

const tasks = {
vanta,
vanta_h,
Expand All @@ -388,6 +459,8 @@ const tasks = {
extract_last_twenty_github_commits,
costar,
google_jobs,
homedepot,
nonsense_action
};

const exactMatch = (args: { input: any; output: any; expected?: any }) => {
Expand Down
95 changes: 17 additions & 78 deletions evals/playground.ts
Original file line number Diff line number Diff line change
@@ -1,108 +1,47 @@
import { Stagehand } from "../lib";
import { z } from "zod";

const costar = async () => {
const stagehand = new Stagehand({
env: "LOCAL",
verbose: 2,
debugDom: true,
headless: process.env.HEADLESS !== "false",
});
// eval failing
const homedepot = async () => {
const stagehand = new Stagehand({ env: "LOCAL", verbose: 1, debugDom: true, headless: process.env.HEADLESS !== "false" });

await stagehand.init();
// TODO: fix this eval

try {
await Promise.race([
stagehand.page.goto("https://www.costar.com/"),
new Promise((_, reject) =>
setTimeout(() => reject(new Error("Navigation timeout")), 30000),
),
]);
await stagehand.waitForSettledDom();

await stagehand.act({ action: "click on the first article" });

await stagehand.act({ action: "find the footer of the page" });
await stagehand.page.goto("https://www.homedepot.com/");

await stagehand.waitForSettledDom();
const articleTitle = await stagehand.extract({
instruction: "extract the title of the article",
schema: z.object({
title: z.string().describe("the title of the article").nullable(),
}),
modelName: "gpt-4o-2024-08-06",
});

console.log("articleTitle", articleTitle);

// Check if the title is more than 5 characters
const isTitleValid =
articleTitle.title !== null && articleTitle.title.length > 5;

await stagehand.context.close();

return isTitleValid;
} catch (error) {
console.error(`Error in costar function: ${error.message}`);
return { title: null };
} finally {
await stagehand.context.close();
}
};

const homedepot = async () => {
const stagehand = new Stagehand({
env: "LOCAL",
verbose: 1,
debugDom: true,
// headless: process.env.HEADLESS !== "false",
});
await stagehand.init();

try {
// await stagehand.page.goto("https://www.homedepot.com/");
// await stagehand.waitForSettledDom();

// await stagehand.act({ action: "search for gas grills" });
// await stagehand.waitForSettledDom();

// await stagehand.act({ action: "click on the best selling gas grill" });
// await stagehand.waitForSettledDom();
await stagehand.act({ action: "search for gas grills" });
await stagehand.waitForSettledDom();

await stagehand.page.goto(
"https://www.homedepot.com/p/Nexgrill-4-Burner-Propane-Gas-Grill-in-Black-with-Stainless-Steel-Main-Lid-720-0925PG/326294740",
);
await stagehand.act({ action: "click on the first gas grill" });
await stagehand.waitForSettledDom();

await stagehand.act({ action: "click on the Product Details" });
await stagehand.waitForSettledDom();

await stagehand.act({ action: "find the Primary Burner BTU" });
await stagehand.waitForSettledDom();

const productSpecs = await stagehand.extract({
instruction: "Extract the Primary Burner BTU of the product",
schema: z.object({
productSpecs: z
.array(
z.object({
burnerBTU: z.string().describe("Primary Burner BTU"),
}),
)
.describe("Gas grill Primary Burner BTU"),
productSpecs: z.array(z.object({
burnerBTU: z.string().describe("Primary Burner BTU"),
})).describe("Gas grill Primary Burner BTU")
}),
modelName: "gpt-4o-2024-08-06",
});
console.log("The gas grill primary burner BTU is:", productSpecs);

if (
!productSpecs ||
!productSpecs.productSpecs ||
productSpecs.productSpecs.length === 0
) {
if (!productSpecs || !productSpecs.productSpecs || productSpecs.productSpecs.length === 0) {
return false;
}

return true;

} catch (error) {
console.error(`Error in homedepot function: ${error.message}`);
return false;
Expand All @@ -111,9 +50,9 @@ const homedepot = async () => {
}
};


async function main() {
// const [costarResult] = await Promise.all([costar()]);
const [homedepotResult] = await Promise.all([homedepot()]);
const homedepotResult = await homedepot();

console.log("Result:", homedepotResult);
}
Expand Down
2 changes: 1 addition & 1 deletion lib/dom/process.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ async function processElements(chunk: number) {
const chunkHeight = viewportHeight * chunk;
const offsetTop = chunkHeight;

window.scrollTo(0, offsetTop);
window.scrollTo({ top: offsetTop, left: 0, behavior: 'smooth' });
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added "smooth" to the scrolling behavior. The hypothesis is that it reduces hard reloads on slow sites, but it's based on just one eval (homedepot).


const domString = window.document.body.outerHTML;
if (!domString) {
Expand Down
Loading