From 869441b156f49ed38bb95236f26d5b87139d6db0 Mon Sep 17 00:00:00 2001 From: DPende Date: Sat, 1 Feb 2025 21:50:55 +0100 Subject: [PATCH 1/3] fix: fixed HttpError messages --- scrapegraph-js/src/utils/handleError.js | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scrapegraph-js/src/utils/handleError.js b/scrapegraph-js/src/utils/handleError.js index c46a4fc..3d392bb 100644 --- a/scrapegraph-js/src/utils/handleError.js +++ b/scrapegraph-js/src/utils/handleError.js @@ -1,17 +1,21 @@ class HttpError extends Error { - constructor(statusCode, title, detail) { - super(HttpError.makeMessage(statusCode, title, detail)); + constructor(statusCode, title, data) { + super(HttpError.makeMessage(statusCode, title, data)); this.statusCode = statusCode; this.title = title; - this.detail = detail; + this.info = data; } - static makeMessage(statusCode, title, detail) { + static makeMessage(statusCode, title, data) { let message = ''; message += statusCode ? `${statusCode} - ` : '(unknown status code) - '; message += title ? `${title} - ` : '(unknown error message) - '; - message += detail ? `${JSON.stringify(detail)}` : '(unknown error detail)'; + message += data.detail + ? 'Error located in: ' + `${JSON.stringify(data.detail[0].loc)}` + ', ' + `${data.detail[0].msg}` + : data.error + ? `${data.error}` + : '(unknown error detail)'; return message; } @@ -31,7 +35,7 @@ class UnexpectedError extends Error { export default function handleError(error) { if (error.response) { - throw new HttpError(error.response.status, error.response.statusText, error.response.data.detail); + throw new HttpError(error.response.status, error.response.statusText, error.response.data); } else if (error.request) { throw new NetworkError('Impossible to contact the server. Check your internet connection.'); } else { From 2c5a59bd5cee46535aa1b157463db9164d7d42fb Mon Sep 17 00:00:00 2001 From: DPende Date: Tue, 4 Feb 2025 23:05:52 +0100 Subject: [PATCH 2/3] feat: implemented search scraper functionality --- .../getSearchScraperRequest_example.js | 12 ++++ .../examples/schema_searchScraper_example.js | 19 ++++++ .../examples/searchScraper_example.js | 12 ++++ scrapegraph-js/index.js | 1 + scrapegraph-js/src/searchScraper.js | 66 +++++++++++++++++++ 5 files changed, 110 insertions(+) create mode 100644 scrapegraph-js/examples/getSearchScraperRequest_example.js create mode 100644 scrapegraph-js/examples/schema_searchScraper_example.js create mode 100644 scrapegraph-js/examples/searchScraper_example.js create mode 100644 scrapegraph-js/src/searchScraper.js diff --git a/scrapegraph-js/examples/getSearchScraperRequest_example.js b/scrapegraph-js/examples/getSearchScraperRequest_example.js new file mode 100644 index 0000000..49b1797 --- /dev/null +++ b/scrapegraph-js/examples/getSearchScraperRequest_example.js @@ -0,0 +1,12 @@ +import { getSearchScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const requestId = '64801288-6e3b-41f3-9d94-07cff3829e15'; + +try { + const requestInfo = await getSearchScraperRequest(apiKey, requestId); + console.log(requestInfo); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/examples/schema_searchScraper_example.js b/scrapegraph-js/examples/schema_searchScraper_example.js new file mode 100644 index 0000000..9ef087a --- /dev/null +++ b/scrapegraph-js/examples/schema_searchScraper_example.js @@ -0,0 +1,19 @@ +import { searchScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const prompt = 'What is the latest version of Python and what are its main features?'; + +const schema = z.object({ + version: z.string().describe('The latest version'), + release_date: z.string().describe('The release date of latest version'), + major_features: z.array(z.string()), +}); + +try { + const response = await searchScraper(apiKey, prompt, schema); + console.log(response.result); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/examples/searchScraper_example.js b/scrapegraph-js/examples/searchScraper_example.js new file mode 100644 index 0000000..0bb1df7 --- /dev/null +++ b/scrapegraph-js/examples/searchScraper_example.js @@ -0,0 +1,12 @@ +import { searchScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const prompt = 'What is the latest version of Python and what are its main features?'; + +try { + const response = await searchScraper(apiKey, prompt); + console.log(response); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/index.js b/scrapegraph-js/index.js index 1e4c1c5..e050008 100644 --- a/scrapegraph-js/index.js +++ b/scrapegraph-js/index.js @@ -1,5 +1,6 @@ export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js'; export { markdownify, getMarkdownifyRequest } from './src/markdownify.js'; export { localScraper, getLocalScraperRequest } from './src/localScraper.js'; +export { searchScraper, getSearchScraperRequest } from './src/searchScraper.js'; export { getCredits } from './src/credits.js'; export { sendFeedback } from './src/feedback.js'; diff --git a/scrapegraph-js/src/searchScraper.js b/scrapegraph-js/src/searchScraper.js new file mode 100644 index 0000000..3ea578d --- /dev/null +++ b/scrapegraph-js/src/searchScraper.js @@ -0,0 +1,66 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { ZodType } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; + +/** + * Search and extract information from multiple web sources using AI. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} prompt - Natural language prompt describing what data to extract + * @param {Object} [schema] - Optional schema object defining the output structure + * @param {String} userAgent - the user agent like "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + * @returns {Promise} Extracted data in JSON format matching the provided schema + * @throws - Will throw an error in case of an HTTP failure. + */ +export async function searchScraper(apiKey, prompt, schema = null, userAgent = null) { + const endpoint = 'https://api.scrapegraphai.com/v1/searchscraper'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + if (userAgent) headers['User-Agent'] = userAgent; + + const payload = { + user_prompt: prompt, + }; + + if (schema) { + if (schema instanceof ZodType) { + payload.output_schema = zodToJsonSchema(schema); + } else { + throw new Error('The schema must be an instance of a valid Zod schema'); + } + } + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Retrieve the status or the result of searchScraper request. It also allows you to see the result of old requests. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} requestId - The request ID associated with the output of a searchScraper request. + * @returns {Promise} Information related to the status or result of a scraping request. + */ +export async function getSearchScraperRequest(apiKey, requestId) { + const endpoint = 'https://api.scrapegraphai.com/v1/searchscraper/' + requestId; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} From e72de2e29e10103af0cb4d7726b05b17a99a5ff1 Mon Sep 17 00:00:00 2001 From: DPende Date: Tue, 4 Feb 2025 23:33:31 +0100 Subject: [PATCH 3/3] doc: update readme --- scrapegraph-js/README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/scrapegraph-js/README.md b/scrapegraph-js/README.md index dafcf4a..a95e9d5 100644 --- a/scrapegraph-js/README.md +++ b/scrapegraph-js/README.md @@ -107,6 +107,26 @@ const schema = z.object({ })(); ``` +### Search Scraping + +Search and extract information from multiple web sources using AI. + +```javascript +import { searchScraper } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const prompt = 'What is the latest version of Python and what are its main features?'; + +(async () => { + try { + const response = await searchScraper(apiKey, prompt); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + ### Scraping local HTML Extract structured data from local HTML content