Skip to content

Implemented Search Scraper functionality #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions scrapegraph-js/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,26 @@ const schema = z.object({
})();
```

### Search Scraping

Search and extract information from multiple web sources using AI.

```javascript
import { searchScraper } from 'scrapegraph-js';

const apiKey = 'your-api-key';
const prompt = 'What is the latest version of Python and what are its main features?';

(async () => {
try {
const response = await searchScraper(apiKey, prompt);
console.log(response.result);
} catch (error) {
console.error('Error:', error);
}
})();
```

### Scraping local HTML

Extract structured data from local HTML content.
Expand Down
12 changes: 12 additions & 0 deletions scrapegraph-js/examples/getSearchScraperRequest_example.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { getSearchScraperRequest } from 'scrapegraph-js';
import 'dotenv/config';

// The API key is read from the SGAI_APIKEY environment variable.
const { SGAI_APIKEY: apiKey } = process.env;
// ID of the searchScraper request to look up (sample value; replace with your own).
const requestId = '64801288-6e3b-41f3-9d94-07cff3829e15';

/**
 * Fetches the stored information for `requestId` and prints it.
 * Any error raised by the lookup is printed to stderr instead of being rethrown.
 */
async function main() {
  try {
    const requestInfo = await getSearchScraperRequest(apiKey, requestId);
    console.log(requestInfo);
  } catch (err) {
    console.error(err);
  }
}

await main();
19 changes: 19 additions & 0 deletions scrapegraph-js/examples/schema_searchScraper_example.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { searchScraper } from 'scrapegraph-js';
import { z } from 'zod';
import 'dotenv/config';

// Zod schema describing the structure requested for the answer.
const schema = z.object({
  version: z.string().describe('The latest version'),
  release_date: z.string().describe('The release date of latest version'),
  major_features: z.array(z.string()),
});

// The API key is read from the SGAI_APIKEY environment variable.
const { SGAI_APIKEY: apiKey } = process.env;
const prompt = 'What is the latest version of Python and what are its main features?';

/**
 * Runs a schema-constrained search and prints the `result` field of the response.
 * Any error raised along the way is printed to stderr instead of being rethrown.
 */
async function main() {
  try {
    const { result } = await searchScraper(apiKey, prompt, schema);
    console.log(result);
  } catch (err) {
    console.error(err);
  }
}

await main();
12 changes: 12 additions & 0 deletions scrapegraph-js/examples/searchScraper_example.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { searchScraper } from 'scrapegraph-js';
import 'dotenv/config';

// The API key is read from the SGAI_APIKEY environment variable.
const { SGAI_APIKEY: apiKey } = process.env;
const prompt = 'What is the latest version of Python and what are its main features?';

/**
 * Runs a search without an output schema and prints the whole response.
 * Any error raised along the way is printed to stderr instead of being rethrown.
 */
async function main() {
  try {
    const response = await searchScraper(apiKey, prompt);
    console.log(response);
  } catch (err) {
    console.error(err);
  }
}

await main();
1 change: 1 addition & 0 deletions scrapegraph-js/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js';
export { markdownify, getMarkdownifyRequest } from './src/markdownify.js';
export { localScraper, getLocalScraperRequest } from './src/localScraper.js';
export { searchScraper, getSearchScraperRequest } from './src/searchScraper.js';
export { getCredits } from './src/credits.js';
export { sendFeedback } from './src/feedback.js';
66 changes: 66 additions & 0 deletions scrapegraph-js/src/searchScraper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import axios from 'axios';
import handleError from './utils/handleError.js';
import { ZodType } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';

/**
 * Submit a searchScraper request: POSTs the prompt (and, optionally, a JSON
 * schema derived from a Zod schema) to the ScrapeGraph AI searchscraper endpoint.
 *
 * @param {string} apiKey - Your ScrapeGraph AI API key, sent in the `SGAI-APIKEY` header
 * @param {string} prompt - Natural language prompt describing what data to extract
 * @param {Object} [schema] - Optional Zod schema defining the output structure; it is
 *   converted with `zodToJsonSchema` and sent as `output_schema`
 * @param {string} [userAgent] - Optional user agent, e.g.
 *   "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; sent as the
 *   `User-Agent` header only when provided
 * @returns {Promise<Object>} The body of the API response (`response.data`)
 * @throws {Error} If `schema` is provided but is not a `ZodType` instance
 * @throws Request failures are passed to `handleError`, which rethrows them.
 */
export async function searchScraper(apiKey, prompt, schema = null, userAgent = null) {
  const requestBody = { user_prompt: prompt };

  if (schema) {
    // Reject anything that is not a Zod schema before any network activity.
    if (!(schema instanceof ZodType)) {
      throw new Error('The schema must be an instance of a valid Zod schema');
    }
    requestBody.output_schema = zodToJsonSchema(schema);
  }

  const requestHeaders = {
    'accept': 'application/json',
    'SGAI-APIKEY': apiKey,
    'Content-Type': 'application/json',
    // Only attach a User-Agent when the caller supplied one.
    ...(userAgent ? { 'User-Agent': userAgent } : {}),
  };

  try {
    const { data } = await axios.post(
      'https://api.scrapegraphai.com/v1/searchscraper',
      requestBody,
      { headers: requestHeaders },
    );
    return data;
  } catch (error) {
    handleError(error);
  }
}

/**
 * Retrieve the status or the result of a searchScraper request. It also allows
 * you to see the result of old requests.
 *
 * @param {string} apiKey - Your ScrapeGraph AI API key, sent in the `SGAI-APIKEY` header
 * @param {string} requestId - The request ID associated with the output of a searchScraper request.
 * @returns {Promise<Object>} The body of the API response (`response.data`) describing the
 *   status or result of the request.
 * @throws Request failures are passed to `handleError`, which rethrows them.
 */
export async function getSearchScraperRequest(apiKey, requestId) {
  // Encode the ID so characters such as "/", "?" or "#" cannot alter the
  // request path or query; UUID-style IDs pass through unchanged.
  const endpoint = `https://api.scrapegraphai.com/v1/searchscraper/${encodeURIComponent(requestId)}`;
  const headers = {
    'accept': 'application/json',
    'SGAI-APIKEY': apiKey,
  };

  try {
    const response = await axios.get(endpoint, { headers });
    return response.data;
  } catch (error) {
    handleError(error);
  }
}
16 changes: 10 additions & 6 deletions scrapegraph-js/src/utils/handleError.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
class HttpError extends Error {
constructor(statusCode, title, detail) {
super(HttpError.makeMessage(statusCode, title, detail));
constructor(statusCode, title, data) {
super(HttpError.makeMessage(statusCode, title, data));
this.statusCode = statusCode;
this.title = title;
this.detail = detail;
this.info = data;
}

static makeMessage(statusCode, title, detail) {
static makeMessage(statusCode, title, data) {
let message = '';

message += statusCode ? `${statusCode} - ` : '(unknown status code) - ';
message += title ? `${title} - ` : '(unknown error message) - ';
message += detail ? `${JSON.stringify(detail)}` : '(unknown error detail)';
message += data.detail
? 'Error located in: ' + `${JSON.stringify(data.detail[0].loc)}` + ', ' + `${data.detail[0].msg}`
: data.error
? `${data.error}`
: '(unknown error detail)';

return message;
}
Expand All @@ -31,7 +35,7 @@ class UnexpectedError extends Error {

export default function handleError(error) {
if (error.response) {
throw new HttpError(error.response.status, error.response.statusText, error.response.data.detail);
throw new HttpError(error.response.status, error.response.statusText, error.response.data);
} else if (error.request) {
throw new NetworkError('Impossible to contact the server. Check your internet connection.');
} else {
Expand Down