Skip to content

Commit

Permalink
Merge branch 'issue-193-type-error' of github.com:fadingNA/scrape-it …
Browse files Browse the repository at this point in the history
…into new-version
  • Loading branch information
IonicaBizau committed Oct 4, 2024
2 parents e36a14d + 22431a2 commit 5ea7fd5
Show file tree
Hide file tree
Showing 5 changed files with 680 additions and 67 deletions.
87 changes: 87 additions & 0 deletions example/index-type.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"use strict";

import scrapeIt from "../lib";

// Promise interface: scrape three header fields, then log the status code and the scraped data.
scrapeIt("https://ionicabizau.net", {
    title: ".header h1",
    desc: ".header h2",
    avatar: {
        selector: ".header img",
        attr: "src",
    },
}).then((result) => {
    const { status, data } = result;
    console.log(`Status Code: ${status}`);
    console.log(data);
});

// Async-Await: the same scrape expressed with `await`, using nested list options.
(async () => {
    const response = await scrapeIt("https://ionicabizau.net", {
        // One entry per element matching ".article"
        articles: {
            listItem: ".article",
            data: {
                // Article date, converted from the scraped text into a Date object
                createdAt: {
                    selector: ".date",
                    convert: (rawDate) => new Date(rawDate),
                },
                // Article title
                title: "a.article-title",
                // Nested list: one entry per tag element
                tags: {
                    listItem: ".tags > span",
                },
                // Article content, extracted with the "html" method
                content: {
                    selector: ".article-content",
                    how: "html",
                },
                // No selector: reads the attribute from the root listItem itself
                classes: {
                    attr: "class",
                },
            },
        },
        // One entry per blog page link
        pages: {
            listItem: "li.page",
            data: {
                title: "a",
                url: {
                    selector: "a",
                    attr: "href",
                },
            },
        },
        // Other page-level data
        title: ".header h1",
        desc: ".header h2",
        avatar: {
            selector: ".header img",
            attr: "src",
        },
    });
    const { data } = response;
    //console.log(data);
})();



// scrapeHTML: scrape an HTML document that is already in memory.
(() => {
    // `scrapeHTML` takes the document body (markup or a Cheerio object), not a
    // URL, so a small inline document is used here instead of an address.
    const html = `
<main>
    <article>
        <h1>Article title</h1>
        <p>First paragraph.</p>
        <p>Second paragraph.</p>
    </article>
</main>`;

    const { data } = scrapeIt.scrapeHTML<{ data: unknown }>(html, {
        data: {
            listItem: "main",
            data: {
                items: {
                    selector: "article",
                    // Custom extractor: `element` is typed as Cheerio; it is
                    // presumably the element matched by `selector` (confirm
                    // against the scrape-it docs). Returns the combined text of
                    // every <p> that is the second or a later child of its parent.
                    how: (element) => {
                        const $items = element.find("p:nth-child(n+2)");
                        return $items.text();
                    },
                },
            },
        },
    });
    console.log(data);
})(); // Invoke the function; without the trailing `()` it is defined but never run.
8 changes: 4 additions & 4 deletions lib/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ declare namespace scrapeIt {
/**
 * Describes what to scrape. Each key names a field; its value is either a
 * string (the examples use CSS selectors such as ".header h1"), a
 * `ScrapeOptionList`, or a `ScrapeOptionElement`.
 */
export interface ScrapeOptions {
[key: string]: string | ScrapeOptionList | ScrapeOptionElement;
}

export interface ScrapeOptionElement {
selector?: string;
convert?: (value: any) => any;
how?: string | ((element: cheerio.Selector) => any);
// Use the Cheerio type directly from the module so that it does not produce type warnings.
how?: string | ((element: Cheerio) => any);
attr?: string;
trim?: boolean;
closest?: string;
Expand All @@ -26,11 +26,11 @@ declare namespace scrapeIt {
data: T,
status: number,
statusText: string,
$: cheerio.Cheerio,
$: Cheerio,
body: string
}

export function scrapeHTML<T>(body: cheerio.Root | string, options: ScrapeOptions): T;
export function scrapeHTML<T>(body: Cheerio | string, options: ScrapeOptions): T;
}

/**
 * Scrapes the page identified by `url` according to `opts`.
 *
 * @typeParam T - Shape of the scraped data exposed as `data` on the result.
 * @param url - The page address as a string, or an object (presumably request
 *   options — confirm against the implementation).
 * @param opts - The fields to extract, keyed by output name.
 * @returns A promise resolving to a `ScrapeResult<T>`.
 */
declare function scrapeIt<T>(url: string | object, opts: scrapeIt.ScrapeOptions): Promise<scrapeIt.ScrapeResult<T>>;
Expand Down
Loading

0 comments on commit 5ea7fd5

Please sign in to comment.