Skip to content

Commit

Permalink
Allow multiple instances and make user agent private.
Browse files Browse the repository at this point in the history
  • Loading branch information
brahma-dev committed Mar 1, 2024
1 parent d900ab2 commit 2808ac4
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 69 deletions.
44 changes: 29 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Node [metafetch](https://www.npmjs.org/package/metafetch)

[![Build Status](https://github.com/brahma-dev/metafetch/actions/workflows/build.yml/badge.svg)](https://github.com/brahma-dev/metafetch/actions/workflows/build.yml)
[![Coverage](https://img.shields.io/codecov/c/github/brahma-dev/metafetch.svg?style=flat-square)](https://codecov.io/github/brahma-dev/metafetch)
[![Coverage](https://img.shields.io/coveralls/brahma-dev/metafetch.svg?style=flat-square)](https://coveralls.io/github/brahma-dev/metafetch)
Expand All @@ -8,7 +9,7 @@

Metafetch fetches a given URL's title, description, images, links etc.

## Installation ##
## Installation

Use NPM to install:

Expand Down Expand Up @@ -36,9 +37,9 @@ Use NPM to install:

#### Optional flags to disable parsing of images and links, and options for HTTP timeout or headers

metafetch.fetch('http://www.facebook.com', {
metafetch.fetch('http://www.facebook.com', {
userAgent: "User Agent/Defaults to Firefox 102",
flags: {
flags: {
images: false,
links: false,
language: false
Expand Down Expand Up @@ -67,20 +68,33 @@ Use NPM to install:

metafetch.setUserAgent("PersonalBot");

#### Multiple instances with different User Agent

import { Metafetch } from 'metafetch';
const instance0 = new Metafetch("Bot 0");
const instance1 = new Metafetch("Bot 1");

-- or --

const instance0 = new Metafetch();
instance0.setUserAgent("Bot 0")
const instance1 = new Metafetch();
instance1.setUserAgent("Bot 1")

### Response Data

* `title` : Page title.
* `description` : Page description or `og:description` meta tag.
* `image` : `og:image` meta tag.
* `url` : Page url or `og:url` meta tag.
* `ampURL` : URL from amphtml tag or null.
* `images` : All images on this page.
* `links` : All links on this page.
* `meta` : All the meta tags with a `property` or `name` attribute, e.g. `<meta property="author" content="Example">`, `<meta name="description" content="Example.">`
* `headers` : HTTP response headers, with field names lowercased much like Node does.
* `language` : Content language (ISO 639-1) based on meta data/headers, falling back to detection by [franc](https://www.npmjs.com/package/franc).

## License ##
- `title` : Page title.
- `description` : Page description or `og:description` meta tag.
- `image` : `og:image` meta tag.
- `url` : Page url or `og:url` meta tag.
- `ampURL` : URL from amphtml tag or null.
- `images` : All images on this page.
- `links` : All links on this page.
- `meta` : All the meta tags with a `property` or `name` attribute, e.g. `<meta property="author" content="Example">`, `<meta name="description" content="Example.">`
- `headers` : HTTP response headers, with field names lowercased much like Node does.
- `language` : Content language (ISO 639-1) based on meta data/headers, falling back to detection by [franc](https://www.npmjs.com/package/franc).

## License

(The MIT License)

Expand Down
20 changes: 13 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,24 @@ let franc: ((value?: string | undefined) => string) | ((arg0: string) => string)


class Metafetch {
public userAgent: string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0";
private _userAgent: string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0";
public setUserAgent(agent: string) {
if (typeof agent == "string") {
this.userAgent = agent;
this._userAgent = agent;
} else {
throw new Error("METAFETCH: Invalid User agent supplied");
}
}
constructor(){
get userAgent() {
return this._userAgent;
}
constructor(ua?: string) {
import('franc').then((f) => {
franc = f.franc;
})
});
if (ua) {
this._userAgent = ua;
}
}

public fetch(url: string, options?: FetchOptions) {
Expand All @@ -80,7 +86,7 @@ class Metafetch {
timeout: 20000,
headers: {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'User-Agent': this.userAgent
'User-Agent': this._userAgent
},
maxRedirects: 5
};
Expand All @@ -98,7 +104,7 @@ class Metafetch {
headers: true,
language: true
};
let userAgent = this.userAgent;
let userAgent = this._userAgent;
if (typeof options === 'object') {
_.merge(http_options.headers, options.http?.headers || {});
http_options.timeout = options.http?.timeout || http_options.timeout;
Expand Down Expand Up @@ -141,5 +147,5 @@ class Metafetch {
}

const exportobj = new Metafetch();

export { Metafetch }
export default exportobj;
100 changes: 53 additions & 47 deletions test/test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
/*jshint mocha: true*/
var should = require('should'),
path = require('path'),
fetchog = require(path.join(__dirname, '../dist/index.js')).default;
fetchog = require(path.join(__dirname, '../dist/index.js')).default,
classog = require(path.join(__dirname, '../dist/index.js')).Metafetch;
console.log(fetchog)
//Server for redirects
var http = require('http');
Expand Down Expand Up @@ -58,42 +59,42 @@ describe('server', function () {
server3.close();
done();
});
it('should return invalid url error', function(done) {
fetchog.fetch("").then((res)=>{
it('should return invalid url error', function (done) {
fetchog.fetch("").then((res) => {
done(res);
}).catch((err)=>{
}).catch((err) => {
should.exist(err);
err.message.should.equal("Invalid URL");
done();
})
});
it('should not return invalid url error', function(done) {
fetchog.fetch("https://www.rediff.com/news/report/what-next-for-uddhav-thackeray/20191123.htm? var = uddhave next cm").then((res)=>{
it('should not return invalid url error', function (done) {
fetchog.fetch("https://www.rediff.com/news/report/what-next-for-uddhav-thackeray/20191123.htm? var = uddhave next cm").then((res) => {
done();
}).catch((err)=>{
}).catch((err) => {
done(err);
})
});
it('should return promise', function(done) {
it('should return promise', function (done) {
var res = fetchog.fetch("");
res.should.be.an.instanceOf(Promise);
done();
});
it('should get a return 404 from npmjs.com', function(done) {
it('should get a return 404 from npmjs.com', function (done) {
fetchog.fetch('https://npmjs.com/~brahma-dev/nonexistenturl', {
flags: {
images: false,
links: false
}
}).then((res)=>{
}).then((res) => {
done(res);
}).catch((err)=>{
}).catch((err) => {
should.exist(err);
err.message.should.equal("HTTP:404");
done();
})
});
it('should get a meta without error from npmjs.com', function(done) {
it('should get a meta without error from npmjs.com', function (done) {
fetchog.fetch('https://npmjs.com/~brahma-dev#someanchor', {
flags: {
images: false,
Expand All @@ -102,17 +103,17 @@ describe('server', function () {
http: {
timeout: 30000
}
}).then((res)=>{
}).then((res) => {
should.exist(res);
should.exist(res.url);
res.url.host.should.equal('npmjs.com');
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should get a meta without error from npmjs.com', function(done) {
it('should get a meta without error from npmjs.com', function (done) {
fetchog.fetch('https://npmjs.com/~brahma-dev#someanchor', {
flags: {
title: false,
Expand All @@ -131,37 +132,37 @@ describe('server', function () {
http: {
timeout: 30000
}
}).then((res)=>{
}).then((res) => {
should.exist(res);
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should get a meta without error from bbc.com', function(done) {
fetchog.fetch('http://www.bbc.com/news/newsbeat-43722444').then((res)=>{
it('should get a meta without error from bbc.com', function (done) {
fetchog.fetch('http://www.bbc.com/news/newsbeat-43722444').then((res) => {
should.exist(res);
should.exist(res.url);
res.url.host.should.equal('www.bbc.com');
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should get a meta without error from npmjs.com', function(done) {
fetchog.fetch('http://npmjs.com/~brahma-dev#someanchor').then((res)=>{
it('should get a meta without error from npmjs.com', function (done) {
fetchog.fetch('http://npmjs.com/~brahma-dev#someanchor').then((res) => {
should.exist(res);
should.exist(res.url);
res.url.host.should.equal('npmjs.com');
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should get a meta without error from nasa.gov', function(done) {
it('should get a meta without error from nasa.gov', function (done) {
// www.nasa.gov adds a trailing slash
fetchog.fetch('https://www.nasa.gov/technology/carbon-dioxide-fertilization-greening-earth-study-finds/', {
flags: {
Expand All @@ -171,97 +172,97 @@ describe('server', function () {
http: {
timeout: 30000
}
}).then((res)=>{
}).then((res) => {
should.exist(res);
should.exist(res.url);
res.url.host.should.equal('www.nasa.gov');
res.url.pathname.should.equal('/technology/carbon-dioxide-fertilization-greening-earth-study-finds/');
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should err', function(done) {
it('should err', function (done) {
fetchog.fetch('http://0.0.0.0/test.pdf', {
http: {
timeout: 1500
}
}).then((res)=>{
}).then((res) => {
done(res);
}).catch((err)=>{
}).catch((err) => {
should.exist(err);
err.message.should.equal("Invalid URL");
done();
})
});
it('should get a meta without error from nasa.gov', function(done) {
it('should get a meta without error from nasa.gov', function (done) {
// www.nasa.gov adds a trailing slash
fetchog.fetch('http://www.nasa.gov/technology/carbon-dioxide-fertilization-greening-earth-study-finds/').then((res)=>{
fetchog.fetch('http://www.nasa.gov/technology/carbon-dioxide-fertilization-greening-earth-study-finds/').then((res) => {
should.exist(res);
should.exist(res.url);
res.url.host.should.equal('www.nasa.gov');
res.url.pathname.should.equal('/technology/carbon-dioxide-fertilization-greening-earth-study-finds/');
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should get a meta without error from test server for invalid links', function(done) {
fetchog.fetch('http://127.0.0.1:2445/').then((res)=>{
it('should get a meta without error from test server for invalid links', function (done) {
fetchog.fetch('http://127.0.0.1:2445/').then((res) => {
should.exist(res);
should.exist(res.url);
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should verify http headers', function(done) {
fetchog.fetch('http://127.0.0.1:2445/').then((res)=>{
it('should verify http headers', function (done) {
fetchog.fetch('http://127.0.0.1:2445/').then((res) => {
should.exist(res);
should.exist(res.headers);
should.exist(res.headers['content-type']);
done()
}).catch((err)=>{
}).catch((err) => {
should.not.exist(err);
done(err);
});
});
it('should redirect too many times.', function(done) {
it('should redirect too many times.', function (done) {
fetchog.fetch('http://127.0.0.1:2444/', {
http: {
timeout: 1500
}
}).then((res)=>{
}).then((res) => {
done(res);
}).catch((err)=>{
}).catch((err) => {
should.exist(err);
err.message.should.equal("Maximum number of redirects exceeded");
done();
})
});
it('should err', function(done) {
it('should err', function (done) {
fetchog.fetch('http://0.0.0.0/', {
http: {
timeout: 1500
}
}).then((res)=>{
}).then((res) => {
done(res);
}).catch((err)=>{
}).catch((err) => {
should.exist(err);
done();
})
});
it('should timeout', function(done) {
it('should timeout', function (done) {
fetchog.fetch('http://127.0.0.1:2444/', {
http: {
timeout: 1
}
}).then((res)=>{
}).then((res) => {
done(res);
}).catch((err)=>{
}).catch((err) => {
should.exist(err);
done();
})
Expand Down Expand Up @@ -308,6 +309,11 @@ describe('server', function () {
done();
})
});
it('new instance with user agent without error', function (done) {
const instance0 = new classog("GOOGLEBOT");
instance0.userAgent.should.equal('GOOGLEBOT');
done();
});
it('should set user agent without error', function (done) {
var err;
try {
Expand Down

0 comments on commit 2808ac4

Please sign in to comment.