From 2808ac493c80fe7d9e3451a295752850e00f460e Mon Sep 17 00:00:00 2001 From: Brahma Dev Date: Fri, 1 Mar 2024 15:26:51 +0000 Subject: [PATCH] Allow multiple instances and make user agent private. --- README.md | 44 +++++++++++++++-------- src/index.ts | 20 +++++++---- test/test.js | 100 +++++++++++++++++++++++++++------------------------ 3 files changed, 95 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 11f865f..b0bcda2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Node [metafetch](https://www.npmjs.org/package/metafetch) + [![Build Status](https://github.com/brahma-dev/metafetch/actions/workflows/build.yml/badge.svg)](https://github.com/brahma-dev/metafetch/actions/workflows/build.yml) [![Coverage](https://img.shields.io/codecov/c/github/brahma-dev/metafetch.svg?style=flat-square)](https://codecov.io/github/brahma-dev/metafetch) [![Coverage](https://img.shields.io/coveralls/brahma-dev/metafetch.svg?style=flat-square)](https://coveralls.io/github/brahma-dev/metafetch) @@ -8,7 +9,7 @@ Metafetch fetches a given URL's title, description, images, links etc. -## Installation ## +## Installation Use NPM to install: @@ -36,9 +37,9 @@ Use NPM to install: #### Optional flags to disable parsing images and links and http timeout or headers - metafetch.fetch('http://www.facebook.com', { + metafetch.fetch('http://www.facebook.com', { userAgent: "User Agent/Defaults to Firefox 58", - flags: { + flags: { images: false, links: false, language: false @@ -67,20 +68,33 @@ Use NPM to install: metafetch.setUserAgent("PersonalBot"); +#### Multiple instances with different User Agent + + import { Metafetch } from 'metafetch'; + const instance0 = new Metafetch("Bot 0"); + const instance1 = new Metafetch("Bot 1"); + + -- or -- + + const instance0 = new Metafetch(); + instance0.setUserAgent("Bot 0") + const instance1 = new Metafetch(); + instance1.setUserAgent("Bot 1") + ### Response Data -* `title` : Page title. 
-* `description` : Page description or `og:description` meta tag. -* `image` : `og:image` meta tag. -* `url` : Page url or `og:url` meta tag. -* `ampURL` : URL from amphtml tag or null. -* `images` : All images on this page. -* `links` : All links on this page. -* `meta` : All the meta tags that with a `property` or `name` attribute. e.g ``, `` -* `headers` : HTTP headers, lowercasing field names much like node does. -* `language` : Content language (ISO 639-1) based on meta data/headers, falling back to detection by [franc](https://www.npmjs.com/package/franc). - -## License ## +- `title` : Page title. +- `description` : Page description or `og:description` meta tag. +- `image` : `og:image` meta tag. +- `url` : Page url or `og:url` meta tag. +- `ampURL` : URL from amphtml tag or null. +- `images` : All images on this page. +- `links` : All links on this page. +- `meta` : All the meta tags that have a `property` or `name` attribute, e.g. `<meta property="og:title" content="...">`, `<meta name="description" content="...">` +- `headers` : HTTP headers, lowercasing field names much like node does. +- `language` : Content language (ISO 639-1) based on meta data/headers, falling back to detection by [franc](https://www.npmjs.com/package/franc). 
+ +## License (The MIT License) diff --git a/src/index.ts b/src/index.ts index ddf4a5c..d95f4db 100644 --- a/src/index.ts +++ b/src/index.ts @@ -52,18 +52,24 @@ let franc: ((value?: string | undefined) => string) | ((arg0: string) => string) class Metafetch { - public userAgent: string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"; + private _userAgent: string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"; public setUserAgent(agent: string) { if (typeof agent == "string") { - this.userAgent = agent; + this._userAgent = agent; } else { throw new Error("METAFETCH: Invalid User agent supplied"); } } - constructor(){ + get userAgent() { + return this._userAgent; + } + constructor(ua?: string) { import('franc').then((f) => { franc = f.franc; - }) + }); + if (ua) { + this._userAgent = ua; + } } public fetch(url: string, options?: FetchOptions) { @@ -80,7 +86,7 @@ class Metafetch { timeout: 20000, headers: { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', - 'User-Agent': this.userAgent + 'User-Agent': this._userAgent }, maxRedirects: 5 }; @@ -98,7 +104,7 @@ class Metafetch { headers: true, language: true }; - let userAgent = this.userAgent; + let userAgent = this._userAgent; if (typeof options === 'object') { _.merge(http_options.headers, options.http?.headers || {}); http_options.timeout = options.http?.timeout || http_options.timeout; @@ -141,5 +147,5 @@ class Metafetch { } const exportobj = new Metafetch(); - +export { Metafetch } export default exportobj; diff --git a/test/test.js b/test/test.js index 17bdb6b..320d520 100644 --- a/test/test.js +++ b/test/test.js @@ -1,7 +1,8 @@ /*jshint mocha: true*/ var should = require('should'), path = require('path'), - fetchog = require(path.join(__dirname, '../dist/index.js')).default; + fetchog = require(path.join(__dirname, '../dist/index.js')).default, + classog = require(path.join(__dirname, 
'../dist/index.js')).Metafetch; console.log(fetchog) //Server for redirects var http = require('http'); @@ -58,42 +59,42 @@ describe('server', function () { server3.close(); done(); }); - it('should return invalid url error', function(done) { - fetchog.fetch("").then((res)=>{ + it('should return invalid url error', function (done) { + fetchog.fetch("").then((res) => { done(res); - }).catch((err)=>{ + }).catch((err) => { should.exist(err); err.message.should.equal("Invalid URL"); done(); }) }); - it('should not return invalid url error', function(done) { - fetchog.fetch("https://www.rediff.com/news/report/what-next-for-uddhav-thackeray/20191123.htm? var = uddhave next cm").then((res)=>{ + it('should not return invalid url error', function (done) { + fetchog.fetch("https://www.rediff.com/news/report/what-next-for-uddhav-thackeray/20191123.htm? var = uddhave next cm").then((res) => { done(); - }).catch((err)=>{ + }).catch((err) => { done(err); }) }); - it('should return promise', function(done) { + it('should return promise', function (done) { var res = fetchog.fetch(""); res.should.be.an.instanceOf(Promise); done(); }); - it('should get a return 404 from npmjs.com', function(done) { + it('should get a return 404 from npmjs.com', function (done) { fetchog.fetch('https://npmjs.com/~brahma-dev/nonexistenturl', { flags: { images: false, links: false } - }).then((res)=>{ + }).then((res) => { done(res); - }).catch((err)=>{ + }).catch((err) => { should.exist(err); err.message.should.equal("HTTP:404"); done(); }) }); - it('should get a meta without error from npmjs.com', function(done) { + it('should get a meta without error from npmjs.com', function (done) { fetchog.fetch('https://npmjs.com/~brahma-dev#someanchor', { flags: { images: false, @@ -102,17 +103,17 @@ describe('server', function () { http: { timeout: 30000 } - }).then((res)=>{ + }).then((res) => { should.exist(res); should.exist(res.url); res.url.host.should.equal('npmjs.com'); done() - }).catch((err)=>{ + 
}).catch((err) => { should.not.exist(err); done(err); }); }); - it('should get a meta without error from npmjs.com', function(done) { + it('should get a meta without error from npmjs.com', function (done) { fetchog.fetch('https://npmjs.com/~brahma-dev#someanchor', { flags: { title: false, @@ -131,37 +132,37 @@ describe('server', function () { http: { timeout: 30000 } - }).then((res)=>{ + }).then((res) => { should.exist(res); done() - }).catch((err)=>{ + }).catch((err) => { should.not.exist(err); done(err); }); }); - it('should get a meta without error from bbc.com', function(done) { - fetchog.fetch('http://www.bbc.com/news/newsbeat-43722444').then((res)=>{ + it('should get a meta without error from bbc.com', function (done) { + fetchog.fetch('http://www.bbc.com/news/newsbeat-43722444').then((res) => { should.exist(res); should.exist(res.url); res.url.host.should.equal('www.bbc.com'); done() - }).catch((err)=>{ + }).catch((err) => { should.not.exist(err); done(err); }); }); - it('should get a meta without error from npmjs.com', function(done) { - fetchog.fetch('http://npmjs.com/~brahma-dev#someanchor').then((res)=>{ + it('should get a meta without error from npmjs.com', function (done) { + fetchog.fetch('http://npmjs.com/~brahma-dev#someanchor').then((res) => { should.exist(res); should.exist(res.url); res.url.host.should.equal('npmjs.com'); done() - }).catch((err)=>{ + }).catch((err) => { should.not.exist(err); done(err); }); }); - it('should get a meta without error from nasa.gov', function(done) { + it('should get a meta without error from nasa.gov', function (done) { // www.nasa.gov adds a trailing slash fetchog.fetch('https://www.nasa.gov/technology/carbon-dioxide-fertilization-greening-earth-study-finds/', { flags: { @@ -171,97 +172,97 @@ describe('server', function () { http: { timeout: 30000 } - }).then((res)=>{ + }).then((res) => { should.exist(res); should.exist(res.url); res.url.host.should.equal('www.nasa.gov'); 
res.url.pathname.should.equal('/technology/carbon-dioxide-fertilization-greening-earth-study-finds/'); done() - }).catch((err)=>{ + }).catch((err) => { should.not.exist(err); done(err); }); }); - it('should err', function(done) { + it('should err', function (done) { fetchog.fetch('http://0.0.0.0/test.pdf', { http: { timeout: 1500 } - }).then((res)=>{ + }).then((res) => { done(res); - }).catch((err)=>{ + }).catch((err) => { should.exist(err); err.message.should.equal("Invalid URL"); done(); }) }); - it('should get a meta without error from nasa.gov', function(done) { + it('should get a meta without error from nasa.gov', function (done) { // www.nasa.gov adds a trailing slash - fetchog.fetch('http://www.nasa.gov/technology/carbon-dioxide-fertilization-greening-earth-study-finds/').then((res)=>{ + fetchog.fetch('http://www.nasa.gov/technology/carbon-dioxide-fertilization-greening-earth-study-finds/').then((res) => { should.exist(res); should.exist(res.url); res.url.host.should.equal('www.nasa.gov'); res.url.pathname.should.equal('/technology/carbon-dioxide-fertilization-greening-earth-study-finds/'); done() - }).catch((err)=>{ + }).catch((err) => { should.not.exist(err); done(err); }); }); - it('should get a meta without error from test server for invalid links', function(done) { - fetchog.fetch('http://127.0.0.1:2445/').then((res)=>{ + it('should get a meta without error from test server for invalid links', function (done) { + fetchog.fetch('http://127.0.0.1:2445/').then((res) => { should.exist(res); should.exist(res.url); done() - }).catch((err)=>{ + }).catch((err) => { should.not.exist(err); done(err); }); }); - it('should verify http headers', function(done) { - fetchog.fetch('http://127.0.0.1:2445/').then((res)=>{ + it('should verify http headers', function (done) { + fetchog.fetch('http://127.0.0.1:2445/').then((res) => { should.exist(res); should.exist(res.headers); should.exist(res.headers['content-type']); done() - }).catch((err)=>{ + }).catch((err) => { 
should.not.exist(err); done(err); }); }); - it('should redirect too many times.', function(done) { + it('should redirect too many times.', function (done) { fetchog.fetch('http://127.0.0.1:2444/', { http: { timeout: 1500 } - }).then((res)=>{ + }).then((res) => { done(res); - }).catch((err)=>{ + }).catch((err) => { should.exist(err); err.message.should.equal("Maximum number of redirects exceeded"); done(); }) }); - it('should err', function(done) { + it('should err', function (done) { fetchog.fetch('http://0.0.0.0/', { http: { timeout: 1500 } - }).then((res)=>{ + }).then((res) => { done(res); - }).catch((err)=>{ + }).catch((err) => { should.exist(err); done(); }) }); - it('should timeout', function(done) { + it('should timeout', function (done) { fetchog.fetch('http://127.0.0.1:2444/', { http: { timeout: 1 } - }).then((res)=>{ + }).then((res) => { done(res); - }).catch((err)=>{ + }).catch((err) => { should.exist(err); done(); }) @@ -308,6 +309,11 @@ describe('server', function () { done(); }) }); + it('new instance with user agent without error', function (done) { + const instance0 = new classog("GOOGLEBOT"); + instance0.userAgent.should.equal('GOOGLEBOT'); + done(); + }); it('should set user agent without error', function (done) { var err; try {