-
Notifications
You must be signed in to change notification settings - Fork 43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
build scrapper passed #73
Closed
Closed
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
add8589
add ext links to achievements
heytulsiprasad 44e20c5
project card rotate animation
heytulsiprasad 4bc828b
:zap: chore: build scraper for blog.ejs
heytulsiprasad 8c4a020
:sparkles: feat: build scrapper success
heytulsiprasad 4ef9569
:art: chore: add ejs template to blogs.ejs
heytulsiprasad 076f255
:iphone: feat: make responsive layout+ add regex
heytulsiprasad 724e04c
:ok_hand: style: date format change to DD/MM/YYYY
heytulsiprasad b1b1841
:boom: feat: fetch latest blog details
heytulsiprasad f821ad6
:heavy_plus_sign: add moment as dependency
heytulsiprasad 04a095a
:ok_hand: update index.js with moment pkg
heytulsiprasad 567dc39
:zap: build: scrapers for individual blog posts
heytulsiprasad 33f8547
:construction: chore: add cover objects in json
heytulsiprasad 2b837e5
:recycle: refactor: scraper codes and json
heytulsiprasad ba01f29
:alembic: make an array of four urls to scrap
heytulsiprasad File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,29 @@ | ||
var express = require('express'); | ||
var router = express.Router(); | ||
const express = require('express'); | ||
const router = express.Router(); | ||
const moment = require("moment") | ||
const json = require('./data.json') | ||
|
||
// blog.ejs shows this many cards, taken from the top of data.json.
const BLOG_CARD_COUNT = 4;

// View-model for blog.ejs: one entry per card, with the release timestamp
// pre-formatted as DD/MM/YYYY.
// slice() is used instead of a fixed 0..3 index loop so that a data.json with
// fewer than BLOG_CARD_COUNT entries yields fewer cards instead of throwing
// while this module is being loaded.
const ejsRenderedData = json.slice(0, BLOG_CARD_COUNT).map(function (post) {
  return {
    title: post.title,
    href: post.href,
    author: post.author,
    release: moment(post.release).format("DD/MM/YYYY"),
    cover: post.cover
  };
});
|
||
/* GET home page. */
// Renders the index view exactly once, passing the prepared blog card data.
// The earlier data-less res.render('index') call and the debug log were
// removed: rendering twice attempts to send a second response after the
// first one has already been sent.
router.get('/', function (req, res) {
  res.render('index', { ejsRenderedData: ejsRenderedData });
});

module.exports = router;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[ | ||
{ | ||
"img": "https://miro.medium.com/max/2380/1*pvH4Qw7wmBFu2meLGU4HXg.jpeg" | ||
}, | ||
{ | ||
"img": "https://miro.medium.com/max/1460/1*evXUXDtSpBZo_HHfXt4CNA.jpeg" | ||
}, | ||
{ | ||
"img": "https://miro.medium.com/max/442/1*-5x0MC7k53mV4VMqKiKHOg.png" | ||
}, | ||
{ | ||
"img": "https://miro.medium.com/max/2560/1*5yVOKthzCe2Ko5zoCgrLkQ.jpeg" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
[ | ||
{ | ||
"title": "Frequently asked Interview Questions in react-redux", | ||
"href": "https://blog.zairza.in/frequently-asked-interview-questions-in-react-redux-bc774733b449?source=collection_home---5------0-----------------------", | ||
"author": "Jayashree Panda", | ||
"release": "2019-11-22T08:11:26.615Z" | ||
}, | ||
{ | ||
"title": "OAuth using MEVN Stack", | ||
"href": "https://blog.zairza.in/oauth-using-mevn-stack-4b4a383dae08?source=collection_home---6------0-----------------------", | ||
"author": "Ramakrishna Pattnaik", | ||
"release": "2019-08-25T12:13:49.122Z" | ||
}, | ||
{ | ||
"title": "A Guide to Dual Booting", | ||
"href": "https://blog.zairza.in/a-guide-to-dual-booting-d9dffe042ee6?source=collection_home---6------1-----------------------", | ||
"author": "Dikshant Brahma", | ||
"release": "2019-08-10T06:01:01.253Z" | ||
}, | ||
{ | ||
"title": "Home Automation : Making Our Lives Easier", | ||
"href": "https://blog.zairza.in/home-automation-making-our-lives-easier-a782ee067ea8?source=collection_home---6------2-----------------------", | ||
"author": "Subhangi Choudhary", | ||
"release": "2019-08-06T10:19:07.530Z" | ||
}, | ||
{ | ||
"title": "What is “BITCOIN” and How its Mining Process Works…", | ||
"href": "https://blog.zairza.in/https-medium-com-pruthwirajnayak-what-is-bitcoin-and-how-its-mining-process-works-8cf5aa51d87f?source=collection_home---6------3-----------------------", | ||
"author": "Pruthwiraj Nayak", | ||
"release": "2019-07-29T10:55:38.837Z" | ||
}, | ||
{ | ||
"title": "Modern Radios — Miles of Range and Years of Battery Life?", | ||
"href": "https://blog.zairza.in/modern-radios-miles-of-range-and-years-of-battery-life-6ca1a690c4d9?source=collection_home---6------4-----------------------", | ||
"author": "Sanjanamohapatra 99", | ||
"release": "2019-07-14T09:29:38.928Z" | ||
}, | ||
{ | ||
"title": "Why ROS?", | ||
"href": "https://blog.zairza.in/why-ros-18221b2fe6a?source=collection_home---6------5-----------------------", | ||
"author": "Abhishek Mishra", | ||
"release": "2019-06-26T17:40:00.150Z" | ||
}, | ||
{ | ||
"title": "5G: Is the future really near ?", | ||
"href": "https://blog.zairza.in/5g-is-the-future-really-near-f194b95ef94e?source=collection_home---6------6-----------------------", | ||
"author": "Aditya Bisoi", | ||
"release": "2019-06-21T11:31:01.210Z" | ||
}, | ||
{ | ||
"title": "Human Area Network (HAN) : Turning Humans Into Swipe Card?", | ||
"href": "https://blog.zairza.in/human-area-network-han-turning-humans-into-swipe-card-7d670b9ee0cb?source=collection_home---6------7-----------------------", | ||
"author": "Sanjanamohapatra 99", | ||
"release": "2019-06-16T14:35:57.071Z" | ||
}, | ||
{ | ||
"title": "Security Features of Blockchain and How it Works", | ||
"href": "https://blog.zairza.in/security-features-of-blockchain-and-how-it-works-2870490d3461?source=collection_home---6------8-----------------------", | ||
"author": "Pruthwiraj Nayak", | ||
"release": "2019-06-07T19:19:52.142Z" | ||
}, | ||
{ | ||
"title": "Industrial Automation For Enabling Industry 4.0", | ||
"href": "https://blog.zairza.in/industrial-automation-for-enabling-industry-4-0-b6144898adb8?source=collection_home---6------9-----------------------", | ||
"author": "Sarthak Kar", | ||
"release": "2019-06-01T16:07:34.086Z" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// This file is responsible for scraping the cover image from each individual blog post. | ||
|
||
// TODO: scraper for cover image from individual blog post | ||
|
||
const express = require("express"); | ||
const fs = require("fs"); | ||
const cheerio = require("cheerio") | ||
const request = require("request") | ||
const json = require("./json/data.json") | ||
|
||
const app = express();

// Number of posts (taken from the top of data.json) to scrape a cover for.
const COVER_COUNT = 4;

// Output file holding the scraped cover list.
const COVER_JSON_PATH = __dirname + "/../routes/json/cover.json";

// hrefs of the first COVER_COUNT posts in data.json. slice() tolerates a
// data.json that holds fewer entries than COVER_COUNT.
const bloglinks = json.slice(0, COVER_COUNT).map(function (post) {
  return post.href;
});

/**
 * Fetch one blog post and extract its cover image URL.
 *
 * The cover is read from the <noscript> markup inside the first <figure>
 * that wraps a ".paragraph-image" element.
 *
 * @param {string} postUrl - URL of the blog post to fetch.
 * @param {function(?Error, string=)} done - Node-style callback; receives the
 *   request error, or null plus the image src (undefined when no image
 *   markup was found).
 */
function scrapeCover(postUrl, done) {
  request(postUrl, function (error, response, html) {
    if (error) {
      done(error);
      return;
    }

    const $ = cheerio.load(html);
    const noscriptHtml = $(".paragraph-image")
      .closest("figure")
      .first()
      .find("noscript")
      .html();
    const imgsrc = noscriptHtml ? $(noscriptHtml).attr("src") : undefined;

    done(null, imgsrc);
  });
}

/**
 * GET /scrapcover
 *
 * Scrapes the cover image of every URL in bloglinks, writes the result to
 * cover.json once all requests have finished, and replies with the same list.
 * Entry i of the output corresponds to entry i of data.json regardless of the
 * order in which the HTTP requests complete.
 */
app.get("/scrapcover", function (req, res) {
  if (bloglinks.length === 0) {
    res.status(500).send("No blog links found in data.json");
    return;
  }

  // All state is per-request, so the route can be hit more than once.
  const covers = new Array(bloglinks.length);
  let pending = bloglinks.length;
  let failed = false;

  bloglinks.forEach(function (coverURL, index) {
    console.log(coverURL);

    scrapeCover(coverURL, function (error, imgsrc) {
      if (error) {
        console.log(error);
        failed = true;
      } else {
        covers[index] = { "img": imgsrc };
      }

      pending -= 1;
      if (pending > 0) {
        return;
      }

      // Every request has settled at this point.
      if (failed) {
        // Keep the previous cover.json rather than overwriting it with a
        // partial list.
        res.status(502).send("Failed to scrape one or more cover images");
        return;
      }

      fs.writeFile(COVER_JSON_PATH, JSON.stringify(covers, null, 4), function (err) {
        if (err) {
          console.log(err);
          res.status(500).send("Could not write cover.json");
          return;
        }
        res.json(covers);
      });
    });
  });
});

app.listen(3000, function () {
  console.log("Magic happens on port 3000");
});
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
// The scraper for the blog.ejs section in the application. | ||
// The source for scraping is: https://blog.zairza.in/ | ||
|
||
// Things we need from the website: | ||
// clickable cover image with blog link | ||
// date published in DD/MM/YYYY format | ||
// title of the blog post | ||
// author of the blog post | ||
|
||
const express = require("express"); | ||
const fs = require("fs"); | ||
const cheerio = require("cheerio"); | ||
const request = require("request"); | ||
|
||
const app = express();

// Main page of the blog that is scraped.
const BLOG_URL = "https://blog.zairza.in/";

// Output file holding the scraped post list.
const DATA_JSON_PATH = __dirname + "/../routes/json/data.json";

/**
 * GET /scrape
 *
 * Fetches the blog main page, extracts title, link, author and release
 * timestamp of every listed post, writes the list to data.json and replies
 * with the same list. The post found in the ".u-paddingTop30" section (the
 * latest one) is placed first.
 */
app.get("/scrape", function (req, res) {
  request(BLOG_URL, function (error, response, html) {
    if (error) {
      // Report the failure instead of leaving the HTTP request hanging.
      console.log(error);
      res.status(502).send("Could not fetch " + BLOG_URL);
      return;
    }

    const $ = cheerio.load(html);

    // scraped data is stored here
    const data = [];

    // Regular post cards.
    $(".postArticle--short").each(function () {
      const post = $(this);

      data.push({
        "title": post.find("h3").text(),
        "href": post.find(".u-clearfix").next("a").attr("href"),
        "author": post.find(".postMetaInline-authorLockup").children().first().text(),
        "release": post.find("time").attr("datetime")
      });
    });

    // This scrapes the one latest blog post, which uses different markup;
    // unshift() puts it at the front of the list.
    $(".u-paddingTop30").each(function () {
      const latest = $(this);

      data.unshift({
        "title": latest.find("h3").first().text(),
        "href": latest.find(".u-borderLighter").attr("href"),
        "author": latest.find(".u-flexCenter").children().last().children().first().text(),
        "release": latest.find("time").attr("datetime")
      });
    });

    // Add data array into a json file
    fs.writeFile(DATA_JSON_PATH, JSON.stringify(data, null, 4), function (err) {
      if (err) {
        console.log(err);
        res.status(500).send("Could not write data.json");
        return;
      }
      res.json(data);
    });
  });
});

app.listen(3000, function () {
  console.log("Magic happens at port 3000");
});
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where do you run this file?