Skip to content

Commit

Permalink
Updated backend
Browse files Browse the repository at this point in the history
  • Loading branch information
PrJayK committed Jan 29, 2024
1 parent 9ae51d3 commit 6e0dfac
Show file tree
Hide file tree
Showing 6 changed files with 2,661 additions and 62 deletions.
12 changes: 10 additions & 2 deletions Dark Patterns Buster/backend/index.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
const express = require('express');
const { scraperUtil } = require('./scraper2.0/index.js');
const cors = require('cors');

const app = express();

const PORT = 3000;

app.use(express.json());
app.use(cors());

/**
 * Tells whether `candidate` is a string holding an absolute http(s) URL.
 * @param {unknown} candidate - value taken from the request body
 * @returns {boolean} true only for parseable http: or https: URLs
 */
function isHttpUrl(candidate) {
  if (typeof candidate !== 'string') {
    return false;
  }
  try {
    const { protocol } = new URL(candidate);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    // new URL() throws on anything it cannot parse.
    return false;
  }
}

/**
 * POST /url — expects a JSON body `{ url }`, runs the scraper on it and
 * answers with an empty 200 on success.
 * Responds 400 when the URL is missing or not http(s), and 500 when the
 * scraper throws, so the client always receives a response.
 */
app.post('/url', async (req, res) => {
  const url = req.body?.url;
  if (!isHttpUrl(url)) {
    res.status(400).json({ error: 'Request body must contain an http(s) "url".' });
    return;
  }
  try {
    await scraperUtil(url);
    res.send();
  } catch (err) {
    // Without this catch a rejected scraperUtil() would leave the request hanging.
    console.error('Scraping failed:', err);
    res.status(500).json({ error: 'Failed to scrape the requested URL.' });
  }
});

app.listen(PORT, () => {
Expand Down
122 changes: 69 additions & 53 deletions Dark Patterns Buster/backend/scraper2.0/index.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
const puppeteer = require('puppeteer-extra');
const { Parser } = require('@json2csv/plainjs');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');

// Register the stealth plugin once, before any browser is launched.
puppeteer.use(StealthPlugin());

// Counter used by getId() to build fallback ids for elements that have none.
let ID = 0;
// Evaluated once here, so it always holds the value built from the initial ID.
// NOTE(review): not referenced in the visible part of this file — confirm it is still needed.
let RANDOM_ID_FORMAT = `dark_patternsbuster${ID}`;
// Upper-case tag names that isIgnoredElement() compares against.
const IGNORE_ELEMENTS = ['SCRIPT', 'NOSCRIPT', 'STYLE', 'BR'];


/**
 * Opens `url` in a Puppeteer browser, saves a full-page screenshot to
 * ./page.png and collects the text entries found under <body>.
 *
 * @param {string} url - address of the page to load (60 s navigation timeout)
 * @returns {Promise<Array>} the list produced by Scrapy() for the body element
 * @throws rethrows any launch, navigation, screenshot or scraping error; the
 *         browser is closed before the error propagates
 */
async function Scraper(url){
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url, { timeout: 60000 });
    await page.screenshot({ path: './page.png', fullPage: true });
    const body = await page.$('body');
    const texts = await Scrapy(page, body);
    console.log(texts);
    return texts;
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }
}

// Recursively walks the subtree under `element` and returns an array of
// { id, text } records.
// - Returns [] when `element` is falsy.
// - For each child reported by getChild() whose nodeType is 3 (a DOM text
//   node), a record is pushed with the id of the parent element and the
//   trimmed text of the child; the result of recursing into every child is
//   then appended.
// NOTE(review): several statements below appear twice (with and without a
// trailing semicolon), and the "Expand All" line hides part of the body, so
// the condition guarding the first `info` record cannot be seen from here.
async function Scrapy(page,element){
let data=[]
let data=[];
if(!element){
return []
return [];
}
else{
if(!(await isIgnoredElement(element))){
Expand All @@ -30,77 +35,88 @@ async function Scrapy(page,element){
// NOTE(review): getId is declared as getId(page, element) in this file, but is
// called here with a single argument — confirm whether `page` is missing.
id:await getId(element), //id to text node is given 20 by default
text: (await getTextContent(element)).trim()
}
return [info]
return [info];
}
const parent=element
const parent=element;
const children=await getChild(page,element);
for(const child of children){
// nodeType 3 is Node.TEXT_NODE.
if(await getNodeType(page,child)===3){
const info={
id: await getId(page,parent),
text:(await getTextContent(child)).trim()
}
data.push(info)
data.push(info);
}
// Recurse into the child and append whatever it yields.
data=data.concat(await Scrapy(page,child))
data=data.concat(await Scrapy(page,child));
}
}
}
return data
return data;
}

/**
 * Reads the `textContent` property of a handle.
 *
 * @param {object|null|undefined} element - handle exposing an async
 *        getProperty(name) whose result exposes an async jsonValue()
 * @returns {Promise<string>} the text, or '' when the handle or its value is
 *          missing
 */
async function getTextContent(element){
  const propertyHandle = await element?.getProperty('textContent');
  const text = await propertyHandle?.jsonValue();
  // Callers in this file call .trim() on the result, so never hand back null/undefined.
  return text ?? '';
}

/**
 * Lists the child nodes of `element` as handles.
 *
 * @param {object} page - object exposing async evaluateHandle(fn, handle)
 * @param {object|null|undefined} element - handle whose childNodes are read
 * @returns {Promise<Array>} one handle per child node; [] when `element` is
 *          falsy. Properties whose asElement() yields a falsy value are
 *          skipped so callers never receive null entries.
 */
async function getChild(page,element){
  if (!element) {
    return [];
  }
  const nodeList = await page.evaluateHandle((node) => node.childNodes, element);
  const properties = await nodeList.getProperties();
  const children = [];
  for (const property of properties.values()) {
    const child = property.asElement();
    if (child) {
      children.push(child);
    }
  }
  return children;
}

/**
 * Tells whether the tag name of `element` is listed in IGNORE_ELEMENTS.
 *
 * @param {object|null|undefined} element - handle passed on to getTagName()
 * @returns {Promise<boolean>} true when getTagName() yields a string whose
 *          upper-cased form is in IGNORE_ELEMENTS; false otherwise, including
 *          when no tag name is available
 */
async function isIgnoredElement(element){
  const tagName = await getTagName(element);
  return typeof tagName === 'string' && IGNORE_ELEMENTS.includes(tagName.toUpperCase());
}

/**
 * Reads the DOM `nodeType` of the node behind `element`.
 *
 * @param {object} page - object exposing async evaluate(fn, handle)
 * @param {object} element - handle of the node to inspect
 * @returns {Promise<number>} the node's nodeType as reported by the page
 */
async function getNodeType(page,element){
  return page.evaluate((node) => node.nodeType, element);
}

/**
 * Returns the `id` attribute of `element`, or a generated fallback id.
 *
 * @param {object} page - object exposing async evaluate(fn, handle)
 * @param {object} element - handle of the element whose id is read
 * @returns {Promise<string>} the element's own id when it is non-empty,
 *          otherwise `dark-patterns-buster-<n>` where <n> is the current value
 *          of the module-level ID counter (incremented afterwards)
 */
async function getId(page,element){
  const ownId = await page.evaluate((node) => node.id, element);
  if (ownId) {
    return ownId;
  }
  // The fallback id is only returned; this function does not write it to the element.
  const generatedId = `dark-patterns-buster-${ID}`;
  ID += 1;
  return generatedId;
}

/**
 * Reads the `tagName` property of a handle.
 *
 * @param {object|null|undefined} element - handle exposing an async
 *        getProperty(name) whose result exposes an async jsonValue()
 * @returns {Promise<*>} the value of tagName, or undefined when the handle or
 *          the property handle is missing
 */
async function getTagName(element){
  const tagHandle = await element?.getProperty('tagName');
  const tagName = await tagHandle?.jsonValue();
  return tagName;
}
/**
 * Scrapes `url`, drops entries whose text is empty and writes the remaining
 * { id, text } records as CSV to output.csv in the current working directory.
 *
 * The returned promise settles only after the write has finished. A failed
 * write is logged and not rethrown, as before.
 *
 * @param {string} url - page to scrape, passed straight to Scraper()
 * @returns {Promise<void>}
 * @throws rethrows whatever Scraper() or the CSV parser throws
 */
async function scraperUtil(url){
  const scraped = await Scraper(url);
  const rows = fileCleaner(scraped);
  // Name the columns explicitly so the output does not depend on the first row.
  // NOTE(review): without `fields` the parser has to infer columns from the
  // data, which cannot work for an empty result — confirm against the
  // installed @json2csv/plainjs version.
  const parser = new Parser({ fields: ['id', 'text'] });
  const csv = parser.parse(rows);
  const filePath = 'output.csv';
  try {
    await fs.promises.writeFile(filePath, csv, 'utf8');
    console.log(`CSV file saved successfully at ${filePath}`);
  } catch (err) {
    console.error('Error writing CSV file:', err);
  }
}

/**
 * Removes records whose `text` is the empty string.
 *
 * @param {Array<{text: string}>} file - records produced by the scraper
 * @returns {Array<{text: string}>} a new array without the empty-text records
 */
function fileCleaner(file){
  return file.filter((entry) => entry.text !== '');
}

module.exports = {
scraperUtil
};
8 changes: 4 additions & 4 deletions Dark Patterns Buster/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
<h3>Dark Patterns Buster</h3>
</div>
<div class="input">
<form action="" class="search-bar">
<input class="textbox" type="text" id="url" name="url" placeholder="Enter The url">
<button class="button" type="submit" id="go-button">Go</button>
</form>
<div class="search-bar">
<input class="textbox" type="text" id="url" placeholder="Enter The url">
<button class="button" id="go-button">Go</button>
</div>
</div>
</div>

Expand Down
16 changes: 13 additions & 3 deletions Dark Patterns Buster/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,17 @@ document.addEventListener("DOMContentLoaded", function() {
document.getElementById("go-button").addEventListener("click", getUrl);
});

/**
 * Click handler for the "Go" button: reads the URL typed into the #url input,
 * clears the input and POSTs the URL as JSON to the backend's /url endpoint.
 *
 * Nothing is sent when the input is blank. Network failures and non-2xx
 * responses are reported on the console instead of surfacing as an unhandled
 * promise rejection.
 *
 * @returns {Promise<void>}
 */
async function getUrl() {
  const urlElement = document.getElementById("url");
  const url = urlElement.value.trim();
  urlElement.value = "";
  if (url === "") {
    return;
  }
  try {
    const response = await fetch("http://localhost:3000/url", {
      method: "POST",
      body: JSON.stringify({ url }),
      headers: {
        "Content-type": "application/json; charset=UTF-8",
      },
    });
    if (!response.ok) {
      console.error(`Backend rejected the URL (HTTP ${response.status})`);
    }
  } catch (err) {
    console.error("Could not reach the backend:", err);
  }
}
Loading

0 comments on commit 6e0dfac

Please sign in to comment.