Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: jobs offers scraping #39

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions background.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
console.log("background run");

// Running tally of scraped offers, keyed by location. Each value is a list of
// { salary, count } entries. A prototype-less object is used so that a location
// string coming from the page (e.g. "constructor" or "__proto__") can never
// collide with an inherited Object.prototype member.
// NOTE(review): this lives in service-worker memory only; presumably the worker
// stays alive for the duration of a scrape — confirm.
let jobs = Object.create(null);

/**
 * Merges a batch of scraped offers into the `jobs` tally.
 *
 * @param {Array<{location: string, salary: string}>} jobsInfo - offers sent by
 *   the content script; anything that is not an array is ignored.
 */
function addJobsToTally(jobsInfo) {
  if (!Array.isArray(jobsInfo)) {
    return;
  }

  jobsInfo.forEach(({ location, salary }) => {
    // First offer seen for this location: start its list.
    if (!jobs[location]) {
      jobs[location] = [{ salary, count: 1 }];
      return;
    }

    // Known location: bump the counter of the matching salary, or add it.
    const entry = jobs[location].find((u) => u.salary === salary);
    if (entry) {
      entry.count++;
    } else {
      jobs[location].push({ salary, count: 1 });
    }
  });
}

/**
 * Opens a port to the content script of the active tab and posts one command.
 * Only called for commands that must reach the page ("scrap" / "stop"), so no
 * port is created for data messages coming from the content script itself.
 *
 * @param {string} cmd - command forwarded to the content script.
 */
async function sendToContentScript(cmd) {
  const [tab] = await chrome.tabs.query({
    active: true,
    currentWindow: true,
  });

  // No active tab could be resolved.
  if (!tab) {
    console.log("error al conseguir la tab");
    return;
  }
  console.log("tab found");

  const portContentScript = chrome.tabs.connect(tab.id, {
    name: "bg-content_script",
  });
  portContentScript.postMessage({ cmd });
}

// Listens on every incoming port (popup and content script) for commands.
chrome.runtime.onConnect.addListener(function (port) {
  port.onMessage.addListener(async function (params) {
    switch (params.cmd) {
      // "start" comes from the popup: ask the content script to scrape.
      case "start":
        console.log("envia scrap a contetn");
        await sendToContentScript("scrap");
        break;

      // "stop" comes from the popup: ask the content script to stop.
      case "stop":
        await sendToContentScript("stop");
        break;

      // "saveInfo" comes from the content script: accumulate the batch.
      // Handled synchronously so it is always applied before a following
      // "saveInLocalStorage" message.
      case "saveInfo":
        addJobsToTally(params.jobsInfo);
        break;

      // "saveInLocalStorage" comes from the content script: persist and reset.
      case "saveInLocalStorage":
        chrome.storage.local.set({
          jobsAnalysis: JSON.stringify({ data: jobs }),
        });
        // The string above is built before the reset, so clearing is safe.
        jobs = Object.create(null);
        break;

      default:
        break;
    }
  });
});
85 changes: 85 additions & 0 deletions contentscript.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
 * Collects the relevant fields of every job card currently in the document.
 *
 * Cards are the `div` elements whose id contains "jobcard". For each one the
 * title and salary texts are read from the children of
 * `card.children[1].children[0]` (second and third child respectively).
 * The location comes from the `title` attribute of the LAST anchor whose title
 * contains "Empleos", with its first 11 characters removed (the length of the
 * "Empleos en " prefix). Cards without such an anchor get an empty location.
 *
 * @returns {Array<{title: string, salary: string, location: string}>}
 */
function jobCardsInfo() {
  const cards = Array.from(document.querySelectorAll("div[id*=jobcard]"));

  return cards.map((card) => {
    // The first child (the date) is skipped; only title and salary are kept.
    const [, titleNode, salaryNode] = card.children[1].children[0].children;
    const title = titleNode.innerText;
    const salary = salaryNode.innerText;

    // A card may list locality and state, or only the state; take the last one.
    const anchors = Array.from(card.querySelectorAll("a[title*=Empleos]"));
    const lastAnchor = anchors[anchors.length - 1];
    const rawLocation = lastAnchor === undefined ? "" : lastAnchor.title;

    // Slicing an empty string yields "", so no special case is required.
    return { title, salary, location: rawLocation.slice(11) };
  });
}

// Container observed for page changes while scraping. It is looked up when the
// script loads and may be null if the job list is not in the DOM yet.
const element = document.querySelector("div[class*=jobCardContainer]");

// Port used to push scraped batches and the final save request to the background.
const portBackground = chrome.runtime.connect({
  name: "content_script-background",
});

// Observer that is active while a scrape is running; null otherwise.
let mutation = null;

// Stops watching the job list (safe to call when nothing is being observed).
function stopObserving() {
  mutation && mutation.disconnect();
  mutation = null;
}

// Ends the scrape and asks the background to persist what was collected.
function finishScraping() {
  stopObserving();
  portBackground.postMessage({ cmd: "saveInLocalStorage" });
}

// Sends the offers of the page currently shown to the background.
function sendCurrentPage() {
  portBackground.postMessage({
    cmd: "saveInfo",
    jobsInfo: jobCardsInfo(),
  });
}

/**
 * Scrapes the current page and then either moves to the next page or, when the
 * "next" button is disabled (last page), finishes the scrape. Scraping happens
 * BEFORE the disabled check so the last page is collected too.
 *
 * @param {Element} buttonNext - the pagination "next" list item.
 */
function scrapeAndAdvance(buttonNext) {
  sendCurrentPage();

  if (buttonNext.className.includes("disabled")) {
    finishScraping();
    return;
  }
  buttonNext.click();
}

// Commands arrive from the background over a port it opens for each command.
chrome.runtime.onConnect.addListener(function (port) {
  port.onMessage.addListener(({ cmd }) => {
    if (cmd === "scrap") {
      console.log("scrapeando");

      // Retry the lookup in case the list was rendered after this script loaded.
      const container =
        element || document.querySelector("div[class*=jobCardContainer]");
      if (!container) {
        console.log("job list container not found");
        return;
      }

      // A repeated "scrap" must not leave a previous observer running.
      stopObserving();

      // NOTE(review): the callback runs on every child-list change under the
      // container, so a single page render may trigger it more than once —
      // confirm against the real page.
      mutation = new MutationObserver(() => {
        const buttonNext = document.querySelector("li[class*=next]");
        console.log("buttonNext", buttonNext);
        if (buttonNext) {
          console.log("existe el button next ");
          scrapeAndAdvance(buttonNext);
        }
        // TODO: report the missing "next" button to the user.
      });

      mutation.observe(container, { subtree: true, childList: true });

      // Handle the page that is already on screen.
      const firstButtonNext = document.querySelector("li[class*=next]");
      if (firstButtonNext) {
        scrapeAndAdvance(firstButtonNext);
      } else {
        // No pagination at all: still collect this page; it is saved on "stop".
        sendCurrentPage();
      }
    }

    if (cmd === "stop") {
      finishScraping();
    }
  });
});
Binary file added images/occ.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"manifest_version": 3,
"name": "jobs-scraping",
"author": "https://github.com/GersoZz",
"version": "1.0.0",
"description": "Scraping Jobs Offers",
"permissions": ["tabs", "storage", "activeTab", "scripting"],
"background": {
"service_worker": "./background.js"
},
"action": {
"default_popup": "./popup/index.html"
},
"icons": {
"32": "./images/occ.png"
},
"content_scripts": [
{
"matches": ["https://www.occ.com.mx/*"],
"js": ["./contentscript.js"]
}
]
}
45 changes: 45 additions & 0 deletions popup/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />

<link
href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/css/bootstrap.min.css"
rel="stylesheet"
integrity="sha384-KK94CHFLLe+nY2dmCWGMq91rCGa5gtU4mk92HdvYe+M/SXH301p5ILy+dN9+nJOZ"
crossorigin="anonymous"
/>

<link href="styles.css" rel="stylesheet" />

<title>Document</title>
</head>
<body>
<h1 class="text-center mb-3 mt-3">Jobs Scraping</h1>

<div class="d-flex justify-content-around mb-3">
<button id="btnStartScrap" type="button" class="btn btn-primary">
Start
</button>
<button id="btnStop" type="button" class="btn btn-warning d-none">
Stop
</button>
</div>

<div class="d-flex justify-content-center">
<div
id="spinner"
class="spinner-border text-primary d-none"
role="status"
>
<span class="visually-hidden">Loading...</span>
</div>
</div>

<div id="results"></div>

<script src="./index.js" type="module"></script>
</body>
</html>
91 changes: 91 additions & 0 deletions popup/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Popup controls, resolved once when the script loads.
const [btnStartScrap, btnStop, spinner, results] = [
  "btnStartScrap",
  "btnStop",
  "spinner",
  "results",
].map((id) => document.getElementById(id));

// Port over which the popup sends its commands to the background script.
const backgroundPort = chrome.runtime.connect({ name: "popup-background" });

/**
 * Creates an element of the given type and sets its `innerText`.
 *
 * @param {string} elementType - tag name passed to `document.createElement`.
 * @param {*} innerString - value assigned to the element's `innerText`.
 * @returns {HTMLElement} the newly created element.
 */
const createElementWithText = (elementType, innerString) =>
  Object.assign(document.createElement(elementType), {
    innerText: innerString,
  });

/**
 * Appends one row to a table body: a salary cell followed by a count cell.
 * The first "Mensual" occurrence is removed from the salary text.
 *
 * @param {{salary: string, count: number}} element - entry to render.
 * @param {HTMLElement} tableBody - body that receives the new row.
 * @returns {undefined} whatever `tableBody.append` returns.
 */
const addRowToTable = (element, tableBody) => {
  const row = document.createElement("tr");
  const salaryCell = createElementWithText(
    "td",
    element.salary.replace("Mensual", "")
  );
  const countCell = createElementWithText("td", element.count);

  row.append(salaryCell, countCell);
  return tableBody.append(row);
};

/**
 * Builds a `<table class="table">` with a fixed header row
 * ("Salario" / "Vacantes") followed by the given body.
 *
 * @param {HTMLElement} tBody - table body to place after the header.
 * @returns {HTMLElement} the assembled table element.
 */
const createcontentTable = (tBody) => {
  const headerRow = document.createElement("tr");
  ["Salario", "Vacantes"].forEach((label) => {
    headerRow.append(createElementWithText("th", label));
  });

  const tHead = document.createElement("thead");
  tHead.append(headerRow);

  const table = document.createElement("table");
  table.setAttribute("class", "table");
  table.append(tHead, tBody);
  return table;
};

/**
 * Renders the stored analysis into the `results` container.
 *
 * Reads the `jobsAnalysis` string from `chrome.storage.local`, parses it and,
 * for every location in its `data` object, renders an `<h5>` heading plus a
 * salary/count table. Everything is built inside a document fragment and
 * inserted into the DOM once, after the previous content has been cleared.
 * Does nothing when no analysis has been stored yet.
 */
const printAnalysis = () => {
  chrome.storage.local.get("jobsAnalysis", (items) => {
    if (typeof items.jobsAnalysis === "undefined") {
      return;
    }

    // The analysis is stored as a JSON string of the shape { data: {...} }.
    const { data } = JSON.parse(items.jobsAnalysis);
    const fragment = document.createDocumentFragment();

    for (const [key, entries] of Object.entries(data || {})) {
      const tBody = document.createElement("tbody");
      const localidad = document.createElement("h5");

      // Offers without a location are stored under the empty-string key.
      const localidadStr = key === "" ? "Sin especificar localidad" : key;
      localidad.innerText = ` ${localidadStr}`;

      entries.forEach((el) => {
        addRowToTable(el, tBody);
      });

      fragment.append(localidad, createcontentTable(tBody));
    }

    // Swap old content for new in one step: clear, then a single insertion.
    results.innerHTML = "";
    results.appendChild(fragment);
  });
};

/**
 * Switches the popup between its two states.
 *
 * @param {boolean} isScraping - true shows Stop and the spinner and hides
 *   Start; false does the opposite.
 */
const setScrapingUi = (isScraping) => {
  btnStop.classList.toggle("d-none", !isScraping);
  spinner.classList.toggle("d-none", !isScraping);
  btnStartScrap.classList.toggle("d-none", isScraping);
};

// Start: clear previous results, tell the background to begin, show progress.
btnStartScrap.addEventListener("click", () => {
  results.innerHTML = ``;
  backgroundPort.postMessage({ cmd: "start" });
  setScrapingUi(true);
});

// A new analysis was stored: either the user pressed Stop or the scrape reached
// the last page by itself. In both cases leave the "scraping" state (otherwise
// the spinner would keep spinning after an automatic finish) and re-render.
chrome.storage.onChanged.addListener((changes, areaName) => {
  if (areaName !== "local" || !changes.jobsAnalysis) {
    return;
  }
  setScrapingUi(false);
  printAnalysis();
});

// Show whatever analysis is already stored when the popup opens.
printAnalysis();

// Stop: tell the background to stop and go back to the idle state.
btnStop.onclick = () => {
  backgroundPort.postMessage({ cmd: "stop" });
  setScrapingUi(false);
};
9 changes: 9 additions & 0 deletions popup/styles.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/* Popup body: fixed 600px width, height follows the content. */
body {
width: 600px;
height: auto;
padding: 20px;
}

/* Centered heading helper. NOTE(review): the popup markup in this change styles
   its heading with Bootstrap utility classes instead, so this rule looks
   unused — confirm before removing. */
.title {
text-align: center;
}