-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.mjs
141 lines (110 loc) · 4.07 KB
/
app.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import express from "express";
import bodyParser from "body-parser";
import { fileURLToPath } from "url";
import path from "path";
import fs from "fs";
import PromiseCrawler from "promise-crawler";
import low from "lowdb";
import FileSync from "lowdb/adapters/FileSync.js";
import { spawn } from "child_process";
const app = express();
const port = process.env.PORT || 3000;
app.use(bodyParser.urlencoded({ extended: true }));
app.use(bodyParser.json());
app.use(express.static(path.resolve(path.dirname(fileURLToPath(import.meta.url)), "public")));
const crawler = new PromiseCrawler({
maxConnections: 10,
retries: 3,
});
const setupDatabase = (fileName) => {
const adapter = new FileSync(fileName);
const db = low(adapter);
db.defaults({ username: "", movies: [] }).write();
return db;
};
const scrapeLetterboxd = async (username, db, fileName) => {
const LETTERBOXD_FILMS_BASE = `https://letterboxd.com/${username}/films/`;
let LETTERBOXD_MAX_PAGES = 0;
const movies = db.get("movies");
movies.remove().write(); // Clear existing movies before scraping
const getLetterboxdPage = ({ page }) =>
crawler.request({
url: `${LETTERBOXD_FILMS_BASE}${page > 1 ? `page/${page}/` : ""}`,
});
const handleLetterBoxdResponse = ({ $ }) => {
if (!LETTERBOXD_MAX_PAGES) {
LETTERBOXD_MAX_PAGES = parseInt($(".paginate-page:last-child").text().trim());
}
$(".poster-list > li").each((i, el) => {
const $li = $(el);
const $img = $("img.image", $li);
const $poster = $(".film-poster", $li);
const $ratingEl = $(".poster-viewingdata > .rating", $li);
const letterboxdId = parseInt($poster.data("filmId"));
let letterboxdRating = 0;
if ($ratingEl.prop("name") !== undefined) {
const ratingStr = $ratingEl.attr("class");
letterboxdRating = parseInt(ratingStr.substring(ratingStr.lastIndexOf("-") + 1));
}
const title = $img.attr("alt");
movies.push({
letterboxdId,
title,
letterboxdRating,
}).write();
});
};
const res = await getLetterboxdPage({ page: 1 });
handleLetterBoxdResponse(res);
let pages = [];
if (LETTERBOXD_MAX_PAGES > 1) {
for (let i = 0; i < LETTERBOXD_MAX_PAGES - 1; i++) {
pages.push(i + 2);
}
}
const responses = await Promise.all(pages.map((pg) => getLetterboxdPage({ page: pg })));
responses.forEach(handleLetterBoxdResponse);
process.nextTick(async () => {
crawler.destroy();
// Write the movie data to the specified JSON file with the username at the top
await fs.promises.writeFile(
path.resolve(path.dirname(fileURLToPath(import.meta.url)), fileName),
JSON.stringify({ username, movies: movies.value() })
);
// Start same.js after scraping is completed
const sameProcess = spawn("node", ["same.js"]);
// Log the output of same.js
sameProcess.stdout.on("data", (data) => {
console.log(`same.js Output: ${data}`);
});
// Log any errors that occur in same.js
sameProcess.stderr.on("data", (data) => {
console.error(`Error in same.js: ${data}`);
});
// Log when same.js exits
sameProcess.on("close", (code) => {
console.log(`same.js exited with code ${code}`);
});
});
};
app.post("/scrape", async (req, res) => {
const usernames = req.body.usernames;
if (!usernames || usernames.length !== 2) {
return res.status(400).send("Please provide exactly two Letterboxd usernames.");
}
const [USERNAME1, USERNAME2] = usernames;
// Include the Letterboxd scraping logic for USERNAME1
const db1 = setupDatabase("movies1.json");
const scrape1Promise = scrapeLetterboxd(USERNAME1, db1, "movies1.json");
// Include the Letterboxd scraping logic for USERNAME2
const db2 = setupDatabase("movies2.json");
const scrape2Promise = scrapeLetterboxd(USERNAME2, db2, "movies2.json");
// Wait for both files to be written
await scrape1Promise;
await scrape2Promise;
// Redirect to commonMovies.html
res.redirect("/commonMovies.html");
});
app.listen(port, () => {
console.log(`App listening at http://localhost:${port}`);
});