-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathindex.js
245 lines (205 loc) · 7.96 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import { chromium } from 'playwright-extra'
import stealth from 'puppeteer-extra-plugin-stealth'
import path from 'path'
import { moveFile } from 'move-file'
import fsP from 'node:fs/promises'
import fs from 'node:fs'
import { exiftool } from 'exiftool-vendored'
import ua from 'user-agents'
// Filter for the generated user agent: platform 'MacIntel' (alternatives
// include 'Win32', 'Linux ...') and device category 'desktop' (alternatives
// are 'mobile', 'tablet').
const userAgentFilter = {
  platform: 'MacIntel',
  deviceCategory: 'desktop',
}
const userAgent = new ua(userAgentFilter)

// Register the stealth plugin on chromium before any browser is launched.
chromium.use(stealth())
// Timeout in milliseconds (5 minutes) used when waiting for navigation to the
// next photo and for a download event to fire.
const timeoutValue = 300000
// Directory of the persistent browser profile passed to launchPersistentContext.
const userDataDir = './session'
// Root folder under which downloads are sorted into <year>/<month>/ folders.
const downloadPath = './download'

/**
 * Decide whether the browser should run headless.
 *
 * Headless is the default; passing `--headless=false` anywhere among the
 * command-line arguments (not only as the first one) switches to headful mode.
 *
 * @param {string[]} argv - Full argument vector, in the shape of `process.argv`
 *   (the first two entries are the runtime and script paths and are ignored).
 * @returns {boolean} `false` when `--headless=false` was passed, else `true`.
 */
const isHeadless = (argv) => !argv.slice(2).includes('--headless=false')

const headless = isHeadless(process.argv)
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds before the promise resolves.
 * @returns {Promise<void>} Resolves (with no value) once the delay has elapsed.
 */
const sleep = (ms) => new Promise((done) => {
  setTimeout(done, ms)
})
/**
 * Read the resume point (the link of the last processed photo) from the
 * `.lastdone` file in the current working directory.
 *
 * Surrounding whitespace (such as a trailing newline left by an editor) is
 * stripped, so a file containing only whitespace counts as empty.
 *
 * @returns {Promise<string>} The trimmed contents of `.lastdone`.
 * @throws {Error} If the file cannot be read (the original error is attached
 *   as `cause`) or if it contains no link.
 */
const getProgress = async () => {
  let contents
  try {
    contents = await fsP.readFile('.lastdone', 'utf-8')
  } catch (error) {
    // Keep the underlying error reachable instead of flattening it to a string.
    throw new Error(`Could not read the .lastdone file: ${error.message}`, { cause: error })
  }

  const lastDone = contents.trim()
  if (lastDone === '') {
    throw new Error('Please add the starting link in .lastdone file')
  }
  return lastDone
}
/**
 * Record the page's current URL in `.lastdone` so a later run can resume
 * from it.
 *
 * Nothing is written unless the URL starts with 'https://photos.google.com';
 * in that case a message is logged and the previous contents are kept.
 *
 * @param {object} page - Page object exposing `url()`.
 * @returns {Promise<void>}
 */
const saveProgress = async (page) => {
  const url = await page.url()

  if (!url.startsWith('https://photos.google.com')) {
    console.log('Current URL does not start with https://photos.google.com, not saving progress.')
    return
  }

  await fsP.writeFile('.lastdone', url, 'utf-8')
}
/**
 * Work out which year/month folder a downloaded file belongs in.
 *
 * Sources are tried in order:
 *   1. `metadata.DateTimeOriginal`
 *   2. `metadata.CreateDate`
 *   3. the date in the photo page's `aria-label` attribute (fetched again
 *      over HTTP through `page.request`)
 * If none of them yields a date, January 1970 is returned.
 *
 * A metadata tag only counts when it exposes integer `year` and `month`
 * properties; any other value (for example a raw string) is skipped so the
 * result never contains `undefined`.
 *
 * @param {object} metadata - Tags read from the file by exiftool.
 * @param {object} page - Page showing the photo; only used for the HTML fallback.
 * @returns {Promise<{year: number, month: number, dateType: string}>}
 *   `month` is 1-based. `dateType` names the source that was used:
 *   "DateTimeOriginal", "CreateDate", "HTML" or "default".
 */
const getMonthAndYear = async (metadata, page) => {
  for (const dateType of ['DateTimeOriginal', 'CreateDate']) {
    const tag = metadata[dateType]
    if (Number.isInteger(tag?.year) && Number.isInteger(tag?.month)) {
      return { year: tag.year, month: tag.month, dateType }
    }
  }

  // if metadata is not available, we try to get the date from the html
  console.log('Metadata not found, trying to get date from html')
  const response = await page.request.get(page.url())
  const html = await response.text()

  // The separator before AM/PM may be an ordinary space or U+202F
  // (narrow no-break space), so both are accepted.
  const regex = /aria-label="(Photo|Video) - (Landscape|Portrait|Square) - ([A-Za-z]{3} \d{1,2}, \d{4}, \d{1,2}:\d{2}:\d{2}[ \u202F][APM]{2})"/
  const match = regex.exec(html)
  if (match) {
    // Normalise U+202F to a plain space before handing the text to Date.
    const date = new Date(match[3].replace(/\u202F/g, ' '))
    if (!Number.isNaN(date.getTime())) {
      return { year: date.getFullYear(), month: date.getMonth() + 1, dateType: 'HTML' }
    }
  }

  return { year: 1970, month: 1, dateType: 'default' }
}
/**
 * Entry point: walks the Google Photos library from the link stored in
 * `.lastdone` up to the latest photo, downloading each item on the way and
 * saving the current link after every step so an interrupted run can resume.
 *
 * The browser context and the exiftool process are shut down even when the
 * run fails part-way.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // This first await suspends the function before it touches the helpers
  // that are declared as `const` further down the module, so they are
  // initialised by the time they are called.
  const startLink = await getProgress()
  console.log('Starting from:', new URL(startLink).href)

  const browser = await chromium.launchPersistentContext(path.resolve(userDataDir), {
    headless,
    channel: 'chromium',
    acceptDownloads: true,
    args: [
      '--disable-features=IsolateOrigins,site-per-process',
      '--disable-blink-features=AutomationControlled',
      '--no-sandbox', // May help in some environments
      '--disable-infobars', // Prevent infobars
      '--disable-extensions', // Disable extensions
      '--start-maximized', // Start maximized
      '--window-size=1280,720' // Set a specific window size
    ],
    userAgent: userAgent.toString(),
    viewport: { width: 1280, height: 720 },
    deviceScaleFactor: 1,
  })

  try {
    const page = await browser.newPage()

    await page.goto('https://photos.google.com')
    const latestPhoto = await getLatestPhoto(page)
    console.log('Latest Photo:', latestPhoto)
    console.log('-------------------------------------')

    await page.goto(clean(startLink))

    /*
      The starting (oldest) photo is downloaded with overwrite enabled.
      Without this step a first run would skip the starting photo, because
      the loop below always navigates before it downloads.
    */
    await downloadPhoto(page, true)

    while (true) {
      const currentUrl = page.url()

      if (clean(currentUrl) === clean(latestPhoto)) {
        console.log('-------------------------------------')
        console.log('Reached the latest photo, exiting...')
        break
      }

      /*
        Move to the next photo by clicking the navigation arrow from inside
        the page. Pressing the arrow key and Playwright's own click were both
        tried and did not work, hence the injected DOM click.
        NOTE(review): the class names come from the page markup and may change.
      */
      await page.evaluate(() => document.getElementsByClassName('SxgK2b OQEhnd')[0].click())

      // we wait until new photo is loaded
      await page.waitForURL(
        (url) => url.host === 'photos.google.com' && url.href !== currentUrl,
        { timeout: timeoutValue }
      )

      await downloadPhoto(page)
      await saveProgress(page)
    }
  } finally {
    // Always release external resources, even if closing the browser fails.
    try {
      await browser.close()
    } finally {
      await exiftool.end()
    }
  }
}

main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
/**
 * Download the photo/video currently shown on the page and file it under
 * `<downloadPath>/<year>/<month>/`.
 *
 * The download is triggered with the Shift+D keyboard shortcut. The file's
 * date is taken from its metadata (see getMonthAndYear). If moving the file
 * to its target fails, a second attempt is made with a random number added
 * to the file name and the path shortened when it exceeds 225 characters.
 *
 * @param {object} page - Page currently showing the item to download.
 * @param {boolean} [overwrite=false] - When true an existing file with the
 *   same name is replaced; when false a free name is picked via validatePath.
 * @returns {Promise<void>} Resolves once the file is stored, or immediately
 *   after logging if no download event arrived within the timeout.
 */
const downloadPhoto = async (page, overwrite = false) => {
  const downloadPromise = page.waitForEvent('download', {
    timeout: timeoutValue
  })

  // Hold Shift only for the duration of the shortcut; leaving it pressed
  // would keep the modifier active for everything that follows.
  await page.keyboard.down('Shift')
  try {
    await page.keyboard.press('KeyD')
  } finally {
    await page.keyboard.up('Shift')
  }

  let download
  try {
    download = await downloadPromise
  } catch (error) {
    console.log('There was an error while downloading the photo, Skipping...', page.url())
    return
  }

  const temp = await download.path()
  const fileName = download.suggestedFilename()
  const metadata = await exiftool.read(temp)
  const { year, month } = await getMonthAndYear(metadata, page)
  const targetDir = `${downloadPath}/${year}/${month}`

  try {
    const desiredPath = `${targetDir}/${fileName}`
    // validatePath always returns a name that does not exist yet, which would
    // make overwriting impossible, so it is skipped when overwrite is requested.
    const targetPath = overwrite ? desiredPath : validatePath(desiredPath)
    await moveFile(temp, targetPath, { overwrite })
    console.log('Download Complete:', `${year}/${month}/${fileName}`)
  } catch (error) {
    // Second attempt: same folder, but with a random number inserted before
    // the extension so the name differs from the one that just failed.
    const randomNumber = Math.floor(Math.random() * 1000000)
    const fallbackName = fileName.replace(/(\.[\w\d_-]+)$/i, `_${randomNumber}$1`)
    let downloadFilePath = `${targetDir}/${fallbackName}`

    // check for long paths that could result in ENAMETOOLONG and truncate if necessary
    if (downloadFilePath.length > 225) {
      downloadFilePath = truncatePath(downloadFilePath)
    }

    await moveFile(temp, `${downloadFilePath}`)
    console.log('Download Complete:', `${downloadFilePath}`)
  }
}
/**
 * Shorten a path so that it is at most `maxLength` characters long, keeping
 * the file extension intact. Used to avoid ENAMETOOLONG errors with long
 * file names.
 *
 * Paths that already fit are returned unchanged. Only the final path
 * component is inspected for an extension, so dots in directory names (or a
 * leading "./") are never mistaken for one.
 *
 * @param {string} pathString - Path to shorten.
 * @param {number} [maxLength=225] - Maximum length of the returned path.
 * @returns {string} The original path, or its first characters followed by
 *   the original extension so that the total length is `maxLength`.
 */
function truncatePath(pathString, maxLength = 225) {
  if (pathString.length <= maxLength) {
    return pathString;
  }

  const extension = path.extname(pathString);
  // Reserve room for the extension, then cut the rest of the path to fit.
  const keep = Math.max(0, maxLength - extension.length);
  return pathString.slice(0, keep) + extension;
}
/**
 * Return a path that does not exist yet, to avoid accidental overwrites.
 *
 * If `pathString` is free it is returned as is. Otherwise a counter is
 * appended to the file name, before the extension (photo.jpg -> photo_1.jpg),
 * and incremented (photo_2.jpg, photo_3.jpg, ...) until an unused name is
 * found. Only the final path component is split into name and extension, so
 * dots in directory names are left alone.
 *
 * @param {string} pathString - Desired destination path.
 * @returns {string} `pathString` itself, or the first free numbered variant.
 */
function validatePath(pathString) {
  if (!fs.existsSync(pathString)) {
    return pathString;
  }

  const { root, dir, name, ext } = path.parse(pathString);

  let counter = 1;
  let candidate = path.format({ root, dir, name: `${name}_${counter}`, ext });
  while (fs.existsSync(candidate)) {
    counter++;
    // Always derive the candidate from the original name so suffixes
    // do not pile up (photo_1_2.jpg).
    candidate = path.format({ root, dir, name: `${name}_${counter}`, ext });
  }
  return candidate;
}
/*
  Find the latest photo in the library. With the library page loaded, the
  right arrow key is pressed, which selects the latest photo in the grid.
  After a short pause the focused element is converted to a string and
  returned (presumably the link of that photo; the caller compares it
  against page URLs).
*/
const getLatestPhoto = async (page) => {
  await page.keyboard.press('ArrowRight')
  // Give the page a moment to move focus before reading it.
  await sleep(500)
  const focused = await page.evaluate(() => document.activeElement.toString())
  return focused
}
// Drop the first "/u/<number>/" segment (e.g. /u/0/) from a link, so links
// with and without it compare equal.
const clean = (link) => link.replace(/\/u\/\d+\//, '/')