forked from simonw/til
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_screenshots.py
96 lines (83 loc) · 2.78 KB
/
generate_screenshots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import hashlib
import json
import pathlib
import subprocess
import sqlite_utils
import tempfile
# Directory that contains this script; passed to generate_screenshots() when
# the file is run directly.
root = pathlib.Path(__file__).parent.resolve()
# System temp directory, used for the intermediate HTML file.
TMP_PATH = pathlib.Path(tempfile.gettempdir())
# Template files whose text is folded into every screenshot hash. These paths
# are read as written, so the braces are part of the file names themselves.
SHOT_HASH_PATHS = [
    root.joinpath("templates", "pages", "{topic}", "{slug}.html"),
    root.joinpath("templates", "til_base.html"),
]
def s3_contents():
    """Return the object keys listed for the til.simonwillison.net bucket.

    Runs ``s3-credentials list-bucket til.simonwillison.net`` and parses the
    JSON document it prints on stdout.

    Returns:
        A list holding the ``"Key"`` value of every item in that JSON output.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    proc = subprocess.run(
        ["s3-credentials", "list-bucket", "til.simonwillison.net"],
        capture_output=True,
        # Surface a failed listing as the command's own error instead of a
        # JSON decode error on empty or partial output.
        check=True,
    )
    return [item["Key"] for item in json.loads(proc.stdout)]
def jpeg_for_path(path):
    """Render one page and return a screenshot of it as JPEG bytes.

    The page HTML is produced with ``datasette . --get <path>``, saved to a
    scratch file under ``TMP_PATH``, and that file is then captured with
    ``shot-scraper shot``.

    Args:
        path: Path handed to ``datasette --get``; callers in this file pass
            ``"/<topic>/<slug>"``.

    Returns:
        The bytes ``shot-scraper`` wrote to stdout.

    Raises:
        subprocess.CalledProcessError: If either command exits with a
            non-zero status, so a failed render is never returned as an
            empty image.
    """
    page_html = TMP_PATH / "generate-screenshots-page.html"
    # Use datasette to generate HTML
    rendered = subprocess.run(
        ["datasette", ".", "--get", path], capture_output=True, check=True
    )
    # write_bytes() opens, writes and closes the file in one step.
    page_html.write_bytes(rendered.stdout)
    # Now use shot-scraper to capture the saved page; "-o -" sends the image
    # to stdout so it can be returned without another temporary file.
    shot = subprocess.run(
        [
            "shot-scraper",
            "shot",
            str(page_html),
            "-w",
            "800",
            "-h",
            "400",
            "--retina",
            "--quality",
            "60",
            "-o",
            "-",
        ],
        capture_output=True,
        check=True,
    )
    return shot.stdout
def generate_screenshots(root):
    """Generate and upload a screenshot for every TIL that needs one.

    For each row of the ``til`` table in ``<root>/tils.db`` a hash is
    computed from the row's HTML plus the text of the files listed in
    ``SHOT_HASH_PATHS``. A new screenshot is generated and uploaded as
    ``<hash>.jpg`` when that hash differs from the row's stored
    ``shot_hash`` or when the file is missing from the bucket listing.

    Args:
        root: Directory (a ``pathlib.Path``) that contains ``tils.db``.

    Raises:
        subprocess.CalledProcessError: If uploading a screenshot fails.
    """
    db = sqlite_utils.Database(root / "tils.db")
    # If the old 'shot' column exists, drop it
    if "shot" in db["til"].columns_dict:
        db["til"].transform(drop=["shot"])
    # shot_hash incorporates a hash of key templates, so editing a template
    # invalidates every existing screenshot. MD5 serves only as a change
    # fingerprint here.
    template_hasher = hashlib.md5()
    for filepath in SHOT_HASH_PATHS:
        template_hasher.update(filepath.read_text().encode("utf-8"))
    templates_digest = template_hasher.hexdigest()
    # A set gives constant-time membership tests inside the row loop.
    s3_keys = set(s3_contents())
    for row in db["til"].rows:
        path = row["path"]
        shot_hash = hashlib.md5(
            (templates_digest + row["html"]).encode("utf-8")
        ).hexdigest()
        shot_filename = "{}.jpg".format(shot_hash)
        if shot_hash == row.get("shot_hash") and shot_filename in s3_keys:
            print("Skipped {} with shot hash {}".format(path, shot_hash))
            continue
        jpeg = jpeg_for_path("/{}/{}".format(row["topic"], row["slug"]))
        if not jpeg:
            # Never upload or record an empty image; leaving shot_hash
            # untouched means the next run retries this row.
            print("No screenshot generated for {}, not uploading".format(path))
            continue
        # Store it to S3
        subprocess.run(
            [
                "s3-credentials",
                "put-object",
                "til.simonwillison.net",
                shot_filename,
                "-",
                "--content-type",
                "image/jpeg",
                "--silent",
            ],
            input=jpeg,
            check=True,
        )
        # Record the hash only after the upload succeeded, so the database
        # never claims a screenshot that is not in the bucket.
        db["til"].update(path, {"shot_hash": shot_hash}, alter=True)
        print(
            "Stored {} byte JPEG for {} shot hash {}".format(
                len(jpeg), path, shot_hash
            )
        )
# When executed as a script, process the project in this file's directory.
if __name__ == "__main__":
    generate_screenshots(root)