-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathungooglefont.py
executable file
·134 lines (94 loc) · 4.23 KB
/
ungooglefont.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python3
import sys
import os
from urllib.parse import urlparse, urlunparse
import requests
import re
# Host serving the Google Fonts stylesheets; processUrl treats any css url
# containing this string as Google font css.
# urls like https://fonts.googleapis.com/css2?family=Libre+Baskerville:ital,wght@0,400;0,700;1,400&display=swap
GOOGLE_CSS_HOST = 'fonts.googleapis.com'
# Host serving the font files themselves; localiseCssFonts searches the
# downloaded css for url(...) entries on this host.
# urls like https://fonts.gstatic.com/s/librebaskerville/v14/kmKiZrc3Hgbbcjq75U4uslyuy4kn0qviTgY3KcA.woff2
GOOGLE_FONT_HOST = 'fonts.gstatic.com'
# Browser-like User-Agent that processUrl sends when downloading Google font
# css (needed for woff2 support).
API_REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'}
def download(url, targetFile, headers=None, timeout=30):
    """Fetch *url* and write the raw response body to *targetFile*.

    Args:
        url: Address to fetch; redirects are followed.
        targetFile: Writable file object opened in binary mode. It receives
            the response body as bytes and is NOT closed here.
        headers: Optional dict of extra HTTP request headers.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    r = requests.get(url, allow_redirects=True, headers=headers, timeout=timeout)
    if r.status_code != 200:
        raise Exception('download failed for ' + url)
    targetFile.write(r.content)
def findCssUrls(url, _seen=None):
    """Yield the absolute URL of every stylesheet reachable from *url*.

    The document at *url* is fetched and scanned for ``<link ... href="x.css">``
    tags and for ``@import`` rules (``url(...)`` form and quoted-string form).
    Each stylesheet found is yielded and then fetched and scanned itself,
    because css may import other css.

    Args:
        url: Address of the html page or stylesheet to scan.
        _seen: Internal. Set of URLs already visited, shared across the
            recursion so each stylesheet is reported once and import cycles
            terminate. Callers should leave it unset.

    Yields:
        str: Stylesheet URLs, resolved against the URL of the document that
        referenced them, in discovery order.

    Raises:
        Exception: If *url* or any discovered stylesheet does not answer
            with status 200.
    """
    from urllib.parse import urljoin

    if _seen is None:
        _seen = {url}

    r = requests.get(url, allow_redirects=True)
    if r.status_code != 200:
        raise Exception('cant get specified url')

    # Character classes instead of greedy ``.+`` keep every match inside a
    # single tag / rule, even when the whole document is on one line.
    # NOTE(review): the <link> pattern only accepts hrefs ending in ".css", so
    # suffix-less stylesheet links (e.g. .../css2?family=...) are not found
    # through <link> tags, only through @import rules.
    patterns = (
        r'<link\b[^>]*?\shref="([^"]+?\.css)"[^>]*>',
        # optional quotes around the url are dropped from the capture
        r'@import\s*url\(\s*["\']?([^"\')\s]+)["\']?\s*\)',
        r'@import\s*["\']([^"\']+)["\']',
    )
    for pattern in patterns:
        for match in re.findall(pattern, r.text):
            # r.url is the final address after redirects, which is what
            # relative and protocol-relative references are relative to.
            cssUrl = urljoin(r.url, match)
            if cssUrl in _seen:
                continue
            _seen.add(cssUrl)
            print('... found ' + cssUrl)
            yield cssUrl
            # css may import other css
            yield from findCssUrls(cssUrl, _seen)
def localiseCssFonts(inputCss, outputCss, localFontsPath):
    """Download the Google fonts referenced in a stylesheet and rewrite it.

    Every ``url(http(s)://<GOOGLE_FONT_HOST>/...) format`` entry found in
    *inputCss* is downloaded into *localFontsPath*, and the entry is replaced
    by ``https://`` followed by the local file path. The rewritten stylesheet
    is written to *outputCss*.

    Args:
        inputCss: Seekable text file object holding the css to scan; it is
            rewound and read in full.
        outputCss: Writable text file object receiving the rewritten css.
        localFontsPath: Existing directory the font files are saved into.

    Raises:
        Exception: Propagated from ``download`` when a font cannot be fetched.
    """
    # The host is escaped so its dots match literally, and the capture stops
    # at the first ")" so several url(...) entries on one line stay separate.
    fontUrlPattern = re.compile(
        r'url\((https?://' + re.escape(GOOGLE_FONT_HOST) + r'[^)]+)\) format'
    )

    inputCss.seek(0)
    cssLines = inputCss.readlines()
    for i, line in enumerate(cssLines):
        for googleFontUrl in fontUrlPattern.findall(line):
            urlParsed = urlparse(googleFontUrl)
            # paths look like /s/<font face>/<version>/<file>; the face name
            # is only used for the progress message
            pathParts = urlParsed.path.split('/')
            fontFace = pathParts[2] if len(pathParts) > 2 else pathParts[-1]
            print('google font found: {} ({})'.format(fontFace, googleFontUrl))
            # flatten the remote path into a single file name
            localFontPath = localFontsPath + '/' + urlParsed.path[1:].replace('/', '-')
            # close the font file as soon as it is written
            with open(localFontPath, 'wb') as localFont:
                download(googleFontUrl, localFont)
            cssLines[i] = cssLines[i].replace(googleFontUrl, 'https://' + localFontPath)
    outputCss.writelines(cssLines)
def processUrl(siteUrl, cssUrls=None):
    """Collect a site's Google font css and fonts into local directories.

    Creates ``<host>/fonts`` and a css directory named after the host and url
    path, appends every Google font stylesheet to ``googleFonts.css`` in the
    css directory, then writes ``fonts.css`` next to it with the font urls
    pointing at the downloaded copies.

    Args:
        siteUrl: Address of the site; ``https`` is assumed when no scheme is
            given.
        cssUrls: Optional iterable of stylesheet urls to use instead of
            scanning *siteUrl* for them.

    Returns:
        0 when the user declines to continue with an existing
        ``googleFonts.css``; otherwise None.

    Raises:
        ValueError: If no hostname can be extracted from *siteUrl*.
        Exception: Propagated from ``findCssUrls`` / ``download`` on failed
            requests.
    """
    # urlparse only recognises the host when the url has a "//" authority
    # part, so a bare "example.com/page" needs the scheme prepended first.
    if '://' not in siteUrl:
        siteUrl = 'https://' + siteUrl.lstrip('/')
    siteUrl = urlparse(siteUrl, scheme='https')
    siteHost = siteUrl.hostname
    if not siteHost:
        raise ValueError('no hostname found in url: ' + urlunparse(siteUrl))

    fontsDir = siteHost + '/fonts'
    cssDir = siteHost + siteUrl.path.replace('/', '-')
    for path in [siteHost, fontsDir, cssDir]:
        os.makedirs(path, exist_ok=True)

    googleFontsCssPath = cssDir + '/googleFonts.css'
    # check file doesn't exist to avoid appending to existing file.
    # Binary mode in both branches: download() writes the response as bytes.
    try:
        googleFontsCss = open(googleFontsCssPath, 'xb')
    except FileExistsError:
        print('Hmmm, {} already exists. Are you sure you want to continue? This could result in duplicate css being appended to the file'.format(googleFontsCssPath))
        if input('Continue? (yN)') != 'y':
            return 0
        googleFontsCss = open(googleFontsCssPath, 'ab')

    # the with-block closes (and flushes) the file even if a download fails
    with googleFontsCss:
        # use css urls explicitly provided on command line
        if cssUrls:
            print('using explicitly provided css urls:')
            for url in cssUrls:
                print(' - ' + url)
        else:
            siteUrlString = urlunparse(siteUrl)
            print('finding css from {}'.format(siteUrlString))
            cssUrls = findCssUrls(siteUrlString)

        for cssUrl in cssUrls:
            if GOOGLE_CSS_HOST in cssUrl:
                print('google font css --> appending to {}'.format(googleFontsCssPath))
                download(
                    cssUrl,
                    googleFontsCss,
                    # provide user agent for woff2 support
                    API_REQUEST_HEADERS
                )

    print('downloading google fonts occurring in {}'.format(googleFontsCssPath))
    with open(googleFontsCssPath, 'r', encoding='utf-8') as inputCss, \
            open(cssDir + '/fonts.css', 'w', encoding='utf-8') as outputCss:
        localiseCssFonts(inputCss, outputCss, fontsDir)
# Script entry point: the first argument is the site url, any further
# arguments are stylesheet urls to use instead of scanning the site.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # exit with a usage line on stderr instead of an exception traceback
        sys.exit('Usage: {} <url> [css-url ...]'.format(os.path.basename(sys.argv[0])))
    processUrl(sys.argv[1], sys.argv[2:])