# Download_course.py
import re
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
# Scan a course web page and list the lecture material it links to:
# PDF files, PowerPoint files (.ppt/.pptx) and YouTube videos.

# Page to scan.  Alternative page this script has been pointed at:
#   url  = 'https://safari.ethz.ch/digitaltechnik/spring2019/doku.php?id=schedule'
#   root = 'https://safari.ethz.ch'
url = 'http://ece.tamu.edu/~spalermo/ecen720.html'
# Base URL that relative document links are resolved against.
root = 'http://ece.tamu.edu/~spalermo/'

# Link classifiers: file links are recognised by extension at the very end of
# the href, videos by the substring "youtube" anywhere in the href.
r_pdf = re.compile(r'\.pdf$')
r_ppt = re.compile(r'\.ppt$')
r_pptx = re.compile(r'\.pptx$')
r_video = re.compile(r'youtube')

pdfs = []
ppts = []
videos = []

with urllib.request.urlopen(url) as f:
    # Keep the raw bytes and let the parser decode them: a hard-coded
    # .decode('utf-8') raises UnicodeDecodeError on pages in another charset.
    data = f.read()
    # Charset announced in the Content-Type header, or None if absent (the
    # parser then falls back to its own detection).
    charset = f.headers.get_content_charset()

soup = BeautifulSoup(data, 'lxml', from_encoding=charset)

# href=True skips <a> tags without an href attribute (e.g. named anchors),
# which would otherwise raise KeyError on a['href'].
for a in soup.find_all('a', href=True):
    link = a['href']
    print(link)
    # Independent checks (not elif): a link may land in more than one list,
    # e.g. a YouTube URL that also ends in ".pdf".
    if r_pdf.search(link):
        pdfs.append(link)
    if r_ppt.search(link):
        ppts.append(link)
    if r_pptx.search(link):
        ppts.append(link)
    if r_video.search(link):
        videos.append(link)

# urljoin leaves absolute links untouched and resolves root-relative ones
# correctly; for plain relative links the result equals root + link.
print("PDF files")
for p in pdfs:
    print(urllib.parse.urljoin(root, p))

print("PPT files")
for p in ppts:
    print(urllib.parse.urljoin(root, p))

# Video links are printed exactly as found on the page.
print("\nVideos")
for v in videos:
    print(v)