-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawler.py
85 lines (69 loc) · 2.99 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
Log in to SurgStory, open every entry of a drive folder, and save each entry's
annotation canvases as PNG images.
"""
import base64
import getpass
import os
import time
from unicodedata import name

import numpy as np
from selenium import webdriver
from selenium.webdriver.edge.service import Service as EdgeService
from webdriver_manager.microsoft import EdgeChromiumDriverManager
# --- Configuration ----------------------------------------------------------
# Title of the channel to open after login. It is matched against the `title`
# attribute of an element on the channel-selection page, so it must be set to
# a real channel title before running.
channel = None
category = None #'test set'

if channel is None:
    # Fail fast: otherwise the XPath below would look for title="None" and
    # only fail after the credentials were entered and the browser was open.
    raise SystemExit("Set `channel` to the title of the channel to open before running.")

# Fill out the ID & PW to login
print('Login')
ID = input('ID : ')
# getpass keeps the password from being echoed to the terminal.
PW = getpass.getpass('Password : ')

# Open surgstory
# Selenium 4 expects the driver binary to be wrapped in a Service object;
# passing the path positionally is deprecated and rejected by newer releases.
browser = webdriver.Edge(service=EdgeService(EdgeChromiumDriverManager().install()))
# The implicit wait is a session-wide setting (not a one-off pause), so it is
# configured once here and then applies to every element lookup below.
browser.implicitly_wait(3)
browser.get("https://www.surgstory.com/v2/login")

# Send ID & PW then click the login button
browser.find_element("xpath", "/html/body/div[1]/div[1]/div[1]/div/div[2]/form/div[1]/div/input").send_keys(ID)
browser.find_element("xpath", "/html/body/div[1]/div[1]/div[1]/div/div[2]/form/div[2]/div/input").send_keys(PW)
browser.find_element("xpath", "/html/body/div[1]/div[1]/div[1]/div/div[2]/form/div[4]/div/span").click()

# Select the right channel, then confirm the selection
browser.find_element("xpath", f'//*[@title="{channel}"]').click()
browser.find_element("xpath", "//html/body/div[1]/div[1]/div[1]/div/div[1]/div[2]/div/span").click()
# Short fixed pause before navigating away from the channel page.
time.sleep(1)
# --- Crawl the directory of interest ------------------------------------------
FOLDER_URL = 'https://www.surgstory.com/v2/drive?folder=3622'
NUM_ITEMS = 600                                # number of folder entries to visit
PRIMARY_LABELS = ['artery', 'rib', 'nerve']    # output folders for the first three canvases
EXTRA_DIR = 'etc'                              # output folder for any further canvases
ITEM_XPATH = '//*[@class="ellipsis max_100p hover_point pr_5"]'


def _save_canvas_png(canvas_element, path):
    """Write the pixels of a <canvas> element to ``path`` as a PNG file."""
    data_url = browser.execute_script(
        "return arguments[0].toDataURL('image/png');", canvas_element)
    # A data URL has the form "data:image/png;base64,<payload>"; keep only the
    # payload instead of slicing at a hard-coded character offset.
    payload = data_url.split(',', 1)[1]
    with open(path, 'wb') as png_file:
        png_file.write(base64.b64decode(payload))


# open(..., 'wb') does not create missing directories, so make them up front.
for out_dir in PRIMARY_LABELS + [EXTRA_DIR]:
    os.makedirs(out_dir, exist_ok=True)

# Session-wide lookup timeout; setting it once covers every lookup below.
browser.implicitly_wait(3)

# Per-entry time stamp, stored as the integer formed by the digits of the last
# 8 characters of the time text (colons removed). Filled in reverse order.
# NOTE(review): time_list is not written out or read anywhere in the visible
# script - confirm whether it should be saved.
time_list = np.zeros(NUM_ITEMS)

try:
    browser.get(FOLDER_URL)
    for i in range(NUM_ITEMS):
        # Re-query on every pass: the page is reloaded at the end of each
        # iteration, so element handles from the previous pass are stale.
        entries = browser.find_elements("xpath", ITEM_XPATH)
        if i >= len(entries):
            print(f'Only {len(entries)} entries are listed; stopping at index {i}.')
            break
        entries[i].click()
        browser.find_element("xpath", '//*[@class="row time"]').click()

        time_text = browser.find_elements("class name", "time_text")[-1].text[-8:]
        time_list[NUM_ITEMS - 1 - i] = int(time_text.replace(':', ''))

        canvases = browser.find_elements("class name", "annotation_canvas")
        file_no = NUM_ITEMS - i  # files are numbered NUM_ITEMS down to 1
        # The first three canvases map onto the named label folders ...
        for canvas_element, label in zip(canvases[:3], PRIMARY_LABELS):
            _save_canvas_png(canvas_element, f"{label}/{file_no}.png")
        # ... and any remaining ones are numbered into the extra folder.
        for m, canvas_element in enumerate(canvases[3:]):
            _save_canvas_png(canvas_element, f"{EXTRA_DIR}/{file_no}_{m}.png")

        # Go back to the folder listing for the next entry.
        browser.get(FOLDER_URL)
        time.sleep(1)
finally:
    # Always shut the browser and driver process down, even after an error.
    browser.quit()
# cur_dir = os.path.dirname(os.path.abspath(__file__))
# save_dir = os.path.join(cur_dir, 'videos')
# Get the list of files to be downloaded
# No = browser.find_elements("xpath", '//*[@class="download_btn"]')
# for n in No:
# n.click()
# time.sleep(1)