Skip to content

Commit

Permalink
MacOS Test
Browse files Browse the repository at this point in the history
  • Loading branch information
Naibo_Mac_M2 committed Dec 6, 2023
1 parent e4037e2 commit 5376aa3
Show file tree
Hide file tree
Showing 12 changed files with 508 additions and 248 deletions.
598 changes: 391 additions & 207 deletions .temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py

Large diffs are not rendered by default.

40 changes: 24 additions & 16 deletions .temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,21 @@ def find_element(self, by=By.ID, value=None, iframe=False):
except Exception as e:
print(e)
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并查找里面的元素
for iframe in iframes:
# 切换到 iframe
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
try:
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
element = super().find_element(by=by, value=value)
find_element = True
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
Expand All @@ -68,14 +70,14 @@ def find_elements(self, by=By.ID, value=None, iframe=False):
# 获取所有的 iframe
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并找到里面的元素
for iframe in iframes:
# 切换到 iframe
try:
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
elements = super().find_elements(by=by, value=value)
if len(elements) > 0:
Expand All @@ -84,8 +86,10 @@ def find_elements(self, by=By.ID, value=None, iframe=False):
# super().switch_to.default_content()
if find_element:
return elements
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
if not find_element:
raise NoSuchElementException
else:
Expand Down Expand Up @@ -117,19 +121,21 @@ def find_element(self, by=By.ID, value=None, iframe=False):
except Exception as e:
print(e)
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并找到里面的元素
for iframe in iframes:
# 切换到 iframe
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
try:
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
element = super().find_element(by=by, value=value)
find_element = True
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
Expand All @@ -148,14 +154,14 @@ def find_elements(self, by=By.ID, value=None, iframe=False):
# 获取所有的 iframe
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
find_element = False
# 遍历所有的 iframe 并点击里面的元素
# 遍历所有的 iframe 并查找里面的元素
for iframe in iframes:
# 切换到 iframe
try:
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
# 在 iframe 中查找并点击元素
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
elements = super().find_elements(by=by, value=value)
if len(elements) > 0:
Expand All @@ -164,8 +170,10 @@ def find_elements(self, by=By.ID, value=None, iframe=False):
# super().switch_to.default_content()
if find_element:
return elements
except:
print("No such element found in the iframe")
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
if not find_element:
raise NoSuchElementException
else:
Expand Down
40 changes: 25 additions & 15 deletions .temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,22 @@ def lowercase_tags_in_xpath(xpath):
def on_press_creator(press_time, event):
def on_press(key):
try:
if key.char == 'p':
if key.char == press_time["pause_key"]:
if press_time["is_pressed"] == False: # 没按下p键时,记录按下p键的时间
press_time["duration"] = time.time()
press_time["is_pressed"] = True
else: # 按下p键时,判断按下p键的时间是否超过2.5秒
duration = time.time() - press_time["duration"]
if duration > 2:
if event._flag == False:
print("任务执行中,长按p键暂停执行。")
print("Task is running, long press 'p' to pause.")
print("任务执行中,长按" + press_time["pause_key"] + "键暂停执行。")
print("Task is running, long press '" + press_time["pause_key"] + "' to pause.")
# 设置Event的值为True,使得线程b可以继续执行
event.set()
else:
# 设置Event的值为False,使得线程b暂停执行
print("任务已暂停,长按p键继续执行...")
print("Task paused, long press 'p' to continue...")
print("任务已暂停,长按" + press_time["pause_key"] + "键继续执行...")
print("Task paused, long press '" + press_time["pause_key"] + "' to continue...")
event.clear()
press_time["duration"] = time.time()
press_time["is_pressed"] = False
Expand Down Expand Up @@ -176,26 +176,36 @@ def write_to_csv(file_name, data, record):
f_csv.writerow(to_write)
f.close()


def eval_repl(matchobj):
    """Evaluate the expression captured in group 1 of a regex match.

    The captured text is echoed to stdout, evaluated with ``eval`` against the
    module globals and this function's local namespace (so the expression can
    refer to ``matchobj``), and the result is returned in string form.
    Presumably meant as an ``re.sub`` replacement callback; no caller is
    visible in this part of the file.

    NOTE(review): ``eval`` executes arbitrary Python code. Only feed this
    function text that comes from a trusted source.

    Args:
        matchobj: A regex match object with at least one capture group.

    Returns:
        ``str()`` of the evaluated expression.
    """
    print(matchobj.group(1))
    evaluated = eval(matchobj.group(1), globals(), locals())
    return str(evaluated)



def replace_field_values(orginal_text, outputParameters, browser=None):
    """Fill in ``Field["name"]`` placeholders and evaluate ``eval("...")`` snippets.

    Processing happens in two steps:

    1. Every ``Field["name"]`` occurrence is replaced by the string form of
       ``outputParameters["name"]`` (an empty string when the key is missing).
    2. If the resulting text contains ``eval(`` in any letter case, every
       ``self.`` in the text is rewritten to ``browser.`` and each
       ``eval("<expression>")`` snippet is replaced by ``str()`` of the
       evaluated expression.

    NOTE(review): ``eval`` executes arbitrary Python code, and field values
    are substituted *before* evaluation, so a field value that itself
    contains ``eval("...")`` is evaluated as well. Confirm that both the
    template and the field values come from trusted sources.

    Args:
        orginal_text: Template text to process.
        outputParameters: Mapping from field name to its current value.
        browser: Optional object that evaluated expressions can reach through
            the name ``browser`` (``self.`` is rewritten to ``browser.``).

    Returns:
        The processed text, or ``orginal_text`` unchanged when substitution
        or evaluation raised an exception.
    """
    field_pattern = r'Field\["([^"]+)"\]'
    try:
        # str() keeps a non-string field value (e.g. a number) from making
        # re.sub raise TypeError and throwing away every substitution.
        replaced_text = re.sub(
            field_pattern,
            lambda match: str(outputParameters.get(match.group(1), '')),
            orginal_text)
        if re.search(r'eval\(', replaced_text, re.IGNORECASE):
            replaced_text = replaced_text.replace("self.", "browser.")
            eval_pattern = re.compile(r'(?i)eval\("(.+?)"\)')
            # Single left-to-right pass: evaluated results are never scanned
            # again, so a result that happens to contain eval("...") text can
            # neither be executed nor cause an endless loop.
            pieces = []
            cursor = 0
            for match in eval_pattern.finditer(replaced_text):
                pieces.append(replaced_text[cursor:match.start()])
                # eval is called directly in this function body (not inside a
                # lambda or helper) so that the expression can see `browser`
                # and `outputParameters`.
                pieces.append(str(eval(match.group(1))))
                cursor = match.end()
            pieces.append(replaced_text[cursor:])
            replaced_text = "".join(pieces)
    except Exception as e:
        # Best effort: report the problem and fall back to the untouched text.
        print("eval替换失败,请检查eval语句是否正确。| Failed to replace eval, please check if the eval statement is correct.", e)
        replaced_text = orginal_text
    return replaced_text


def readCode(code):
    """Return the code to run, loading it from a file when requested.

    A value starting with ``outside:`` is treated as a file reference: the
    remainder is joined onto the absolute path of the current working
    directory and the file is read as UTF-8 (a leading BOM is dropped by the
    ``utf-8-sig`` codec). Any other value is returned unchanged.

    Args:
        code: Inline code text, or ``outside:<path>``.

    Returns:
        The inline code itself, or the contents of the referenced file.
    """
    if not code.startswith("outside:"):
        return code
    # Drop the 8-character "outside:" marker to get the path part.
    relative_name = code[8:]
    base_dir = os.path.abspath("./")
    file_name = os.path.join(base_dir, relative_name)
    with open(file_name, 'r', encoding='utf-8-sig') as file_obj:
        return file_obj.read()

def write_to_json(file_name, data, types, record, keys):
keys = list(keys)
# Prepare empty list for data
Expand Down
57 changes: 57 additions & 0 deletions .temp_to_pub/EasySpider_MacOS_all_arch/myCode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
这是一个示例代码文件,可以直接在这里写Python代码,然后在程序中的exec操作中调用。如果此文件名称为myCode.py,请将此文件放置在EasySpider程序目录下(和Data/文件夹同级),那么在程序中的exec操作中可以直接写outside:myCode.py来调用此文件中的代码,示例:
1. 用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)即可滚动到页面最下方。
2. 自定义一个全局变量:self.myVar = 1
3. 操纵上面定义的全局变量:self.myVar = self.myVar + 1
4. 打印上面定义的全局变量:print(self.myVar)
5. 将自定义变量的值赋值为某个字段提取的值:self.myVar = self.outputParameters["字段名"]
6. 修改某个字段提取的值:self.outputParameters["字段名"] = "新值"
对于更加复杂的操作,请直接下载源代码并编译执行。
This is a sample code snippet file. You can directly write Python code here, and then call it in the program using an `exec` operation. If this file is named myCode.py, please place this file under the EasySpider program directory (at the same level as the Data/ folder). Then, in the program's `exec` operation, you can directly write outside:myCode.py to invoke the code from this file. Examples:
1. Use self.browser to refer to the current browser being operated on. You can directly utilize the selenium API to perform actions. For instance, self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END) will scroll to the bottom of the page.
2. Define a global variable: self.myVar = 1
3. Manipulate the above-defined global variable: self.myVar = self.myVar + 1
4. Print the above-defined global variable: print(self.myVar)
5. Assign a value to the custom variable from a value extracted for some field: self.myVar = self.outputParameters["field name"]
6. Modify the value extracted for some field: self.outputParameters["field name"] = "new value"
For more complex operations, please download the source code and compile it for execution.
"""

# 请在下面编写你的代码,顶层语句不要缩进(必须从行首开始)!!! | Please write your code below; do not indent top-level statements (they must start at column 0)!!!

# 导包 | Import packages
from selenium.common.exceptions import ElementClickInterceptedException

# 定义一个函数 | Define a function
def test(n = 0):
    """Print every even integer in ``[0, n)`` and return the string "test".

    Args:
        n: Exclusive upper bound; nothing is printed when ``n <= 0``.

    Returns:
        The literal string ``"test"``.
    """
    # Stepping by 2 from 0 visits exactly the even values below n.
    for even in range(0, n, 2):
        print(even)
    return "test"

# 异常捕获 | Exception capture
try:
# Locate an element by XPath and click it in the browser.
element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # Ignore IDE errors on this line: the code is embedded in the host program, so the IDE cannot resolve `self` and `By`
element.click()
print("点击成功|Click success")
except ElementClickInterceptedException:
# The element is covered by something else, so the click failed.
print("元素被遮挡,无法点击|The element is blocked and cannot be clicked")
except Exception as e:
# Report any other exception.
print("发生了一个异常|An exception occurred", e)
finally:
# Demo: set, print and increment a custom attribute on `self`, print the extracted field values, then call the sample function.
self.a = 1
print("a = ", self.a)
self.a = self.a + 1
print("a = ", self.a)
print("All parameters:", self.outputParameters)
print(test(3))
print("执行完毕|Execution completed")
Binary file modified ElectronJS/EasySpider_en.crx
Binary file not shown.
Binary file modified ElectronJS/EasySpider_zh.crx
Binary file not shown.
2 changes: 1 addition & 1 deletion ElectronJS/config.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"/Users/naibo/Documents/EasySpider/ElectronJS/user_data"}
10 changes: 4 additions & 6 deletions ElectronJS/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion ElectronJS/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"formidable": "^3.5.0",
"http": "^0.0.1-security",
"multer": "^1.4.5-lts.1",
"node-abi": "^3.52.0",
"node-window-manager": "^2.2.4",
"selenium-webdriver": "^4.16.0",
"ws": "^8.12.0",
Expand Down Expand Up @@ -79,4 +80,4 @@
"publishers": []
}
}
}
}
1 change: 1 addition & 0 deletions ElectronJS/tasks/229.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"07/12/2023, 03:43:34","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.zhihu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","wai
tElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
2 changes: 1 addition & 1 deletion ElectronJS/update_chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def copy_folder(source_folder, destination_folder):


def get_chrome_version():
version = "115"
version = "120"
if sys.platform == "win32":
version_re = re.compile(r"^[1-9]\d*\.\d*.\d*")
try:
Expand Down
Loading

0 comments on commit 5376aa3

Please sign in to comment.