-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathconftest.py
42 lines (39 loc) · 916 Bytes
/
conftest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pytest
import os
from crawlerflow.extractors import HTMLExtractor
import yaml
# Sample HTML page used as the input document for the ``html_extractor``
# fixture. It contains one <h1>, a <ul class="header"> with two links,
# an <img class="banner-pic"> and a <main> element -- the elements the
# selectors in ``extractor_config`` refer to.
html_text = """
<html>
<body>
<h1>Hello, Parsel!</h1>
<ul class="header">
<li><a href="http://example.com">Link 1</a></li>
<li><a href="http://scrapy.org">Link 2</a></li>
</ul>
<img class="banner-pic" src="https://placehold.co/600x400.png">
<main>Main text here</main>
</body>
</html>
"""
# YAML extraction rules handed (after parsing) to ``HTMLExtractor``.
# Each top-level key is an output field; its ``selector`` is the CSS
# selector to evaluate against ``html_text``. ``header_links`` additionally
# declares a ``DictField`` type with per-item sub-fields ``link`` and
# ``text``.
#
# Indentation is significant here: every ``selector`` must be nested under
# its field key, otherwise the document collapses into repeated top-level
# ``selector`` keys and the sub-fields are no longer attached to
# ``header_links``.
extractor_config = """
---
title:
  selector: h1::text
cover_pic:
  selector: .banner-pic::attr(src)
header_links:
  selector: .header li a
  type:
    - DictField
  fields:
    link:
      selector: ::attr(href)
    text:
      selector: ::text
post_content_html:
  selector: main
"""
@pytest.fixture(scope="function")
def html_extractor() -> HTMLExtractor:
    """Build an ``HTMLExtractor`` over the sample page for a single test.

    The module-level ``extractor_config`` YAML text is parsed with
    ``yaml.safe_load`` and passed, together with ``html_text``, to
    ``HTMLExtractor``. The fixture is function-scoped, so it is
    re-evaluated for every test that requests it.

    Returns:
        HTMLExtractor: the extractor constructed from ``html_text`` and
        the parsed configuration.
    """
    # safe_load yields plain Python containers (the config is YAML, not JSON).
    parsed_config = yaml.safe_load(extractor_config)
    return HTMLExtractor(html_text, parsed_config)