Skip to content

Commit 9bd6868

Browse files
committed
Fix README.md for crates.io
1 parent 08afce2 commit 9bd6868

File tree

2 files changed

+153
-153
lines changed

2 files changed

+153
-153
lines changed

README.md

-152
This file was deleted.

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
scraper/README.md

scraper/README.md

+152-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,152 @@
1-
../README.md
1+
# scraper
2+
3+
[![crates.io](https://img.shields.io/crates/v/scraper?color=dark-green)][crate]
4+
[![downloads](https://img.shields.io/crates/d/scraper)][crate]
5+
[![test](https://github.com/causal-agent/scraper/actions/workflows/test.yml/badge.svg)][tests]
6+
7+
HTML parsing and querying with CSS selectors.
8+
9+
`scraper` is on [Crates.io][crate] and [GitHub][github].
10+
11+
[crate]: https://crates.io/crates/scraper
12+
[github]: https://github.com/causal-agent/scraper
13+
[tests]: https://github.com/causal-agent/scraper/actions/workflows/test.yml
14+
15+
Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade parsing and querying.
16+
17+
## Examples
18+
19+
### Parsing a document
20+
21+
```rust
22+
use scraper::Html;
23+
24+
let html = r#"
25+
<!DOCTYPE html>
26+
<meta charset="utf-8">
27+
<title>Hello, world!</title>
28+
<h1 class="foo">Hello, <i>world!</i></h1>
29+
"#;
30+
31+
let document = Html::parse_document(html);
32+
```
33+
34+
### Parsing a fragment
35+
36+
```rust
37+
use scraper::Html;
38+
let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
39+
```
40+
41+
### Parsing a selector
42+
43+
```rust
44+
use scraper::Selector;
45+
let selector = Selector::parse("h1.foo").unwrap();
46+
```
47+
48+
### Selecting elements
49+
50+
```rust
51+
use scraper::{Html, Selector};
52+
53+
let html = r#"
54+
<ul>
55+
<li>Foo</li>
56+
<li>Bar</li>
57+
<li>Baz</li>
58+
</ul>
59+
"#;
60+
61+
let fragment = Html::parse_fragment(html);
62+
let selector = Selector::parse("li").unwrap();
63+
64+
for element in fragment.select(&selector) {
65+
assert_eq!("li", element.value().name());
66+
}
67+
```
68+
69+
### Selecting descendant elements
70+
71+
```rust
72+
use scraper::{Html, Selector};
73+
74+
let html = r#"
75+
<ul>
76+
<li>Foo</li>
77+
<li>Bar</li>
78+
<li>Baz</li>
79+
</ul>
80+
"#;
81+
82+
let fragment = Html::parse_fragment(html);
83+
let ul_selector = Selector::parse("ul").unwrap();
84+
let li_selector = Selector::parse("li").unwrap();
85+
86+
let ul = fragment.select(&ul_selector).next().unwrap();
87+
for element in ul.select(&li_selector) {
88+
assert_eq!("li", element.value().name());
89+
}
90+
```
91+
92+
### Accessing element attributes
93+
94+
```rust
95+
use scraper::{Html, Selector};
96+
97+
let fragment = Html::parse_fragment(r#"<input name="foo" value="bar">"#);
98+
let selector = Selector::parse(r#"input[name="foo"]"#).unwrap();
99+
100+
let input = fragment.select(&selector).next().unwrap();
101+
assert_eq!(Some("bar"), input.value().attr("value"));
102+
```
103+
104+
### Serializing HTML and inner HTML
105+
106+
```rust
107+
use scraper::{Html, Selector};
108+
109+
let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
110+
let selector = Selector::parse("h1").unwrap();
111+
112+
let h1 = fragment.select(&selector).next().unwrap();
113+
114+
assert_eq!("<h1>Hello, <i>world!</i></h1>", h1.html());
115+
assert_eq!("Hello, <i>world!</i>", h1.inner_html());
116+
```
117+
118+
### Accessing descendant text
119+
120+
```rust
121+
use scraper::{Html, Selector};
122+
123+
let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
124+
let selector = Selector::parse("h1").unwrap();
125+
126+
let h1 = fragment.select(&selector).next().unwrap();
127+
let text = h1.text().collect::<Vec<_>>();
128+
129+
assert_eq!(vec!["Hello, ", "world!"], text);
130+
```
131+
132+
### Manipulating the DOM
133+
134+
```rust
135+
use html5ever::tree_builder::TreeSink;
136+
use scraper::{Html, Selector};
137+
138+
let html = "<html><body>hello<p class=\"hello\">REMOVE ME</p></body></html>";
139+
let selector = Selector::parse(".hello").unwrap();
140+
let mut document = Html::parse_document(html);
141+
let node_ids: Vec<_> = document.select(&selector).map(|x| x.id()).collect();
142+
for id in node_ids {
143+
document.remove_from_parent(&id);
144+
}
145+
assert_eq!(document.html(), "<html><head></head><body>hello</body></html>");
146+
```
147+
148+
## Contributing
149+
150+
Please feel free to open pull requests. If you're planning on implementing
151+
something big (i.e. not a typo fix, a small bug fix, a minor refactor, etc.)
152+
then please open an issue first.

0 commit comments

Comments
 (0)