forked from hadley/web-scraping
-
Notifications
You must be signed in to change notification settings - Fork 0
/
quotes.R
36 lines (27 loc) · 1.01 KB
/
quotes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
library(rvest)
library(tidyverse)
# Download and parse the page to scrape.
url <- "https://quotes.toscrape.com"
html <- read_html(url)

# One node per `.quote` element. Each html_element() call in this section
# returns exactly one result per node, so the outputs are parallel vectors.
row <- html |> html_elements(".quote")

# Text of the `.text` element of each quote
row |>
  html_element(".text") |>
  html_text2()

# Text of the `.author` element of each quote
row |>
  html_element(".author") |>
  html_text2()

# Link target (href) of the first `span a` match within each quote
row |>
  html_element("span a") |>
  html_attr("href")

# Tags of each quote as a single string. The rendered text starts with a
# "Tags:" label; the pattern is anchored and tolerates missing trailing
# whitespace so that a quote without any tags yields "" instead of "Tags:".
row |>
  html_element(".tags") |>
  html_text2() |>
  str_remove("^Tags:\\s*")
# Or, if you look carefully, you'll notice that there's a hidden <meta>
# element inside `.tags` whose `content` attribute holds the tags as a
# comma-separated string.
# html_element() (singular) returns exactly one result per quote -- NA when a
# quote has no such element -- so the output stays aligned with `row`;
# html_elements() would flatten the matches and silently drop such quotes.
row |>
  html_element(".tags meta") |>
  html_attr("content")
# Extracting each tag as its own string ---------------------------------------

# html_element() keeps one result per quote, so only the first tag is returned
row |>
  html_element(".tags") |>
  html_element(".tag")

# html_elements() returns every tag, but flattened into a single node set, so
# the link between a tag and the quote it came from is lost
row |>
  html_element(".tags") |>
  html_elements(".tag")

# Iterating over the per-quote `.tags` nodes keeps the grouping: the result is
# a list holding one character vector of tags per quote
row |>
  html_element(".tags") |>
  lapply(\(tags_node) html_text2(html_elements(tags_node, ".tag")))

# The same computation, spelled out with a full function definition
row |>
  html_element(".tags") |>
  lapply(function(tags_node) {
    found <- html_elements(tags_node, ".tag")
    html_text2(found)
  })