-
Notifications
You must be signed in to change notification settings - Fork 2
/
test.py
27 lines (22 loc) · 877 Bytes
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import requests
from bs4 import BeautifulSoup
# Print the paragraphs of one section of a Wikipedia article.
#
# The script downloads the page, locates the heading of the wanted section and
# the heading of the section that follows it, and prints every <p> element
# found between the two.

# URL of the Wikipedia page you want to scrape
url = 'https://en.wikipedia.org/wiki/Baahubali:_The_Beginning'
# Section heading you want to scrape (the heading's HTML id)
section_heading = 'Plot'
# Heading of the section that follows; scraping stops at its first paragraph
next_section_heading = 'Cast'

# Send a GET request to the URL. A timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() turns an HTTP error
# page into an exception instead of silently parsing it.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Find the section you want to scrape by its heading id. The lookup is not
# restricted to <span> tags: older MediaWiki markup put the id on a
# <span class="mw-headline">, newer markup puts it on the <h2> itself.
section = soup.find(id=section_heading)
if section is None:
    raise SystemExit(f'Section heading {section_heading!r} not found in {url}')
print(section)

next_section = soup.find(id=next_section_heading)
if next_section is None:
    raise SystemExit(
        f'Section heading {next_section_heading!r} not found in {url}'
    )

# First paragraph of the wanted section, and the first paragraph after the
# next heading, which marks where to stop. The stop marker is computed once
# rather than on every loop iteration.
paragraph = section.find_next('p')
newpara = next_section.find_next('p')
print(newpara)

# Walk forward paragraph by paragraph until the stop marker is reached.
# Identity (`is not`) is used instead of `!=` because tag equality compares
# name/attributes/contents, so two identical-looking paragraphs would end the
# loop early. The None check stops cleanly if the document runs out of <p>
# tags before the marker is seen.
while paragraph is not None and paragraph is not newpara:
    print(paragraph.get_text())
    paragraph = paragraph.find_next('p')