Skip to content

Commit f25332f

Browse files
committed
Create ksu_scrape_new.py
This version works with the KSU News Website update from August 2022.
1 parent 3596b61 commit f25332f

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

Diff for: 2 ksu scrape/ksu_scrape_new.py

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""Scrape the KSU news-release listing page and save it to a dated CSV.

To use this script, first install the third-party packages imported below
(beautifulsoup4 and requests) plus the lxml parser used by BeautifulSoup,
e.g. ``pip install beautifulsoup4 requests lxml``.
"""
import csv
from datetime import datetime

import requests
from bs4 import BeautifulSoup

# Listing page for KSU news releases (page 1 of the post-August-2022 site).
NEWS_URL = 'https://www.kennesaw.edu/news/news-releases/index.php?&p=1'


def main():
    """Fetch the news listing, parse each post, and write one CSV row per post.

    The output file is named with today's date (e.g. "ksu_news August 01, 2024.csv")
    so successive runs do not clobber each other.
    """
    # Grab the basic content from the web page. A timeout keeps the script
    # from hanging forever, and raise_for_status() fails fast on an HTTP
    # error instead of silently parsing an error page.
    response = requests.get(NEWS_URL, timeout=30)
    response.raise_for_status()

    # Using the lxml parser to process the web page text content.
    soup = BeautifulSoup(response.text, 'lxml')

    # The August-2022 site update wraps each post in an <li> inside
    # <ul class="blog_listing">.
    blog_list = soup.find('ul', class_='blog_listing')
    blog_posts = blog_list.find_all('li')

    # newline='' is required by the csv module (prevents blank rows on
    # Windows); the context manager guarantees the file is closed even if
    # parsing a post raises.
    csv_name = "ksu_news " + "{:%B %d, %Y}".format(datetime.now()) + ".csv"
    with open(csv_name, "w", newline="", encoding="utf-8") as ksu_news_csv:
        csv_writer = csv.writer(ksu_news_csv)
        # Write the header row into our csv file.
        csv_writer.writerow(["Number", "Title", "URL", "Date"])

        for number, blog_post in enumerate(blog_posts, start=1):
            title = blog_post.h3.text
            # The date <p> carries stray whitespace and quote characters;
            # strip them in the same order the original did.
            date = blog_post.p.text.strip().strip('"').strip()
            url = blog_post.find('a')['href']
            csv_writer.writerow([number, title, url, date])


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)