Skip to content

Commit 7233646

Browse files
authored
Add files via upload
1 parent 4253f7c commit 7233646

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed

flipkart_scraping.ipynb

Lines changed: 115 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,115 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"pop-up closed\n",
13+
"Task completed\n"
14+
]
15+
}
16+
],
17+
"source": [
18+
"import os \n",
19+
"from selenium import webdriver\n",
20+
"from bs4 import BeautifulSoup\n",
21+
"import time\n",
22+
"import csv\n",
23+
"\n",
24+
"\n",
25+
"class Flipkart():\n",
26+
" \n",
27+
" def __init__(self):\n",
28+
" self.current_path= os.getcwd()\n",
29+
" self.url= 'https://www.flipkart.com'\n",
30+
" self.driver_path= os.path.join(os.getcwd(),'chromedriver')\n",
31+
" self.driver= webdriver.Chrome(self.driver_path)\n",
32+
" \n",
33+
" def page_load(self):\n",
34+
" self.driver.get(self.url)\n",
35+
" try:\n",
36+
" login_pop = self.driver.find_element_by_class_name('_29YdH8')\n",
37+
" login_pop.click()\n",
38+
" print('pop-up closed')\n",
39+
" except:\n",
40+
" pass\n",
41+
" search_field = self.driver.find_element_by_class_name('LM6RPg')\n",
42+
" search_field.send_keys('smartphone'+ '\\n')\n",
43+
" time.sleep(2)\n",
44+
" page_html = self.driver.page_source\n",
45+
" self.soup = BeautifulSoup(page_html,'html.parser')\n",
46+
" \n",
47+
" def create_csv_file(self):\n",
48+
" rowHeaders=[\"Name\",\"Storage_details\",\"Screen_size\",\"Camera_details\",\"Battery_details\",\"Processor\",\n",
49+
" \"Warranty\",\"Price in Rupees\"]\n",
50+
" self.file_csv = open('Flipkart_output.csv', 'w', newline='', encoding='utf-8')\n",
51+
" self.mycsv = csv.DictWriter(self.file_csv, fieldnames=rowHeaders)\n",
52+
" self.mycsv.writeheader()\n",
53+
" \n",
54+
" def data_scrap(self):\n",
55+
" first_page_mobiles = (self.soup.find_all('div',class_='_3O0U0u')) \n",
56+
" for i in first_page_mobiles:\n",
57+
" Name = i.find('img',class_ ='_1Nyybr')['alt']\n",
58+
" price = i.find('div',class_ ='_1vC4OE _2rQ-NK')\n",
59+
" details = i.find_all(\"li\")\n",
60+
" storage = details[0].text\n",
61+
" screen_size = details[1].text\n",
62+
" camera_details = details[2].text\n",
63+
" battery_details =details[3].text\n",
64+
" processor = details[4].text\n",
65+
" try:\n",
66+
" warranty_details = [j.text for j in details if j.text[:14] == \"Brand Warranty\"][0]\n",
67+
" except:\n",
68+
" warranty_details = \"No data available\"\n",
69+
" price = price.text[1:]\n",
70+
" self.mycsv.writerow({\"Name\":Name, \"Storage_details\":storage, \"Screen_size\":screen_size, \"Camera_details\":camera_details,\"Battery_details\":battery_details, \"Processor\":processor, \"Warranty\":warranty_details, \"Price in Rupees\":price})\n",
71+
" \n",
72+
" def tearDown(self):\n",
73+
" self.driver.quit()\n",
74+
" self.file_csv.close()\n",
75+
"\n",
76+
"if __name__==\"__main__\":\n",
77+
" \n",
78+
" Flipkart=Flipkart()\n",
79+
" Flipkart.page_load()\n",
80+
" Flipkart.create_csv_file()\n",
81+
" Flipkart.data_scrap()\n",
82+
" Flipkart.tearDown()\n",
83+
" print(\"Task completed\")\n"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"metadata": {},
90+
"outputs": [],
91+
"source": []
92+
}
93+
],
94+
"metadata": {
95+
"kernelspec": {
96+
"display_name": "Python 3",
97+
"language": "python",
98+
"name": "python3"
99+
},
100+
"language_info": {
101+
"codemirror_mode": {
102+
"name": "ipython",
103+
"version": 3
104+
},
105+
"file_extension": ".py",
106+
"mimetype": "text/x-python",
107+
"name": "python",
108+
"nbconvert_exporter": "python",
109+
"pygments_lexer": "ipython3",
110+
"version": "3.5.2"
111+
}
112+
},
113+
"nbformat": 4,
114+
"nbformat_minor": 2
115+
}

0 commit comments

Comments
 (0)