Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
rajat4665 authored Apr 28, 2019
1 parent 4253f7c commit 7233646
Showing 1 changed file with 115 additions and 0 deletions.
115 changes: 115 additions & 0 deletions flipkart_scraping.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pop-up closed\n",
"Task completed\n"
]
}
],
"source": [
"import os \n",
"from selenium import webdriver\n",
"from bs4 import BeautifulSoup\n",
"import time\n",
"import csv\n",
"\n",
"\n",
"class Flipkart():\n",
" \n",
" def __init__(self):\n",
" self.current_path= os.getcwd()\n",
" self.url= 'https://www.flipkart.com'\n",
" self.driver_path= os.path.join(os.getcwd(),'chromedriver')\n",
" self.driver= webdriver.Chrome(self.driver_path)\n",
" \n",
" def page_load(self):\n",
" self.driver.get(self.url)\n",
" try:\n",
" login_pop = self.driver.find_element_by_class_name('_29YdH8')\n",
" login_pop.click()\n",
" print('pop-up closed')\n",
" except:\n",
" pass\n",
" search_field = self.driver.find_element_by_class_name('LM6RPg')\n",
" search_field.send_keys('smartphone'+ '\\n')\n",
" time.sleep(2)\n",
" page_html = self.driver.page_source\n",
" self.soup = BeautifulSoup(page_html,'html.parser')\n",
" \n",
" def create_csv_file(self):\n",
" rowHeaders=[\"Name\",\"Storage_details\",\"Screen_size\",\"Camera_details\",\"Battery_details\",\"Processor\",\n",
" \"Warranty\",\"Price in Rupees\"]\n",
" self.file_csv = open('Flipkart_output.csv', 'w', newline='', encoding='utf-8')\n",
" self.mycsv = csv.DictWriter(self.file_csv, fieldnames=rowHeaders)\n",
" self.mycsv.writeheader()\n",
" \n",
" def data_scrap(self):\n",
" first_page_mobiles = (self.soup.find_all('div',class_='_3O0U0u')) \n",
" for i in first_page_mobiles:\n",
" Name = i.find('img',class_ ='_1Nyybr')['alt']\n",
" price = i.find('div',class_ ='_1vC4OE _2rQ-NK')\n",
" details = i.find_all(\"li\")\n",
" storage = details[0].text\n",
" screen_size = details[1].text\n",
" camera_details = details[2].text\n",
" battery_details =details[3].text\n",
" processor = details[4].text\n",
" try:\n",
" warranty_details = [j.text for j in details if j.text[:14] == \"Brand Warranty\"][0]\n",
" except:\n",
" warranty_details = \"No data available\"\n",
" price = price.text[1:]\n",
" self.mycsv.writerow({\"Name\":Name, \"Storage_details\":storage, \"Screen_size\":screen_size, \"Camera_details\":camera_details,\"Battery_details\":battery_details, \"Processor\":processor, \"Warranty\":warranty_details, \"Price in Rupees\":price})\n",
" \n",
" def tearDown(self):\n",
" self.driver.quit()\n",
" self.file_csv.close()\n",
"\n",
"if __name__==\"__main__\":\n",
" \n",
" Flipkart=Flipkart()\n",
" Flipkart.page_load()\n",
" Flipkart.create_csv_file()\n",
" Flipkart.data_scrap()\n",
" Flipkart.tearDown()\n",
" print(\"Task completed\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 7233646

Please sign in to comment.